From: miles Date: Thu, 19 Jul 2012 21:02:54 +0000 (+0000) Subject: Tab Delimited File option fully functional. See tmp.txt for an example output file. X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=29be9abc66d00b6cb54260c44a9a6211fe1f81c2;p=old-jaiswallab-svn%2F.git Tab Delimited File option fully functional. See tmp.txt for an example output file. svn path=/; revision=365 --- diff --git a/Personnel/miles/2/src/Heatmap.java b/Personnel/miles/2/src/Heatmap.java new file mode 100644 index 0000000..f20e09a --- /dev/null +++ b/Personnel/miles/2/src/Heatmap.java @@ -0,0 +1,330 @@ +import java.sql.*; +import java.io.*; +import java.util.Properties; +import java.util.Scanner; +import java.awt.*; +import java.awt.image.ImageProducer; + +import javax.swing.*; + + +/** + * @author miles + * + */ +public class Heatmap { + String[] species; + static int[] allClusterIDs; + + public static void main(String[] args) { + //String[] geneIDs = new String[] {"mgf009407m", "GRMZM2G457201_T02", "LOC_Os03g53530.1", "Bradi3g44220.1"}; + Species[] species = getHeatChartData(args); + + // IF asked for table + generateDelimitedTable(species); + + // IF asked for image + // generateHeatMapImage(species) + + // IF asked for webpage + //displayHeatChart(species); + } + + private static void generateDelimitedTable(Species[] species) { + System.out.println("About to create a new tab delimited file with data requested." + "\n" + "Please name new file:"); + Scanner in = new Scanner(System.in); + String filename = in.next(); + in.close(); + FileWriter fileWriter = null; + + try { + fileWriter = new FileWriter(filename); + } catch (IOException e) { + System.out.println("File failed to create"); + } + + PrintWriter printWriter = new PrintWriter(fileWriter); + printWriter.print("Species \t"); + for (int clust = 0; clust < allClusterIDs.length; clust ++) { + printWriter.print("Cluster " + clust + "\t"); + } + + printWriter.print("\n"); + + + for (int s = 0; s < species.length; s ++) { + printWriter.print("\n" + species[s].name); + for (int c = 0; c < allClusterIDs.length; c ++) { + printWriter.print("\t"); + int frequency = species[s].findClusterFrequency(allClusterIDs[c]); + // -1 means that species contains no genes in the specified cluster, but can't have negative color + // ensure that frequency is never negative + if (frequency == -1) { + frequency = 0; + } + printWriter.print(frequency); + } + } + printWriter.close(); + } + + // creates a connection, distributes work and information to the various helper functions + // gathers the final list of species and their geneMaps in one place + public static Species[] getHeatChartData(String[] geneIDs) { // TODO make error messages more helpful + int[] clusterIDs; + + Connection connection = openConnection(); + Statement statement = null; + + connection = selectDatabase(statement, connection); + + clusterIDs = getAllClusters(geneIDs, connection); + allClusterIDs = clusterIDs; + + Species[] species = findAllSpecies(clusterIDs, connection); + System.out.println(species.length); + for(int i = 0; i < species.length; i ++) { + System.out.println(species[i].name + " " + species[i].geneMap[0][1]); + } + + return species; + } + + // prints out the heatMap + public static void displayHeatChart(Species[] species) { + Frame f = new Frame("heatMapFrame"); + ImageProducer imgProd = null; + Image i = f.createImage(imgProd); + Graphics g; + g = i.getGraphics(); + + for (int s = 0; s < species.length; s ++) { + for (int c = 0; c < allClusterIDs.length; c ++) { + int frequency = species[s].findClusterFrequency(allClusterIDs[c]); + // -1 means that species contains no genes in the specified cluster, but can't have negative color + // ensure that frequency is never negative + if (frequency == -1) { + frequency = 0; + } + Color color = new Color(frequency * 5, 50, 50); // TODO adjust based on maximum number of hits per species/cluster pair + g.setColor(color); + g.drawRect((s*25), (c*25), 25, 25); //TODO how do I create a graphics object? I can't manipulate it if I don't have it but I can't seem to create one either. + } + } + } + + // generates the logic to gather the list of all species that fall into the given cluster ID's + // generates a list of unique species objects and their geneMaps + private static Species[] findAllSpecies(int[] clusterIDs, Connection connection) { + Species[] rawSpecies; + int speciesCount = 0; + String currentSpecies = null; + + String inStatement = getInStatement(clusterIDs, connection); + + PreparedStatement findSpecies = null; + ResultSet rs = null; + + try { + findSpecies = connection.prepareStatement( + "select super_id, species, count(species) from super_clust where super_id " + + inStatement + + "group by species, super_id order by species"); + } catch (SQLException e) { + System.out.println("Statement failed to prepare"); + } + + + try { + rs = findSpecies.executeQuery(); + } + catch (SQLException e) { + System.out.println("Could not execute Query, possible error in statement"); + } + + try { + rs.last(); + speciesCount = rs.getRow(); // remember that the query returns a distinct row for each species/super_id combo + rs.beforeFirst(); // meaning that the number of rows = species * unique cluster ids + rs.next(); // TODO must figure out how to ensure that ALL clusters for each species are grouped in ONE species object + // WITHOUT causing errors. Above statement isn't quite true, for some reason some species aren't getting double counted, but others are. + currentSpecies = rs.getString(2); + System.out.println(allClusterIDs.length); + } + catch (SQLException e) { + System.out.println("Error in returned data - check database"); + } + + rawSpecies = new Species[speciesCount]; // initialize it to the correct size + speciesCount = 0; // reusing the species count variable for other purposes + + try { + rawSpecies[speciesCount] = new Species(); + rawSpecies[speciesCount].setName(currentSpecies); + rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); + while(rs.next()) { + String name = rs.getString(2); + if (name == currentSpecies) { + rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); + } else { + currentSpecies = name; + speciesCount ++; + rawSpecies[speciesCount] = new Species(); + rawSpecies[speciesCount].setName(currentSpecies); + rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); + } + } + } + catch (SQLException e) { + System.out.println("Unexpected Error"); + } + + rawSpecies = groupSpecies(rawSpecies); + + return rawSpecies; + + } + + private static Species[] groupSpecies(Species[] rawSpecies) { + Species[] species = new Species[rawSpecies.length]; + int cs = 0; + species[0] = rawSpecies[0]; + Species lastSpecies = species[0]; + String currentSpecies; + for(int count = 0; count < rawSpecies.length; count ++) { + currentSpecies = rawSpecies[count].name; + if (currentSpecies.equals(lastSpecies.name)) { + lastSpecies.addCluster(rawSpecies[count].geneMap[0][0], rawSpecies[count].geneMap[0][1]); + } else { + species[cs] = lastSpecies; + cs ++; + lastSpecies = rawSpecies[count]; + } + } + int unique = 0; + for(int count = 0; (count < species.length && species[count] != null); count ++) { + unique = count + 1; + } + System.out.println(unique + "unique"); + Species[] finalSpecies = new Species[unique]; + for (int count = 0; count < unique; count ++) { + finalSpecies[count] = species[count]; + } + return finalSpecies; + } + + + + private static String getInStatement(int[] clusterIDs, Connection connection) { + String inStatement = "IN ("; + for (int i = 0; i < clusterIDs.length; i ++) { + inStatement = inStatement + clusterIDs[i]; + if(i+1 < clusterIDs.length) { + inStatement = inStatement + ","; + } else { + inStatement = inStatement + ")"; + } + } + return inStatement; + } + + // finds ALL clusters that the given geneID's fall into + private static int[] getAllClusters(String[] geneIDs, Connection connection) { + ResultSet rs = null; + int[] allClusters; + + + String logicStatement = "gene = '"; + for (int i = 0; i < geneIDs.length; i ++) { + logicStatement = logicStatement + geneIDs[i]; + if(i+1 < geneIDs.length) { + logicStatement = logicStatement + "' OR gene = '"; + } else { + logicStatement = logicStatement + "'"; + } + } + + // finds the clusters that the genes are in + PreparedStatement findCluster; + try { + findCluster = connection.prepareStatement("SELECT DISTINCT super_id FROM super_clust WHERE " + + logicStatement); + } catch (SQLException e1) { + System.out.println("Statment failed to prepare"); + findCluster = null; + } + + try { + rs = findCluster.executeQuery(); + } catch (SQLException e) { + System.out.println("Unable to execute Query"); + } + + int totalClusters = 0; + + try { + rs.last(); + totalClusters = rs.getRow(); + rs.beforeFirst(); + } catch (SQLException e) { + System.out.println("Error in data, unable to parse"); + } + + allClusters = new int[totalClusters]; + + for (int i = 0; i < geneIDs.length; i ++) { + try { + rs.next(); + allClusters[i] = rs.getInt(1); + } catch (SQLException e) { + System.out.println("Genes overlap"); + } + } + + return allClusters; + } + + // selects the database to use + private static Connection selectDatabase(Statement statement, Connection connection) { + PreparedStatement useDatabase; + try { + useDatabase = connection.prepareStatement("USE inparanoid_data"); + } catch (SQLException e2) { + System.out.println("Unexpected Error"); + useDatabase = null; + } + try { + useDatabase.execute(); + } catch (SQLException e) { + System.out.println("Unexpected Error"); + } + return connection; + } + + private static Connection openConnection() { + Properties properties = new Properties(); + properties.setProperty("user", "inparanoid-read-user"); + properties.setProperty("password", "inparanoid-read-user_pw"); + properties.setProperty("autoReconnect", "true"); + properties.setProperty("useOldUTF8Behavior", "true"); + properties.setProperty("zeroDateTimeBehavior", "convertToNull"); + properties.setProperty("dontTrackOpenResources", "true"); + String url = "jdbc:mysql://floret.cgrb.oregonstate.edu:3306/inparanoid_data"; + + try { + Class.forName("com.mysql.jdbc.Driver").newInstance(); //Or any other driver + } + catch(Exception x){ + System.out.println( "Unable to load the driver class!" ); + } + try { + Connection c = DriverManager.getConnection(url, properties); + return c; + } + catch(SQLException x) { + System.out.println("Couldn’t get connection!"); + } + return null; + } + +} diff --git a/Personnel/miles/2/src/Main.java b/Personnel/miles/2/src/Main.java deleted file mode 100644 index 4d21fc2..0000000 --- a/Personnel/miles/2/src/Main.java +++ /dev/null @@ -1,228 +0,0 @@ -import java.sql.*; -import java.util.Properties; -import java.awt.GradientPaint; - - -/** - * @author miles - * - */ -public class Main { - String[] species; - static int[] allClusterIDs; - - public static void main(String[] args) { - String[] geneIDs = new String[] {"mgf009407m", "GRMZM2G457201_T02"}; - getHeatChartData(geneIDs); - } - - // creates a connection, distributes work and information to the various helper functions - // gathers the final list of species and their geneMaps in one place - public static Species[] getHeatChartData(String[] geneIDs) { // TODO make error messages more helpful - int[] clusterIDs; - - Connection connection = openConnection(); - Statement statement = null; - - connection = selectDatabase(statement, connection); - - clusterIDs = getAllClusters(geneIDs, connection); - allClusterIDs = clusterIDs; - - Species[] species = findAllSpecies(clusterIDs, connection); - // TODO find a way to sort the species - //species.sort; - - for(int i = 0; i < species.length; i ++) { - System.out.println(species[i].name + " " + species[i].geneMap[0][1]); - } - - return species; - } - - // prints out the heatMap - public void displayHeatChart(Species[] species) { - - } - - // generates the logic to gather the list of all species that fall into the given cluster ID's - // generates a list of unique species objects and their geneMaps - private static Species[] findAllSpecies(int[] clusterIDs, Connection connection) { - Species[] rawSpecies; - int speciesCount = 0; - String currentSpecies = null; - - String inStatement = getInStatement(clusterIDs, connection); - - PreparedStatement findSpecies = null; - ResultSet rs = null; - - try { - findSpecies = connection.prepareStatement( - "select super_id, species, count(species) from super_clust where super_id " - + inStatement + - "group by species, super_id order by species"); - } catch (SQLException e) { - System.out.println("Statement failed to prepare"); - } - - - try { - rs = findSpecies.executeQuery(); - } - catch (SQLException e) { - System.out.println("Could not execute Query, possible error in statement"); - } - - try { - rs.last(); - speciesCount = rs.getRow(); - rs.beforeFirst(); - rs.next(); - currentSpecies = rs.getString(2); - } - catch (SQLException e) { - System.out.println("Error in returned data - check database"); - } - - rawSpecies = new Species[speciesCount]; // initialize it to the correct size - speciesCount = 0; // reusing the species count variable for other purposes - - try { - rawSpecies[speciesCount] = new Species(); - rawSpecies[speciesCount].setName(currentSpecies); - rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); - while(rs.next()) { - String name = rs.getString(2); - if (name == currentSpecies) { - rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); - } else { - currentSpecies = name; - speciesCount ++; - rawSpecies[speciesCount] = new Species(); - rawSpecies[speciesCount].setName(currentSpecies); - rawSpecies[speciesCount].addCluster(rs.getInt(1), rs.getInt(3)); - } - } - } - catch (SQLException e) { - System.out.println("Unexpected Error"); - } - - return rawSpecies; - - } - - private static String getInStatement(int[] clusterIDs, Connection connection) { - String inStatement = "IN ("; - for (int i = 0; i < clusterIDs.length; i ++) { - inStatement = inStatement + clusterIDs[i]; - if(i+1 < clusterIDs.length) { - inStatement = inStatement + ","; - } else { - inStatement = inStatement + ")"; - } - } - return inStatement; - } - - // finds ALL clusters that the given geneID's fall into - private static int[] getAllClusters(String[] geneIDs, Connection connection) { - ResultSet rs = null; - int[] allClusters; - - - String logicStatement = "gene = '"; - for (int i = 0; i < geneIDs.length; i ++) { - logicStatement = logicStatement + geneIDs[i]; - if(i+1 < geneIDs.length) { - logicStatement = logicStatement + "' OR gene = '"; - } else { - logicStatement = logicStatement + "'"; - } - } - - // finds the clusters that the genes are in - PreparedStatement findCluster; - try { - findCluster = connection.prepareStatement("SELECT DISTINCT super_id FROM super_clust WHERE " - + logicStatement); - } catch (SQLException e1) { - System.out.println("Statment failed to prepare"); - findCluster = null; - } - - try { - rs = findCluster.executeQuery(); - } catch (SQLException e) { - System.out.println("Unable to execute Query"); - } - - int totalClusters = 0; - - try { - rs.last(); - totalClusters = rs.getRow(); - rs.beforeFirst(); - } catch (SQLException e) { - System.out.println("Error in data, unable to parse"); - } - - allClusters = new int[totalClusters]; - - for (int i = 0; i < geneIDs.length; i ++) { - try { - rs.next(); - allClusters[i] = rs.getInt(1); - } catch (SQLException e) { - System.out.println("Unexpected Error"); - } - } - - return allClusters; - } - - // selects the database to use - private static Connection selectDatabase(Statement statement, Connection connection) { - PreparedStatement useDatabase; - try { - useDatabase = connection.prepareStatement("USE inparanoid_data"); - } catch (SQLException e2) { - System.out.println("Unexpected Error"); - useDatabase = null; - } - try { - useDatabase.execute(); - } catch (SQLException e) { - System.out.println("Unexpected Error"); - } - return connection; - } - - private static Connection openConnection() { - Properties properties = new Properties(); - properties.setProperty("user", "inparanoid-read-user"); - properties.setProperty("password", "inparanoid-read-user_pw"); - properties.setProperty("autoReconnect", "true"); - properties.setProperty("useOldUTF8Behavior", "true"); - properties.setProperty("zeroDateTimeBehavior", "convertToNull"); - properties.setProperty("dontTrackOpenResources", "true"); - String url = "jdbc:mysql://floret.cgrb.oregonstate.edu:3306/inparanoid_data"; - - try { - Class.forName("com.mysql.jdbc.Driver").newInstance(); //Or any other driver - } - catch(Exception x){ - System.out.println( "Unable to load the driver class!" ); - } - try { - Connection c = DriverManager.getConnection(url, properties); - return c; - } - catch(SQLException x) { - System.out.println("Couldn’t get connection!"); - } - return null; - } - -} diff --git a/Personnel/miles/2/src/Species.java b/Personnel/miles/2/src/Species.java index dd55da6..cf7bb31 100644 --- a/Personnel/miles/2/src/Species.java +++ b/Personnel/miles/2/src/Species.java @@ -1,5 +1,3 @@ -import java.util.ArrayList; - public class Species { public String name; @@ -22,10 +20,32 @@ public class Species { // adds a cluster, if the cluster already exists in this species' gene map, then it adds a hit to the count public void addCluster(int clusterID, int hits) { - geneMap = new int[geneMap.length + 1][2]; - geneMap[clusterTotal][0] = clusterID; - geneMap[clusterTotal][1] = hits; + int[][] tmpMap = new int[geneMap.length + 1][2]; + for(int i = 0; i < geneMap.length; i ++) { + tmpMap[i] = geneMap[i]; + } + tmpMap[clusterTotal][0] = clusterID; + tmpMap[clusterTotal][1] = hits; clusterTotal ++; + geneMap = tmpMap; + } + + public int findClusterFrequency(int clusterID) { + int index = findClusterIndex(clusterID); + if (index == -1) { + return index; + } + return geneMap[index][1]; + } + + private int findClusterIndex(int clusterID) { + int index = -1; + for (int i = 0; i < geneMap.length; i ++) { + if (geneMap[i][0] == clusterID) { + index = i; + } + } + return index; } } diff --git a/Personnel/miles/2/tmp b/Personnel/miles/2/tmp new file mode 100644 index 0000000..723c2d7 --- /dev/null +++ b/Personnel/miles/2/tmp @@ -0,0 +1,32 @@ +Species Cluster 0 Cluster 1 Cluster 2 + +Arabidopsis_lyrata 1 1 1 +Arabidopsis_thaliana 0 1 1 +Batrachochytrium_distachyon 0 0 1 +Brachypodium_distachyon 1 2 1 +Caenorhabditis_elegans 0 0 1 +Carica_papaya 0 0 1 +Chlamydomonas_reinhardtii 0 0 1 +Cucumis_sativus 0 1 2 +Danio_rerio 0 0 1 +Drosophila_melanogaster 0 0 1 +Ectocarpus_siliculosus 0 1 1 +Fragaria_vesca 0 1 1 +Glycine_max 0 4 2 +Homo_sapiens 0 0 2 +Laccaria_bicolor 0 0 1 +Magnaporthe_grissa 0 0 1 +Manihot_esculenta 0 1 1 +Mimulus_guttatus 0 1 1 +Neurospora_crassa 0 0 1 +Oryza_sativa 2 1 1 +Pediculus_humanus 1 0 1 +Physcomitrella_patens 0 0 1 +Populus_trichocarpa 0 2 1 +Prunus_persica 0 1 1 +Rattus_norvegicus 0 1 1 +Ricinus_communis 0 1 1 +Saccharomyces_cerevisiae 0 0 1 +Selaginella_moellendorffii 0 1 1 +Sorghum_bicolor 0 1 1 +Vitis_vinifera 0 1 1 \ No newline at end of file