Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, commits may not be seen immediately.
Justin's initial import of his scripts
authorelserj <elserj@localhost>
Fri, 14 Aug 2009 21:18:55 +0000 (21:18 +0000)
committerelserj <elserj@localhost>
Fri, 14 Aug 2009 21:18:55 +0000 (21:18 +0000)
svn path=/; revision=1

interactome_scripts/check_interact.pl [new file with mode: 0755]
interactome_scripts/flowering_interaction.pl [new file with mode: 0755]
interactome_scripts/interact.pl [new file with mode: 0755]
interactome_scripts/interact_perl_sort.pl [new file with mode: 0755]
interactome_scripts/stats_interact.pl [new file with mode: 0755]
interactome_scripts/stats_interact_Ath.pl [new file with mode: 0755]
interactome_scripts/tree_script.sh [new file with mode: 0755]
server_admin_scripts/linkchecker.pl [new file with mode: 0755]

diff --git a/interactome_scripts/check_interact.pl b/interactome_scripts/check_interact.pl
new file mode 100755 (executable)
index 0000000..9949a05
--- /dev/null
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# print usage if other than two arguments to the script are found
+if ($#ARGV != 1) {
+ print "usage: check_interact.pl Experimental_file Predicted_file\n";
+ exit;
+}
+
+# open the experimental data file for reading
+open(exper_file,"$ARGV[0]") || die "Error: file '$ARGV[0]' can not be opened\n";
+
+# add the data to an array, calculate the length and get rid of newlines
+my @exper_array=<exper_file>;
+my $exper_len=@exper_array;
+
+
+close(exper_file);
+
+# do the same for the predicted file
+open(predict_file, "$ARGV[1]") || die "Error: file '$ARGV[1]' can not be opened\n";
+
+my @predict_array=<predict_file>;
+my $predict_len=@predict_array;
+
+close(predict_file);
+
+# loop through each experimentally verified gene interaction
+for (my $i=0;$i<$exper_len;$i++) {
+       my ($exper_gene, $exper_inter_type, $exper_inter) = split ("\t", $exper_array[$i]);
+       # get rid of line feed and carriage return characters
+       $exper_inter =~ s/\r//g;
+       $exper_inter =~ s/\n//g;
+       # Add prefixes to gene identifiers
+       $exper_gene = "LOC_" . $exper_gene;
+       $exper_inter = "LOC_" . $exper_inter;
+       # put the genes in alphabetical order so that order doesn't matter (predicted ones are done in interact.pl
+       #    See comments in interact.pl for more info
+       if ($exper_gene gt $exper_inter) {
+               my $temp = $exper_gene;
+               $exper_gene=$exper_inter;
+               $exper_inter = $temp;
+       }
+       
+       # loop through each predicted gene interaction
+       for (my $j=0;$j<$predict_len;$j++) {
+               my ($predict_gene, $predict_inter_type, $predict_inter) = split ("\t", $predict_array[$j]);
+               $predict_inter =~ s/\r//g;
+               $predict_inter =~ s/\n//g;
+               
+               # if both the intereaction genes have matches in both files (experiment and predicted)
+               if ($exper_gene eq $predict_gene and $exper_inter eq $predict_inter) {
+                       print "match found with $predict_gene -> $predict_inter\n";
+               }
+       }
+}
+
diff --git a/interactome_scripts/flowering_interaction.pl b/interactome_scripts/flowering_interaction.pl
new file mode 100755 (executable)
index 0000000..6b1d643
--- /dev/null
@@ -0,0 +1,3 @@
+#!/usr/bin/perl
+
+
diff --git a/interactome_scripts/interact.pl b/interactome_scripts/interact.pl
new file mode 100755 (executable)
index 0000000..8172c96
--- /dev/null
@@ -0,0 +1,278 @@
+#!/usr/bin/perl
+#  Originally written by Justin Elser 2/2/09
+#  Version 0.95 - completed 2/19/09
+#  Version 0.96 - still working on
+#  This file takes the two files that define the network and the orthologies and
+#       outputs a predicted interactome
+#
+#   updated 2/19/09 to filter out duplicates A -> B = B -> A in the interactions
+#
+#  0.96 will add the ability to use a separate (optional) paralog list
+use strict;
+use warnings;
+
+# print usage if other than three arguments to the script are found
+if ($#ARGV < 2 || $#ARGV > 3) {
+ print "usage: interact.pl Network_file Orthology_file Paralog_file(optional) Output_file\n";
+ exit;
+}
+
+my $network_file_name = $ARGV[0];
+my $ortho_file_name = $ARGV[1];
+
+my $para_file_name = "";
+my $out_file_name = "";
+my $para = "";
+
+if ($#ARGV ==3) {
+       $para_file_name = $ARGV[2];
+       $out_file_name = $ARGV[3];
+       $para=1; # true
+}else{
+       $out_file_name = $ARGV[2];
+       $para=0; #false
+}
+
+my %para_hash;
+
+# read in the paralogs first so it is not in the loop, but the hash is available to the loop
+if ($para) {
+       
+       open(paralog_file, "$para_file_name") || die "Error: file '$para_file_name' can not be opened\n";
+       
+       my $min_ident_para = 0; # set the threshold for paralog identities
+       while(<paralog_file>) {
+               my $entry = $_;
+               # strip off newline characters
+               $entry =~ s/\r//g;
+               $entry =~ s/\n//g;
+               # split the columns into 3 separate variables
+               my ($gene_1, $gene_2, $ident) = split("\t", $entry);
+               # skip if $ident is empty or Nan
+               next if(!defined($ident));
+               next if($ident =~ /D/);
+               next if($ident eq "");
+               next if($ident =~ /\%/);
+               # change gene ids to all caps
+               $gene_1 =~ tr/a-z/A-Z/;
+               $gene_2 =~ tr/a-z/A-Z/;
+               
+               # put the genes in ASCII order to help remove dupes
+               if($gene_1 gt $gene_2) {
+                       my $temp = $gene_1;
+                       $gene_1 = $gene_2;
+                       $gene_2 = $temp;
+               }
+               
+               # only use paralogs with high confidence values
+               if($ident>=$min_ident_para) {
+                       # most genes will show up many times, so push values for hash key
+                       if(defined($para_hash{$gene_1})) {
+                               my $oldhash = $para_hash{$gene_1};
+                               $oldhash->{'gene2'} = "$oldhash->{'gene2'}\t$gene_2";;
+                               $para_hash{$gene_1} = $oldhash;
+                               #print "$oldhash\n";
+                       }else{
+                               my %hash;
+                               $hash{'gene1'} = $gene_1;
+                               $hash{'gene2'} = $gene_2;
+                               $para_hash{$gene_1} = \%hash;
+                       }
+               }
+               
+       }
+
+       close(paralog_file);
+
+}
+
+#while (my $key = each %para_hash) {
+#      my $value = $para_hash{$key}->{'gene2'};
+#      print "$key\t$value\n";
+#}
+#exit;
+
+
+
+# Set the minimum % identity to be used.  Higher values mean higher confidence in 
+#  orthology
+#my $min_ident=33;
+# loop through minimum identity, decreasing by 5
+for(my $min_ident=100; $min_ident>=0; $min_ident-=50) {
+
+# Two files are to be read as input.  This one describes the interactions (network map)
+open(network_file,"$network_file_name") || die "Error: file '$network_file_name' can not be opened\n";
+
+# import the network map entries into a hash table
+my %map_hash;
+my @map_array;
+
+while(<network_file>){
+       my $entry = $_;
+       # strip off newline characters
+       $entry =~ s/\r//g;
+       $entry =~ s/\n//g;
+       # split the 3 columns into separate variables
+       my ($map_gene_id, $interaction_type, $interaction_gene) = split("\t", $entry);
+       # change gene ids to all caps
+       $map_gene_id =~ tr/a-z/A-Z/;
+       $interaction_gene =~ tr/a-z/A-Z/;
+       
+       
+       # some genes have multiple interactions
+       if(defined($map_hash{$map_gene_id})){
+               my $oldhash = $map_hash{$map_gene_id};
+               $oldhash->{'type'} = "$oldhash->{'type'}\t$interaction_type";;
+               $oldhash->{'inter'} = "$oldhash->{'inter'}\t$interaction_gene";;
+               $map_hash{$map_gene_id} = $oldhash;
+       }else{
+               my %hash;
+               $hash{'map'} = $map_gene_id;
+               $hash{'type'} = $interaction_type;
+               $hash{'inter'} = $interaction_gene;
+               $map_hash{$map_gene_id} = \%hash;
+               push @map_array, $map_gene_id;
+       }
+}
+
+# close file
+close(network_file);
+
+
+# This file gives the orthologs
+open(ortho_file,"<$ortho_file_name") || die "Error: file '$ortho_file_name' can not be opened\n";
+
+my %ortho_hash;
+my @ortho_array;
+
+# import the ortholog entries into a hash table
+while(<ortho_file>){
+       my $id1 = $_;
+       # strip off newline characters
+       $id1 =~ s/\r//g;
+       $id1 =~ s/\n//g;
+       # split the 3 columns into separate variables
+       my ($gene_id, $ortho_id, $ident_id) = split("\t", $id1);
+       # strip off gene id suffix and convert to all caps
+       $gene_id =~ s/\-TAIR\-G//g;
+       $gene_id =~ tr/a-z/A-Z/;
+       $ortho_id =~ tr/a-z/A-Z/;
+       # check to make sure the %identity is above the minimum defined above
+       if($ident_id =~ /\d/ && $ident_id>=$min_ident){
+               # if gene_id already found, add the new ortholog to hash
+               if(defined($ortho_hash{$gene_id})){
+                       my $oldhash = $ortho_hash{$gene_id};
+                       $oldhash->{'ortho'} = "$oldhash->{'ortho'}\t$ortho_id";;
+                       $ortho_hash{$gene_id} = $oldhash;
+               # if new gene_id, create hash element
+               }else{
+                       my %hash;
+                       $hash{'ortho'} = $ortho_id;
+                       $hash{'gene'} = $gene_id;
+                       $ortho_hash{$gene_id} = \%hash;
+                       push @ortho_array, $gene_id;
+               }
+       }
+}
+#  close file
+close(ortho_file);
+
+#  open file for output
+open(output_file,">$out_file_name" . "_$min_ident" . ".sif") || die "Error: file '$out_file_name' can not be opened for writing\n";
+my $out_file = "$out_file_name" . "_$min_ident" . ".sif";
+
+
+foreach my $id_entry (@map_array) {
+       my $inter_type=$map_hash{$id_entry}->{'type'};
+       my $gene_map=$map_hash{$id_entry}->{'inter'};
+       #print "$id_entry\t$gene_map\n";
+       
+       # create array of interaction types
+       my @inter_array = split("\t", $inter_type);
+               
+       # create array of interaction genes
+       my @gene_map_array = split("\t", $gene_map);
+       
+       # check to see if ortholog exists for reference gene
+       if(exists($ortho_hash{$id_entry})) {
+               # create array of orthologs for reference gene
+               my @gene_ortho_array = split("\t", $ortho_hash{$id_entry}->{'ortho'});
+               # find the length of the array from map_hash to be used in loop
+               my $max_index = @inter_array;
+
+               # run for each ortholog for reference gene
+               foreach my $gene_ortho (@gene_ortho_array) {
+                       # for each element of the arrays inter_array and gene_map_array
+                       for (my $i=0; $i<$max_index; $i++) {
+                               my $gene_map_ortho=$gene_map_array[$i];
+                               # check to make sure that an ortholog for the interaction gene is defined
+                               if(exists($ortho_hash{$gene_map_ortho})) {
+                                       # create array of orthologs for interaction genes
+                                       my @gene_map_ortho_array = split("\t", $ortho_hash{$gene_map_ortho}->{'ortho'});
+                                       # loop through each interaction gene
+                                       foreach my $gene_map_inter (@gene_map_ortho_array) {
+                                               # if using paralogs, loop through paralogs for both genes
+                                               if ($para) {
+                                                       my @gene_1_array;
+                                                       my @gene_2_array;
+                                                       #print "$gene_ortho\n";
+                                                       if (exists($para_hash{$gene_ortho})) {
+                                                               @gene_1_array = split("\t", $para_hash{$gene_ortho}->{'gene2'});
+                                                               #print "filling array 1\n";
+                                                       }
+                                                       # this will add back the original gene to the beginning of the array
+                                                       unshift(@gene_1_array, $gene_ortho);
+                                                       #print "$gene_ortho\n array @gene_1_array\n";
+                                                       #print "$gene_ortho\t$gene_map_inter\t\t\t$gene_1_array[0]\t$gene_1_array[1]\n";
+                                                       
+                                                       if (exists($para_hash{$gene_map_inter})) {
+                                                               @gene_2_array = split("\t", $para_hash{$gene_map_inter}->{'gene2'});
+                                                               #print "filling array 2\n";
+                                                       }
+                                                       unshift(@gene_2_array, $gene_map_inter);
+                                                       #print "@gene_2_array\n";
+                                                       
+                                                       foreach my $gene_1 (@gene_1_array) {
+                                                               foreach my $gene_2 (@gene_2_array) {
+                                                                       if ($gene_1 gt $gene_2) {
+                                                                               my $temp = $gene_1;
+                                                                               $gene_1 = $gene_2;
+                                                                               $gene_2 = $temp;
+                                                                       }
+                                                                       printf output_file "$gene_1\t$inter_array[$i]\t$gene_2\n";
+                                                                       #printf output_file "$gene_1\n";
+                                                               }
+                                                       }
+                                                       
+                                               } else {
+                                                       # check to see if the two entries are in alphatical order
+                                                       #   if they are not, swap them so they are
+                                                       #   Note that if the interaction type depends on direction, this breaks that
+                                                       if ($gene_ortho gt $gene_map_inter) { # if inter_type depends on direction, can extend the if statement to account for specific interactions to be skipped
+                                                               my $temp = $gene_ortho;
+                                                               $gene_ortho = $gene_map_inter;
+                                                               $gene_map_inter = $temp;
+                                                       }
+                                               
+                                                       # print results to output file in sif format
+                                                       printf output_file "$gene_ortho\t$inter_array[$i]\t$gene_map_inter\n";
+                                               }
+                                       }
+
+                               }
+                       }
+               }
+       }
+
+}
+
+print "min_ident = $min_ident\n";
+
+       
+close(output_file);
+# sort the file and get rid of duplicates
+#   Note: this requires replacing the file, hence the mv command
+system "sort $out_file | uniq > $out_file.tmp; mv $out_file.tmp $out_file";
+# end loop of minimum identities ($min_ident)
+} 
+       
diff --git a/interactome_scripts/interact_perl_sort.pl b/interactome_scripts/interact_perl_sort.pl
new file mode 100755 (executable)
index 0000000..c30cbf5
--- /dev/null
@@ -0,0 +1,306 @@
+#!/usr/bin/perl
+#
+#
+#  ONLY USE THIS PROGRAM FOR SMALL INTERACTION CASES!!!!!!!!!!!!!!
+#
+#
+#
+#  Originally written by Justin Elser 2/2/09
+#  Version 0.95 - completed 2/19/09
+#  Version 0.96 - still working on
+#  This file takes the two files that define the network and the orthologies and
+#       outputs a predicted interactome
+#
+#   updated 2/19/09 to filter out duplicates A -> B = B -> A in the interactions
+#
+#  this version of interact.pl will try to do the sort in perl using hashes, hopefully faster than the linux sort
+#   After some testing, it may be faster (don't know), but is horribly memory inefficient.  Took over 4 GB of RAM for min_ident=50.  
+#     Would totally kill a system to try to do min_ident=0.  Maybe on a machine with >=64GB RAM?
+#
+#
+#
+#
+#  ONLY USE THIS PROGRAM FOR SMALL INTERACTION CASES!!!!!!!!!!!!!!
+use strict;
+use warnings;
+
+# print usage if other than three arguments to the script are found
+if ($#ARGV < 2 || $#ARGV > 3) {
+ print "usage: interact_perl_sort.pl Network_file Orthology_file Paralog_file(optional) Output_file\n";
+ exit;
+}
+
+my $network_file_name = $ARGV[0];
+my $ortho_file_name = $ARGV[1];
+
+my $para_file_name = "";
+my $out_file_name = "";
+my $para = "";
+
+if ($#ARGV ==3) {
+       $para_file_name = $ARGV[2];
+       $out_file_name = $ARGV[3];
+       $para=1; # true
+}else{
+       $out_file_name = $ARGV[2];
+       $para=0; #false
+}
+
+my %para_hash;
+
+# read in the paralogs first so it is not in the loop, but the hash is available to the loop
+if ($para) {
+       
+       open(paralog_file, "$para_file_name") || die "Error: file '$para_file_name' can not be opened\n";
+       
+       my $min_ident_para = 0; # set the threshold for paralog identities
+       while(<paralog_file>) {
+               my $entry = $_;
+               # strip off newline characters
+               $entry =~ s/\r//g;
+               $entry =~ s/\n//g;
+               # split the columns into 3 separate variables
+               my ($gene_1, $gene_2, $ident) = split("\t", $entry);
+               # skip if $ident is empty or Nan
+               next if(!defined($ident));
+               next if($ident =~ /D/);
+               next if($ident eq "");
+               next if($ident =~ /\%/);
+               # change gene ids to all caps
+               $gene_1 =~ tr/a-z/A-Z/;
+               $gene_2 =~ tr/a-z/A-Z/;
+               
+               # put the genes in ASCII order to help remove dupes
+               if($gene_1 gt $gene_2) {
+                       my $temp = $gene_1;
+                       $gene_1 = $gene_2;
+                       $gene_2 = $temp;
+               }
+               
+               # only use paralogs with high confidence values
+               if($ident>=$min_ident_para) {
+                       # most genes will show up many times, so push values for hash key
+                       if(defined($para_hash{$gene_1})) {
+                               my $oldhash = $para_hash{$gene_1};
+                               $oldhash->{'gene2'} = "$oldhash->{'gene2'}\t$gene_2";;
+                               $para_hash{$gene_1} = $oldhash;
+                               #print "$oldhash\n";
+                       }else{
+                               my %hash;
+                               $hash{'gene1'} = $gene_1;
+                               $hash{'gene2'} = $gene_2;
+                               $para_hash{$gene_1} = \%hash;
+                       }
+               }
+               
+       }
+
+       close(paralog_file);
+
+}
+
+#while (my $key = each %para_hash) {
+#      my $value = $para_hash{$key}->{'gene2'};
+#      print "$key\t$value\n";
+#}
+#exit;
+
+my %unsorted_hash;
+
+
+# Set the minimum % identity to be used.  Higher values mean higher confidence in 
+#  orthology
+#my $min_ident=33;
+# loop through minimum identity, decreasing by 5
+for(my $min_ident=100; $min_ident>=0; $min_ident-=50) {
+
+# Two files are to be read as input.  This one describes the interactions (network map)
+open(network_file,"$network_file_name") || die "Error: file '$network_file_name' can not be opened\n";
+
+# import the network map entries into a hash table
+my %map_hash;
+my @map_array;
+
+while(<network_file>){
+       my $entry = $_;
+       # strip off newline characters
+       $entry =~ s/\r//g;
+       $entry =~ s/\n//g;
+       # split the 3 columns into separate variables
+       my ($map_gene_id, $interaction_type, $interaction_gene) = split("\t", $entry);
+       # change gene ids to all caps
+       $map_gene_id =~ tr/a-z/A-Z/;
+       $interaction_gene =~ tr/a-z/A-Z/;
+       
+       
+       # some genes have multiple interactions
+       if(defined($map_hash{$map_gene_id})){
+               my $oldhash = $map_hash{$map_gene_id};
+               $oldhash->{'type'} = "$oldhash->{'type'}\t$interaction_type";;
+               $oldhash->{'inter'} = "$oldhash->{'inter'}\t$interaction_gene";;
+               $map_hash{$map_gene_id} = $oldhash;
+       }else{
+               my %hash;
+               $hash{'map'} = $map_gene_id;
+               $hash{'type'} = $interaction_type;
+               $hash{'inter'} = $interaction_gene;
+               $map_hash{$map_gene_id} = \%hash;
+               push @map_array, $map_gene_id;
+       }
+}
+
+# close file
+close(network_file);
+
+
+# This file gives the orthologs
+open(ortho_file,"<$ortho_file_name") || die "Error: file '$ortho_file_name' can not be opened\n";
+
+my %ortho_hash;
+my @ortho_array;
+
+# import the ortholog entries into a hash table
+while(<ortho_file>){
+       my $id1 = $_;
+       # strip off newline characters
+       $id1 =~ s/\r//g;
+       $id1 =~ s/\n//g;
+       # split the 3 columns into separate variables
+       my ($gene_id, $ortho_id, $ident_id) = split("\t", $id1);
+       # strip off gene id suffix and convert to all caps
+       $gene_id =~ s/\-TAIR\-G//g;
+       $gene_id =~ tr/a-z/A-Z/;
+       $ortho_id =~ tr/a-z/A-Z/;
+       # check to make sure the %identity is above the minimum defined above
+       if($ident_id =~ /\d/ && $ident_id>=$min_ident){
+               # if gene_id already found, add the new ortholog to hash
+               if(defined($ortho_hash{$gene_id})){
+                       my $oldhash = $ortho_hash{$gene_id};
+                       $oldhash->{'ortho'} = "$oldhash->{'ortho'}\t$ortho_id";;
+                       $ortho_hash{$gene_id} = $oldhash;
+               # if new gene_id, create hash element
+               }else{
+                       my %hash;
+                       $hash{'ortho'} = $ortho_id;
+                       $hash{'gene'} = $gene_id;
+                       $ortho_hash{$gene_id} = \%hash;
+                       push @ortho_array, $gene_id;
+               }
+       }
+}
+#  close file
+close(ortho_file);
+
+#  open file for output
+open(output_file,">$out_file_name" . "_$min_ident" . ".sif") || die "Error: file '$out_file_name' can not be opened for writing\n";
+my $out_file = "$out_file_name" . "_$min_ident" . ".sif";
+
+my $line_num = 0;
+
+#my %sorted_hash;
+foreach my $id_entry (@map_array) {
+       my $inter_type=$map_hash{$id_entry}->{'type'};
+       my $gene_map=$map_hash{$id_entry}->{'inter'};
+       #print "$id_entry\t$gene_map\n";
+       
+       # create array of interaction types
+       my @inter_array = split("\t", $inter_type);
+               
+       # create array of interaction genes
+       my @gene_map_array = split("\t", $gene_map);
+       
+       # check to see if ortholog exists for reference gene
+       if(exists($ortho_hash{$id_entry})) {
+               # create array of orthologs for reference gene
+               my @gene_ortho_array = split("\t", $ortho_hash{$id_entry}->{'ortho'});
+               # find the length of the array from map_hash to be used in loop
+               my $max_index = @inter_array;
+
+               # run for each ortholog for reference gene
+               foreach my $gene_ortho (@gene_ortho_array) {
+                       # for each element of the arrays inter_array and gene_map_array
+                       for (my $i=0; $i<$max_index; $i++) {
+                               my $gene_map_ortho=$gene_map_array[$i];
+                               # check to make sure that an ortholog for the interaction gene is defined
+                               if(exists($ortho_hash{$gene_map_ortho})) {
+                                       # create array of orthologs for interaction genes
+                                       my @gene_map_ortho_array = split("\t", $ortho_hash{$gene_map_ortho}->{'ortho'});
+                                       # loop through each interaction gene
+                                       foreach my $gene_map_inter (@gene_map_ortho_array) {
+                                               # if using paralogs, loop through paralogs for both genes
+                                               if ($para) {
+                                                       my @gene_1_array;
+                                                       my @gene_2_array;
+                                                       #print "$gene_ortho\n";
+                                                       if (exists($para_hash{$gene_ortho})) {
+                                                               @gene_1_array = split("\t", $para_hash{$gene_ortho}->{'gene2'});
+                                                               #print "filling array 1\n";
+                                                       }
+                                                       # this will add back the original gene to the beginning of the array
+                                                       unshift(@gene_1_array, $gene_ortho);
+                                                       #print "$gene_ortho\n array @gene_1_array\n";
+                                                       #print "$gene_ortho\t$gene_map_inter\t\t\t$gene_1_array[0]\t$gene_1_array[1]\n";
+                                                       
+                                                       if (exists($para_hash{$gene_map_inter})) {
+                                                               @gene_2_array = split("\t", $para_hash{$gene_map_inter}->{'gene2'});
+                                                               #print "filling array 2\n";
+                                                       }
+                                                       unshift(@gene_2_array, $gene_map_inter);
+                                                       #print "@gene_2_array\n";
+                                                       
+                                                       foreach my $gene_1 (@gene_1_array) {
+                                                               foreach my $gene_2 (@gene_2_array) {
+                                                                       if ($gene_1 gt $gene_2) {
+                                                                               my $temp = $gene_1;
+                                                                               $gene_1 = $gene_2;
+                                                                               $gene_2 = $temp;
+                                                                       }
+                                                                       $unsorted_hash{$line_num} = "$gene_1\t$inter_array[$i]\t$gene_2";
+                                                                       $line_num++;
+                                                                       #printf output_file "$gene_1\t$inter_array[$i]\t$gene_2\n";
+                                                                       #printf output_file "$gene_1\n";
+                                                               }
+                                                       }
+                                                       
+                                               } else {
+                                                       # check to see if the two entries are in alphatical order
+                                                       #   if they are not, swap them so they are
+                                                       #   Note that if the interaction type depends on direction, this breaks that
+                                                       if ($gene_ortho gt $gene_map_inter) { # if inter_type depends on direction, can extend the if statement to account for specific interactions to be skipped
+                                                               my $temp = $gene_ortho;
+                                                               $gene_ortho = $gene_map_inter;
+                                                               $gene_map_inter = $temp;
+                                                       }
+                                               
+                                                       $unsorted_hash{$line_num} = "$gene_ortho\t$inter_array[$i]\t$gene_map_inter";
+                                                       $line_num++;
+                                                       # print results to output file in sif format
+                                                       #printf output_file "$gene_ortho\t$inter_array[$i]\t$gene_map_inter\n";
+                                               }
+                                       }
+
+                               }
+                       }
+               }
+       }
+
+}
+
+print "min_ident = $min_ident\n";
+
+foreach my $key (sort keys %unsorted_hash) {
+               my $value = $unsorted_hash{$key};
+               print output_file "$value\n";
+       }
+print "done sorting\n";
+
+%unsorted_hash = ();
+close(output_file);
+# sort the file and get rid of duplicates
+#   Note: this requires replacing the file, hence the mv command
+#system "sort $out_file | uniq > $out_file.tmp; mv $out_file.tmp $out_file";
+# end loop of minimum identities ($min_ident)
+} 
+
+#sub by_ascii { $unsorted_hash{$a} <=> $unsorted_hash{$b} }
+       
diff --git a/interactome_scripts/stats_interact.pl b/interactome_scripts/stats_interact.pl
new file mode 100755 (executable)
index 0000000..d8e9a86
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+
+##############################################################
+#  Written by Justin Elser 2/23/09                           #
+#   Dept of BPP, Oregon State University                     #
+#   elserj@science.oregonstate.edu                           #
+#  Version 0.9                                               #
+#   this program is meant to be part of a pipeline including #
+#      interact.pl, therefore, it is written assuming the    #
+#      input files are from that program (interact.pl)       #
+#                                                            #
+#  still to do-----                                          #
+#       not all stats are computed yet                       #
+##############################################################
+
+use strict;
+use warnings;
+
+if ($#ARGV != 1) {
+       print "usage :   stats_interact.pl prefix_for_in_files output_file\n";
+       exit;
+}
+open(main_output_file, ">$ARGV[1]") || die "Error: file '$ARGV[1]' can not be opened\n";
+my $out_file = "$ARGV[1]";
+
+printf main_output_file "Minimum identity\tNum of lines\tUnique gene count\tMax Gene IDs\t# max Gene\t# min genes\n";
+
+for (my $min_ident=100; $min_ident>=0; $min_ident-=5) {
+       open (in_file_prefix, "$ARGV[0]" . "_$min_ident" . ".sif") || die "Error: file '$ARGV[0]' can not be opened\n";
+       
+       open (output_file, ">$ARGV[1]" . "_$min_ident") || die "Error: file '$ARGV[1]' can not be opened\n";
+
+       
+       # start doing the stats
+       #  earlier versions of the script did this using system calls to cat, grep, uniq, etc...  
+       #  implementing directly in perl has increased the performance of the program greatly
+       my $count = 0;  # total number of unique interactions
+       my %seen = ();  # hash that stores how many times each gene has been seen
+       while (my $line = <in_file_prefix>) {
+               $count++;
+               my ($gene_1,$inter,$gene_2) = split ("\t",$line);
+               chomp $gene_2;
+               if ($gene_1 eq $gene_2) {
+                       $seen{"$gene_1"}++;
+               }
+               else {
+                       $seen{"$gene_1"}++;
+                       $seen{"$gene_2"}++;
+               }
+       }
+       my $uniq_gene_count = keys %seen; # gives the count of unique genes
+       
+       # this block populates the max and min lists.  max_list holds the gene id(s) 
+       #   that comes up the most.  min_list holds the gene ids that only show up once
+       my $max = undef;
+       my @max_list = undef;
+       my @min_list = ();
+       while( my ($key, $value) = each %seen ) {
+               if(!defined $max) {
+                       $max = $value;
+                       @max_list = ($key);
+               } elsif ($seen{$key} == $max) {
+                       push @max_list, $key;
+               } elsif ($seen{$key} > $max) {
+                       $max = $value;
+                       @max_list = ($key);
+               }
+               if($value == 1) {
+                       push @min_list, $key;
+               }
+       }
+       
+       my $min_count = @min_list;
+       
+       printf main_output_file "$min_ident\t$count\t$uniq_gene_count\t@max_list\t$max\t$min_count\n";
+
+
+       
+       my %hash = ();
+       
+       # this block will calculate the number of genes that have occurred a given number of times
+       #  for example, if there are 10 genes that occur 5 times, the key of this hash is 5, value is 10
+       while( my ($key, $value) = each %seen ) {
+               $hash{"$value"}++;
+       }
+       
+       foreach my $key (sort by_number keys %hash) { # see the sub description below
+               my $value = $hash{$key};
+               print output_file "$key\t$value\n";
+       }
+       
+
+       close(in_file_prefix);
+       close(output_file);
+       
+}
+
+close(main_output_file);
+
+# the perl sort() sorts by ASCII value, not numerically, this subroutine fixes the sort
+sub by_number {
+       if ($a < $b) {-1} elsif ($a > $b) {1} else {0}
+}
diff --git a/interactome_scripts/stats_interact_Ath.pl b/interactome_scripts/stats_interact_Ath.pl
new file mode 100755 (executable)
index 0000000..16b3806
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+
+##############################################################
+#  Written by Justin Elser 2/23/09                           #
+#   Dept of BPP, Oregon State University                     #
+#   elserj@science.oregonstate.edu                           #
+#  Version 0.9                                               #
+#   this program is meant to be part of a pipeline including #
+#      interact.pl, therefore, it is written assuming the    #
+#      input files are from that program (interact.pl)       #
+#                                                            #
+#  still to do-----                                          #
+#       not all stats are computed yet                       #
+##############################################################
+
+use strict;
+use warnings;
+
+if ($#ARGV != 1) {
+       print "usage :   stats_interact_Ath.pl prefix_for_in_files output_file\n";
+       exit;
+}
+open(main_output_file, ">$ARGV[1]") || die "Error: file '$ARGV[1]' can not be opened\n";
+my $out_file = "$ARGV[1]";
+
+printf main_output_file "Num of lines\tUnique gene count\tMax Gene IDs\t# max Gene\t# min genes\n";
+
+
+       open (in_file_prefix, "$ARGV[0]") || die "Error: file '$ARGV[0]' can not be opened\n";
+       
+       open (output_file, ">$ARGV[1]" . "_num") || die "Error: file '$ARGV[1]' can not be opened\n";
+
+       
+       # start doing the stats
+       #  earlier versions of the script did this using system calls to cat, grep, uniq, etc...  
+       #  implementing directly in perl has increased the performance of the program greatly
+       my $count = 0;  # total number of unique interactions
+       my %seen = ();  # hash that stores how many times each gene has been seen
+       while (my $line = <in_file_prefix>) {
+               $count++;
+               my ($gene_1,$inter,$gene_2) = split ("\t",$line);
+               chomp $gene_2;
+               if ($gene_1 eq $gene_2) {
+                       $seen{"$gene_1"}++;
+               }
+               else {
+                       $seen{"$gene_1"}++;
+                       $seen{"$gene_2"}++;
+               }
+       }
+       my $uniq_gene_count = keys %seen; # gives the count of unique genes
+       
+       # this block populates the max and min lists.  max_list holds the gene id(s) 
+       #   that comes up the most.  min_list holds the gene ids that only show up once
+       my $max = undef;
+       my @max_list = undef;
+       my @min_list = ();
+       while( my ($key, $value) = each %seen ) {
+               if(!defined $max) {
+                       $max = $value;
+                       @max_list = ($key);
+               } elsif ($seen{$key} == $max) {
+                       push @max_list, $key;
+               } elsif ($seen{$key} > $max) {
+                       $max = $value;
+                       @max_list = ($key);
+               }
+               if($value == 1) {
+                       push @min_list, $key;
+               }
+       }
+       
+       my $min_count = @min_list;
+       
+       printf main_output_file "$count\t$uniq_gene_count\t@max_list\t$max\t$min_count\n";
+
+
+       
+       my %hash = ();
+       
+       # this block will calculate the number of genes that have occurred a given number of times
+       #  for example, if there are 10 genes that occur 5 times, the key of this hash is 5, value is 10
+       while( my ($key, $value) = each %seen ) {
+               $hash{"$value"}++;
+       }
+       
+       foreach my $key (sort by_number keys %hash) { # see the sub description below
+               my $value = $hash{$key};
+               print output_file "$key\t$value\n";
+       }
+       
+
+       close(in_file_prefix);
+       close(output_file);
+       
+
+
+close(main_output_file);
+
+# the perl sort() sorts by ASCII value, not numerically, this subroutine fixes the sort
+sub by_number {
+       if ($a < $b) {-1} elsif ($a > $b) {1} else {0}
+}
diff --git a/interactome_scripts/tree_script.sh b/interactome_scripts/tree_script.sh
new file mode 100755 (executable)
index 0000000..0269100
--- /dev/null
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+#[ -r $1 ] || [ $2="rice" ] || (echo "usage: $0 input_file rice/Ath" exit)
+
+if [ ! -r $1 ]
+then
+       echo "usage: $0 input_file rice/Ath"
+       exit
+fi
+
+case $2 in
+Ath)
+alphas=( At2g23070 At3g50000 At5g67380 At2g23080 )
+betas=( At4g17640 At5g47080 At3g60250 At2g44680 )
+;;
+rice)
+alphas=( LOC_Os03g55490 LOC_Os03g10940 LOC_Os03g55389 LOC_Os07g02350 )
+betas=( LOC_Os10g41520 LOC_Os07g31280 )
+;;
+*)
+echo "usage: $0 input_file rice/Ath"
+exit
+;;
+esac
+
+first=1
+for id in ${alphas[@]}
+do
+       if [ $first -eq 1 ]
+       then 
+       first=0
+       alphagrep="$id"
+       else
+       alphagrep="$echo $alphagrep|$id"
+       fi
+done
+
+first=1
+for id in ${betas[@]}
+do
+        if [ $first -eq 1 ]
+        then 
+       first=0
+        betagrep="$id"
+       else
+        betagrep="$echo $betagrep|$id"
+        fi
+done
+
+case $2 in
+Ath)
+egrep -i ""$alphagrep"" $1 | egrep -i ""$betagrep"" | sed "s/${alphas[0]}/${alphas[0]}_alpha/gi" | sed "s/${alphas[1]}/${alphas[1]}_alpha/gi" | sed "s/${alphas[2]}/${alphas[2]}_alpha/gi" | sed "s/${alphas[3]}/${alphas[3]}_alpha/gi" | sed "s/${betas[0]}/${betas[0]}_beta/gi" | sed "s/${betas[1]}/${betas[1]}_beta/gi" | sed "s/${betas[2]}/${betas[2]}_beta/gi" | sed "s/${betas[3]}/${betas[3]}_beta/gi" 
+;;
+rice)
+egrep -i ""$alphagrep"" $1 | egrep -i ""$betagrep"" | sed "s/${alphas[0]}/${alphas[0]}_alpha/gi" | sed "s/${alphas[1]}/${alphas[1]}_alpha/gi" | sed "s/${alphas[2]}/${alphas[2]}_alpha/gi" | sed "s/${alphas[3]}/${alphas[3]}_alpha/gi" | sed "s/${betas[0]}/${betas[0]}_beta/gi" | sed "s/${betas[1]}/${betas[1]}_beta/gi"
+;;
+esac
diff --git a/server_admin_scripts/linkchecker.pl b/server_admin_scripts/linkchecker.pl
new file mode 100755 (executable)
index 0000000..1b42ca0
--- /dev/null
@@ -0,0 +1,435 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use English qw( -no_match_vars );
+use File::Basename;
+use Getopt::Long;
+use Gramene::Config;
+use HTML::Lint;
+use Mail::Sendmail;
+use Pod::Usage;
+use Readonly;
+use WWW::Mechanize::Timed;
+use YAML qw(Load);
+
+$| = 1;
+
+Readonly my $VERSION   
+    => sprintf '%d.%02d', qq$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
+
+my %parents         = ();
+my %failed          = ();
+my %previously_seen = ();
+
+my $cfile   =       Gramene::Config->new;
+my %options = (
+    'noisy'      => 0, 
+    'validate'   => 0,
+    'send_email' => 0,
+    'site'       => $cfile->get('link_checker')->{'site'},
+    'man'        => 0,
+    'version'    => 0,
+    'email_from' => $cfile->get('link_checker')->{'email_from'},
+    'email_to'   => $cfile->get('link_checker')->{'email_to'},
+    'depth'      => 10_000_000,        #stop checking after a depth of 10,000,000 (essentially, "don't stop")
+    'content_test_file'  => $cfile->get('link_checker')->{'content_test_file'},
+    'white_list_file'      => $cfile->get('link_checker')->{'white_list_file'},
+    'black_list_file'      => $cfile->get('link_checker')->{'black_list_file'},
+);
+
+GetOptions(
+    \%options,
+    'noisy',
+    'validate',
+    'send_email',
+    'to_console',
+    'site=s',
+    'email_from=s',
+    'email_to=s',
+    'content_test_file=s',
+    'white_list_file=s',
+    'black_list_file=s',
+    'depth=i',
+    'help',
+    'man',
+    'version',
+);
+
+if ( $options{'help'} || $options{'man'} ) {
+    pod2usage({
+        -exitval => 0,
+        -verbose => $options{'man'} ? 2 : 1
+    });
+}; 
+
+if ( $options{'version'} ) {
+    my $prog = basename( $PROGRAM_NAME );
+    print "$prog v$VERSION\n";
+    exit 0;
+}
+my ($global_tests, $url_tests) = ({}, {});
+
+if ( $options{'content_test_file'} ) {
+    open (my $test_fh, "<", $options{'content_test_file'}) or die "Could not open $options{'content_test_file'}";
+    local $/ = undef;
+    my $file_contents = <$test_fh>;
+    close $test_fh;
+    ($global_tests, $url_tests) = Load($file_contents);
+}
+
+my ($noisy, $validate, $send_email, $site, $from, $to, $depth) = @options{qw(noisy validate send_email site email_from email_to depth)};
+
+if ( $site !~ m{^http://} ) {
+    $site = 'http://' . $site;
+}
+
+$site = fix_link($site);
+
+my $white_list = {};
+my $black_list = {};
+foreach my $set ((['white_list_file', $white_list], ['black_list_file', $black_list])) {
+       my ($option, $list) = @$set;
+       if ( $options{$option} ) {
+               open (my $list_fh, "<", $options{$option}) or die "Could not open $options{$option}";
+               local $/ = undef;
+               my $file_contents = <$list_fh>;
+               close $list_fh;
+               my $url_array = Load($file_contents);
+               %$list = map {($_, 1)} map {/^http/ ? $_ : $site . $_} @$url_array;
+
+       }
+}
+
+my $message         =   undef;
+my (@slow, @invalid, @failed, @content_errors);
+
+
+my @links = ([$site, $depth]);
+
+while (@links) {
+
+       my $set = shift @links;
+
+       my ($link, $depth) = @$set;
+
+       #make any formatting mods we want to the link
+       $link = fix_link($link);
+       
+       next if $depth <= 0;
+    next if ! traceable_url($link);
+    will_grab_link($link);
+
+    push @links, grab_links($link, $depth - 1);
+
+}
+
+# failures are a special case. We need to format at the end, so we can ensure that we include all parents that reference this link as a failure.
+if (@failed) {
+       foreach my $link (@failed) {
+               $link = format_failed_link($link);
+       }
+}
+
+if (@slow || @invalid || @failed || @content_errors) {
+    @slow    = ("No slow links found\n") if !@slow;
+    @failed  = ("No failed found\n")     if !@failed;
+    @content_errors = ("No content errors found\n") if ! @content_errors;
+    $message .= "Slow links:\n===========\n" . join('', @slow) . "\n";
+    $message .= "Failed links:\n=============\n" . join('', @failed) . "\n";
+    $message .= "Validation errors:\n================\n" . join('', @invalid) . "\n" if @invalid;
+    $message .= "Content errors:\n================\n" . join('', @content_errors) . "\n" if @content_errors;
+}
+else {
+    $message = "No problems found";
+}
+
+#if we want to send the message AND we actually have one to send AND we have somewhere to send it, then send it...
+if ($send_email && $message && $to && $from) {
+    my %mail = (
+        'To'        => $to,
+        'From'      => $from,
+        'Subject'   => "Gramene broken links on $site at " . localtime,
+        'Message'   => $message,
+    );
+
+    sendmail(%mail) or die $Mail::Sendmail::error;
+}
+else {
+    print "$message\n";
+}
+
+# this function has been modified. Instead of a list of links, it now returns a list of arrayrefs.
+# the first element is the link, and the second is the depth at which it was found.
+# if the depth hits 0, we stop
+sub grab_links {
+    my $link    = shift or die "Cannot grab links w/o url";
+    my $depth   = shift;
+
+    my $mech = WWW::Mechanize::Timed->new();
+    
+       return () if previously_seen_url($link);
+    
+    $mech->get($link);
+    
+    my $time = $mech->client_response_receive_time;
+    
+    link_is_slow($link, $time) if is_slow($time);
+    
+    if ($mech->status =~ /^2/) {
+    
+        if ( $mech->is_html ) {
+            my $lint = HTML::Lint->new();
+            $lint->parse( $mech->content );
+            
+            validation_errors($link, $lint->errors) if $lint->errors;
+            
+            if ($options{'content_test_file'}) {
+               check_contents($link, $mech->content);
+            }
+            
+        }
+            
+        my @links = map {$_->URI->abs} $mech->links;
+        foreach my $child (@links) {
+                       $child = fix_link($child);
+            push @{$parents{$child}}, $link;
+        }
+        return map {[ $_, $depth - 1]} @links;
+    }
+    else {
+        $failed{$link} = $mech->status;
+        failed_link($link);
+        return ();
+    }
+}
+
+sub fix_link {
+       my $link = shift;
+       
+       return $link if $white_list->{$link};
+       
+       #ice any anchors. For our purposes, they're superfluous.
+       $link =~ s/#.+//;
+       #nuke query strings, too. To try and cut down on links traversed.
+       $link =~ s/\?.+//;
+       #and drop any trailing slashes
+       $link =~ s!/$!!;
+       
+       return $link;
+}
+
+# Returns a boolean value whether we think a link is slow or not.
+
+sub is_slow {
+    my $time = shift || 0;
+    
+    return $time > 2;
+}
+
+# These are the URLs that we assume we want to follow.
+
+sub traceable_url {
+    my $link = shift;
+
+    return 0 if
+        $link !~ m!$site!
+        || $link =~ /^javascript:/
+        || $link !~ /http/
+        || $black_list->{$link}
+        #|| $previously_seen{$link}++
+    ;
+    
+    return 1;
+}
+
+sub previously_seen_url {
+       my $link = shift;
+       return $previously_seen{$link}++;
+}
+
+#the various IO functions
+sub will_grab_link {
+    my $link = shift;
+    
+    my $output = "Grabbing ($link)\n";
+    
+    print STDERR $output if $noisy;
+    
+    #$message .= $output if $validate;
+}
+
+sub link_is_slow {
+
+    my $link        = shift;
+    my $time        = shift;
+    
+    my $output      = "Slow link: $link - took $time seconds\n\n";
+    
+    push @slow, $output;
+    
+    print STDERR $output if $noisy;
+}
+
+sub validation_errors {
+
+    my $page    = shift;
+    my @errors  = @_;
+    
+    my $output  = "Validation errors on page $page\n";
+    
+    foreach my $error (@errors) {
+        $output .= "\t" . $error->as_string . "\n";
+    }
+
+    $output .= "\n";
+    
+    if ($validate) {
+        push @invalid, $output;
+    }
+    
+    print STDERR $output if $noisy && $validate;
+}
+
+sub failed_link {
+    my $link    = shift;
+    
+    push @failed, $link;
+    
+    print STDERR format_failed_link($link) if $noisy;
+}
+
+sub format_failed_link {
+       my $link    = shift;
+    my $output  = "FAILED $link: " . $failed{$link} . "\n";
+    $output    .= "linked from:\n";
+    my $seen    = {};
+    foreach my $parent (grep {! $seen->{$_}++} @{$parents{$link}}) {
+        $output .= "\t$parent\n";
+    }
+
+    $output .= "\n";
+    
+    return $output;
+}
+
+sub check_contents {
+       my $link    = shift;
+       my $content = shift;
+       
+       my @should_find     = @{ $global_tests->{'valid'} };
+       my @should_not_find = @{ $global_tests->{'invalid'} };
+       
+       if ($url_tests->{$link}) {
+               push @should_find,     @{ $url_tests->{$link}->{'valid'} };
+               push @should_not_find, @{ $url_tests->{$link}->{'invalid'} };
+       }
+       
+       my @return = ();
+       
+       foreach my $should (@should_find) {
+               if ($content !~ /$should/) {
+                       push @return, "  Could not find text: $should";
+               }
+       }
+       
+       foreach my $shouldnt (@should_not_find) {
+               if ($content =~ /$shouldnt/) {
+                       push @return, "  Incorrectly found text: $shouldnt";
+               }
+       }
+       
+       if (@return) {
+               my $output = join("\n", "CONTENT ERROR: $link", @return) . "\n";
+               push @content_errors, $output;
+               print STDERR $output if $noisy;
+       }
+               
+}
+__END__
+
+# ----------------------------------------------------
+
+=head1 NAME
+
+linkchecker.pl - check links on a website
+
+=head1 VERSION
+
+This documentation refers to linkchecker.pl version $Revision: 1.14 $
+
+=head1 SYNOPSIS
+
+  linkchecker.pl [options]
+
+Options:
+
+  --noisy       Prints out progress to STDERR as the program 
+                is running.  Defaults to 0
+  --validate    Includes HTML validation errors. Defaults to 0.
+                WARNING - this can produce a lot of output
+  --send_email  Will send out the results report to the address 
+                specified in email_to in the linkchecker section of
+                the config file If this flag isn't set, the final
+                report will be printed to the console instead. This
+                flag should be set in the crontab
+  --email_from  The email address the report will be sent from.
+                Defaults to the address in the config file
+  --email_to    The email address the report will be sent to.
+                This is usually a group mailing list.
+                Defaults to the address in the config file
+  --site        The site you want to run the checker on. Defaults 
+                to the site in the link_checker section of the config
+  --depth       depth to follow links to, based off of the front page. 
+                --depth 2 follows links two deep, --depth 1 one, and so on.
+                file
+  --content_test_file This is a YAML file that contains information on text
+                that you should or should NOT find at the given URL. Reads
+                the file from the config file by default. See full docs for YAML
+                file format.
+  --white_list_file URLs are normally cleaned up a little bit. Anchors are dropped,
+                query strings are dropped, and so on. The white list file is a YAML
+                file that lets us provide links that should be viewed as is.
+  --black_list_file a YAML file containing URLs that we don't want to visit on the
+                site, for whatever reason.
+  --help        Show brief help and exit
+  --man         Show full documentation
+  --version     Show version and exit
+
+=head1 DESCRIPTION
+
+Checks your links. By default, runs against the site listed in the
+link_checker section of the gramene config file.
+
+=head1 AUTHOR
+
+Jim Thomason E<lt>thomason@cshl.eduE<gt>.
+
+=head1 EXAMPLE YAML
+---
+valid:
+  - Gramene
+invalid:
+  - Error
+---
+http://dev.gramene.org :
+  valid:
+    - Release #29
+    - Gramene
+  invalid:
+    - Release #28
+    - Gramene Sucks
+
+The first YAML doc section contains global values. Things we should find on every
+or no page. The second doc section refers to specific content at individual URLs.
+
+See http://www.yaml.org/ for information on YAML docs.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2007-2009 Cold Spring Harbor Laboratory
+
+This library is free software;  you can redistribute it and/or modify 
+it under the same terms as Perl itself.
+
+=cut