From e2bdb1c70b4d33990dd17fe4250e3c3424a2a00d Mon Sep 17 00:00:00 2001 From: athreyab Date: Mon, 16 Apr 2012 20:47:29 +0000 Subject: [PATCH] illumina parse scripts svn path=/; revision=325 --- .../athreyab/illumina_parse/illumina_parse.pl | 121 ++++++++++++++++++ .../interactions/interactionPathsFromTsv.pl | 4 +- 2 files changed, 123 insertions(+), 2 deletions(-) create mode 100755 Personnel/athreyab/illumina_parse/illumina_parse.pl diff --git a/Personnel/athreyab/illumina_parse/illumina_parse.pl b/Personnel/athreyab/illumina_parse/illumina_parse.pl new file mode 100755 index 0000000..d876a78 --- /dev/null +++ b/Personnel/athreyab/illumina_parse/illumina_parse.pl @@ -0,0 +1,121 @@ +#!/usr/bin/perl + +use Switch; + + +$optionId = $ARGV[0]; + +if($optionId eq "-co" && ($#ARGV == 2 || $#ARGV == 3)){ + $in_folder = $ARGV[1]; + $out_folder = $ARGV[2]; + $slash_string = $ARGV[3]; + if($slash_string eq ""){ + $slash_string = '\\'; + } + process_folders($in_folder,$out_folder,"",$slash_string); +} + +elsif($optionId eq "-fi" && ($#ARGV == 3 || $#ARGV == 4)){ + $in_folder = $ARGV[1]; + $out_folder = $ARGV[2]; + $max_rate = $ARGV[3]; + $slash_string = $ARGV[4]; + if($max_rate =~ /[^0-9\.]/){ + print "\nmax_rate supplied: '$max_rate', is not a number. check if you supplied all 4 params - option(-fi), an input directory, output directory and a max_rate \n"; + showUsage(); + } + if($slash_string eq ""){ + $slash_string = '\\'; + } + process_folders($in_folder,$out_folder,$max_rate,$slash_string); +} + +else{ + print "Incorrect parameters were supplied\n"; + showUsage(); +} + +sub showUsage +{ + print "\nUsage: \n"; + print "convert fastq to fasta format:\t./illumina_parse -co []\n"; + print "filter low quality entries:\t./illumina_parse -fi []\n"; + print "[] = optional parameter\n"; + die ""; +} + +sub isnan { ! defined( $_[0] <=> 9**9**9 ) } + +sub filterRead { + my $seq =shift; + my $maxRate =shift; + my $length =length($seq); + my $count =0; + $count++ while $seq=~m/N/g; + $count++ while $seq=~m/\./g; + return 1 if(($count/$length)<=$maxRate); + return 0; +} + +sub process_folders{ + $in_folder = $_[0]; + $out_folder = $_[1]; + $max_rate = $_[2]; + $slash_string = $_[3]; + opendir(DIR, $in_folder) or die "can't open directory $in_folder: $!\n"; + unless(-d $out_folder){ + mkdir $out_folder or die "can't create directory $out_folder: $! \n"; + } + @files = readdir DIR; + $y_entries = 0; + $total_entries = 0; + $high_qual_entries = 0; + foreach $fileName (@files) { + if(index($fileName,'.fastq') != -1){ + open(in_file, "<","$in_folder/$fileName") || die "Error: file '$in_folder/$fileName' can not be opened\n"; + print "processing file $in_folder/$fileName\n"; + $fileName =~ s/.fastq/.fa/; + open(out_file,">>","$out_folder/$fileName") || die "Error: file $out_folder/$fileName can not be opened\n"; + until(eof(in_file)){ + $h1=; + $seq=; + $h2=; + $qual=; + $total_entries = $total_entries + 1; + chomp $h1; + chomp $seq; + $h1=~s/\@/>/; + if(index($h1, ' 1:Y') != -1 || index($h1,' 2:Y') != -1){ + $y_entries = $y_entries + 1; + next; + } + if(index($h1, ' 1:N') != -1){ + $temp = $slash_string."1:N"; + $h1 =~ s/ 1:N/$temp/; + $h1 = $h1.$slash_string."1"; + } + elsif(index($h1, ' 2:N') != -1){ + $temp = $slash_string."2:N"; + $h1 =~ s/ 2:N/$temp/; + $h1 = $h1.$slash_string."2"; + } + + if($max_rate eq ""){ + print out_file "$h1\n$seq\n"; + } + elsif(filterRead($seq,$max_rate)){ + print out_file "$h1\n$seq\n"; + $high_qual_entries = $high_qual_entries + 1; + } + } + close in_file; + close out_file; + print "successfully written $fileName\n"; + } + } + print "total entries: $total_entries\n"; + print "low quality entries(entries containing 1:Y or 2:Y in the header): $y_entries\n"; + $qual_entries = $total_entries - $high_qual_entries; + print "entries removed due to exceeding max_rate: $qual_entries \n"; + closedir DIR; +} diff --git a/Personnel/athreyab/interactions/interactionPathsFromTsv.pl b/Personnel/athreyab/interactions/interactionPathsFromTsv.pl index 950f0aa..c81d3dd 100644 --- a/Personnel/athreyab/interactions/interactionPathsFromTsv.pl +++ b/Personnel/athreyab/interactions/interactionPathsFromTsv.pl @@ -75,11 +75,11 @@ sub importSeedInteractionData(){ print "interaction_id:$interaction_id\n"; #if not, insert data into to the database if(isEmpty($interaction_id)){ - $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`mode_of_action_id`, + $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`mode_of_action_id`, `interactor_type_id_left`,`interactor_type_id_right`,`evidence_id`,`evidence_code_id`,`experiment_id`,`comments`,`curator_id`,`date`) VALUES ('$obj_id_left','$obj_id_right','$interaction_type_id','$mode_of_action_id','$int_type_id_left','$int_type_id_right' ,'$evidence_id','$evidence_code_id','$experiment_id','$comments','$curator_id', '".strftime("%Y-%m-%d", localtime)."')"; - executeDbQuery($query); + executeDbQuery($query); } } } -- 2.34.1