Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
bash script for creating singleton/pair
author: athreyab <athreyab@localhost>
Tue, 1 May 2012 00:20:22 +0000 (00:20 +0000)
committer: athreyab <athreyab@localhost>
Tue, 1 May 2012 00:20:22 +0000 (00:20 +0000)
svn path=/; revision=326

Personnel/athreyab/illumina_parse/create_pairs [new file with mode: 0755]
Personnel/athreyab/illumina_parse/float_compare.pl [new file with mode: 0644]

diff --git a/Personnel/athreyab/illumina_parse/create_pairs b/Personnel/athreyab/illumina_parse/create_pairs
new file mode 100755 (executable)
index 0000000..8626f03
--- /dev/null
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+function process_file{
+       array=("$@")
+       for i in "${array[@]}"
+       do      
+               echo "processing file ${array[i]}"
+               while read line ; do
+                       if [ $line_index == 0 ]
+                       then
+                               if [[ "$line" == *1:Y:* ]]
+                               then
+                                       out=""
+                               else
+                                       out="$out$line"
+                               fi
+                               line_index=$((line_index+1))
+                       elif [ $line_index == 1 ]
+                       then
+                               if [ -z "$out" ]
+                               then
+                                       out=""
+                               else
+                                       ns="${line//[^C]}"
+                                       perl float_compare.pl "${#line}" "${#ns}" "${max_rate}"
+                                       r=$?
+                                       if [ $r -eq 1 ]
+                                       then                                                            
+                                               out="$out||$line"
+                                               echo $out >> "$output_folder/file_$i";
+                                               out=""
+                                       else
+                                               out=""
+                                       fi
+                               fi
+                               line_index=$((line_index+1))
+                       elif [ $line_index == 2 ]
+                       then            
+                               line_index=$((line_index+1))
+
+                       elif [ $line_index == 3 ]
+                       then            
+                               line_index=0
+                       fi
+               done < "${array[i]}"
+       done
+}
+
+input_folder=$1
+output_folder=$2
+max_rate=$3
+
+echo -e "max_rate is $max_rate \n"
+
+#check if the input and output folder are defined in command line
+if [ -z "$input_folder" ]
+then
+       echo -e "Input folder is not defined \n"
+       exit;   
+elif [ -z "$output_folder" ]
+then
+       echo -e "Output folder is not defined \n"
+       exit;
+elif [ -z "$max_rate" ]
+then
+       echo -e "max rate is not defined \n"
+
+else
+       if [ -d "$output_folder" ]; then
+               echo "output directory already exists. Exiting now!"
+               exit;
+       fi
+
+       mkdir $output_folder
+       #create two temp files - array1.txt and array2.txt. array1.txt contains all files from reading 1 in sorted order
+       #array2.txt  contains all files from reading 2 in sorted order
+       ls $input_folder*R1*.fastq | sort > array1.txt
+       ls $input_folder*R2*.fastq | sort > array2.txt
+       
+       #put the fileNames from reading 1 in array - array1
+       index1=0
+       while read line ; do
+               array1[$index1]="$line"
+               index1=$(($index1+1))
+       done < array1.txt
+
+       #put the fileNames from reading 2 in array - array2
+       index2=0
+        while read line ; do
+                array2[$index2]="$line"
+                index2=$(($index2+1))
+       done < array2.txt
+
+       #  if array1 and array2 are not of equal size, then
+       # some files are missing.
+       if [ $index1 != $index2 ]
+       then
+               echo "some readings are missing\n";
+               exit;
+       fi
+
+       # delete temp files as we dont' need them anymore. We already
+       # have that information in array1 and array2
+       rm -f array1.txt
+       rm -f array2.txt
+
+       i=0
+       line_index=0
+       out=""
+       remove_entry=0
+       
+       # loop through each files in array1, filter them and put in a file
+       process_file "${array1[@]}"
+       process_file "${array2[@]}"
+fi
+
+
diff --git a/Personnel/athreyab/illumina_parse/float_compare.pl b/Personnel/athreyab/illumina_parse/float_compare.pl
new file mode 100644 (file)
index 0000000..603f89f
--- /dev/null
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+
+# Usage: float_compare.pl TOTAL COUNT MAX_RATE
+# Exit status is 1 when COUNT/TOTAL <= MAX_RATE and 0 otherwise; note that
+# this is the reverse of the usual shell convention, and the caller tests
+# for 1.  A TOTAL of 0 yields 0 instead of dying on a division by zero.
+use strict;
+use warnings;
+my ($n1, $n2, $n3) = @ARGV;
+die "usage: float_compare.pl TOTAL COUNT MAX_RATE\n" unless @ARGV >= 3;
+exit 0 if $n1 == 0;    # nothing to measure: report "rate not acceptable"
+exit(($n2 / $n1 <= $n3) ? 1 : 0);