From b8018903e38a18b6aa6cc7ab3e4924c192d79352 Mon Sep 17 00:00:00 2001
From: athreyab
Date: Tue, 1 May 2012 00:20:22 +0000
Subject: [PATCH] bash script for creating singleton/pair

svn path=/; revision=326
---
 .../athreyab/illumina_parse/create_pairs     | 121 ++++++++++++++++++
 .../athreyab/illumina_parse/float_compare.pl |  19 +++
 2 files changed, 140 insertions(+)
 create mode 100755 Personnel/athreyab/illumina_parse/create_pairs
 create mode 100644 Personnel/athreyab/illumina_parse/float_compare.pl

diff --git a/Personnel/athreyab/illumina_parse/create_pairs b/Personnel/athreyab/illumina_parse/create_pairs
new file mode 100755
--- /dev/null
+++ b/Personnel/athreyab/illumina_parse/create_pairs
@@ -0,0 +1,121 @@
+#!/bin/bash
+#
+# create_pairs - filter Illumina FASTQ reads into one-line records.
+#
+# Usage: create_pairs <input_folder> <output_folder> <max_rate>
+#
+#   input_folder   directory (or path prefix) searched for *R1*.fastq and
+#                  *R2*.fastq files
+#   output_folder  directory to create for the results; must not exist yet
+#   max_rate       upper bound for (number of 'C' characters / read length)
+#
+# The N-th R1 file and the N-th R2 file (in sorted order, counting from 0)
+# are both filtered into <output_folder>/file_N, which receives one
+# "header||sequence" line per kept read.
+#
+# Requires perl and float_compare.pl; the helper is looked up next to this
+# script first and in the current directory second.
+
+# Print an error message on stderr and abort with a non-zero status.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# Filter every FASTQ file given as an argument.
+# Globals:   output_folder, max_rate, float_compare (read)
+# Arguments: the FASTQ files to process
+# Outputs:   appends the kept reads of the N-th argument (counting from 0)
+#            to "$output_folder/file_N"; progress goes to stdout
+function process_file {
+  local -a files=("$@")
+  local idx line line_index out only_c r
+
+  for idx in "${!files[@]}"; do
+    echo "processing file ${files[idx]}"
+    # Start every file on a record boundary, whatever the previous one did.
+    line_index=0
+    out=""
+    while IFS= read -r line || [[ -n "$line" ]]; do
+      case "$line_index" in
+        0)
+          # Header line: drop the record when it contains 1:Y:.
+          # NOTE(review): headers in R2 files presumably carry 2:Y:, which
+          # this pattern does not match - confirm what is intended.
+          if [[ "$line" == *1:Y:* ]]; then
+            out=""
+          else
+            out="$line"
+          fi
+          ;;
+        1)
+          # Sequence line. An empty one cannot be rated, so it is dropped.
+          if [[ -n "$out" && -n "$line" ]]; then
+            # Delete everything but 'C'; the length left is the C count.
+            # NOTE(review): this variable used to be called "ns", so
+            # counting 'N' may have been intended - confirm before changing.
+            only_c="${line//[^C]/}"
+            # The helper exits with 1 (not 0) when count/length <= max_rate.
+            # stdin is redirected so it cannot consume the file being read.
+            perl "$float_compare" "${#line}" "${#only_c}" "$max_rate" < /dev/null
+            r=$?
+            if (( r == 1 )); then
+              printf '%s\n' "${out}||${line}" >> "${output_folder}/file_${idx}"
+            fi
+          fi
+          out=""
+          ;;
+        *)
+          # Lines 2 and 3 of a record are counted but never inspected.
+          ;;
+      esac
+      line_index=$(( (line_index + 1) % 4 ))
+    done < "${files[idx]}"
+  done
+}
+
+input_folder=$1
+output_folder=$2
+max_rate=$3
+
+printf 'max_rate is %s \n\n' "$max_rate"
+
+# All three arguments are mandatory.
+[[ -n "$input_folder" ]] || die "Input folder is not defined"
+[[ -n "$output_folder" ]] || die "Output folder is not defined"
+[[ -n "$max_rate" ]] || die "max rate is not defined"
+
+# Results are appended to the output files, so refuse to reuse a directory
+# that may still hold the files of an earlier run.
+[[ ! -d "$output_folder" ]] || die "output directory already exists. Exiting now!"
+
+# Locate the comparison helper: next to this script, else in the current
+# directory.
+script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+float_compare="${script_dir}/float_compare.pl"
+[[ -f "$float_compare" ]] || float_compare="float_compare.pl"
+[[ -f "$float_compare" ]] || die "float_compare.pl not found"
+
+# Accept a directory with or without its trailing slash.
+if [[ -d "$input_folder" ]]; then
+  input_folder="${input_folder%/}/"
+fi
+
+# Let the shell expand the two file lists; glob results are sorted, and
+# nullglob turns a pattern without matches into an empty list.
+shopt -s nullglob
+array1=("$input_folder"*R1*.fastq)
+array2=("$input_folder"*R2*.fastq)
+shopt -u nullglob
+
+(( ${#array1[@]} > 0 )) || die "no R1 fastq files found in $input_folder"
+
+# If array1 and array2 are not of equal size, some files are missing.
+(( ${#array1[@]} == ${#array2[@]} )) || die "some readings are missing"
+
+mkdir -- "$output_folder" || die "cannot create $output_folder"
+
+# Filter the R1 files first, then the R2 files; the results of the N-th file
+# of either list end up in the same file_N.
+process_file "${array1[@]}"
+process_file "${array2[@]}"
diff --git a/Personnel/athreyab/illumina_parse/float_compare.pl b/Personnel/athreyab/illumina_parse/float_compare.pl
new file mode 100644
--- /dev/null
+++ b/Personnel/athreyab/illumina_parse/float_compare.pl
@@ -0,0 +1,19 @@
+#!/usr/bin/perl
+#
+# float_compare.pl <total> <count> <limit>
+#
+# Exits with status 1 when count/total <= limit and with status 0 in every
+# other case.  This is the reverse of the usual shell convention;
+# create_pairs tests for an exit status of exactly 1.
+
+use strict;
+use warnings;
+
+my ($total, $count, $limit) = @ARGV;
+
+# Without all three arguments, or with a zero total, the ratio cannot be
+# computed; answer "not within the limit" rather than die on a division by
+# zero.
+exit 0 unless defined $limit && $total != 0;
+
+exit($count / $total <= $limit ? 1 : 0);
-- 
2.34.1