### OUTPUT : fasta files sorted by categories (clean, contam, dubious, lowcov, overexp)

#######################################
# Tell whether a contig is assigned to a given category.
# A contig counts as a member only if its identifier is found (fixed string,
# whole word) in BOTH "$out/<ref>.<category>" and "$out/<ref>.all".
# Original author's warning: the .lowcov file also lists non-suspect contigs,
# hence the second lookup (presumably .all lists the suspect contigs only;
# confirm against the step that writes it).
# Globals:   out (read) - output directory
# Arguments: $1 - contig identifier as found in the .togrep file
#            $2 - reference (transcriptome) name
#            $3 - category suffix (lowcov, contam, dubious, overexp)
# Returns:   0 if found in both files, non-zero otherwise (including when a
#            file is missing, in which case grep reports it on stderr)
#######################################
contig_in_category() {
  local ctg=$1 ref=$2 suff=$3
  LC_ALL=C grep -F -q -w -m1 -- "$ctg" "$out/$ref.$suff" \
    && LC_ALL=C grep -F -q -w -m1 -- "$ctg" "$out/$ref.all"
}

#######################################
# Split every "$out"/*.fasta_mod into one FASTA file per category.
# For each reference <ref> this:
#   1. flattens the FASTA into "$out/<ref>.togrep" (one "id<TAB>sequence"
#      line per record, id = first space-delimited word of the header);
#   2. creates missing .lowcov/.contam/.dubious/.overexp files as one-empty-line
#      placeholders so the lookups below never hit a missing file;
#   3. writes each record to "$out/<ref>_<category>.fasta", where <category>
#      is the first listed category the contig belongs to, or "clean" if none.
#      The literal "<ref>|" prefix is stripped from the header.
# Files for the categories being written are removed first, so a re-run does
# not append duplicate records to the output of a previous run.
# Globals:   out (read) - output directory
# Arguments: $@ - categories to test, in priority order
# Outputs:   prints "<TAB><ref>" on stdout for each reference processed
#######################################
write_categorized_fastas() {
  local -a categories=("$@")
  local fasta ref suff ctg seq category

  for fasta in "$out"/*.fasta_mod; do
    # An unmatched glob stays literal; skip it instead of processing "*".
    [[ -e "$fasta" ]] || continue
    ref=$(basename "$fasta" .fasta_mod)
    printf '\t%s\n' "$ref"

    # Flatten records: header and joined sequence lines separated by a tab.
    # N.B. (original note) the sequence identifiers were already modified
    # earlier in the script ("line 37": all text after the first space removed).
    awk 'BEGIN { RS = ">"; FS = "\t" }
      NR > 1 {
        sub("\n", "\t")
        gsub("\n", "", $0)
        split($1, a, " ")
        print a[1] "\t" $2
      }' "$fasta" > "$out/$ref.togrep"

    for suff in lowcov contam dubious overexp; do
      if [[ ! -f "$out/$ref.$suff" ]]; then
        echo "" > "$out/$ref.$suff"
      fi
    done

    # Start from a clean slate for the files this run is going to write.
    rm -f -- "$out/${ref}_clean.fasta"
    for suff in "${categories[@]}"; do
      rm -f -- "$out/${ref}_${suff}.fasta"
    done

    # History: the categorisation used to live inside awk (11 mn 35 s on the
    # author's data); moving it to this shell loop brought it to 8 mn 51 s.
    while IFS=$'\t' read -r ctg seq; do
      category=clean # default state is clean
      for suff in "${categories[@]}"; do
        if contig_in_category "$ctg" "$ref" "$suff"; then
          category=$suff
          break
        fi
      done

      # Lookups above use the full identifier; the "<ref>|" prefix is removed
      # only for output, as a literal string (the pattern is quoted), so
      # regex metacharacters in the reference name cannot over-match.
      ctg=${ctg//"$ref|"/}

      if [[ -n "$seq" ]]; then
        printf '>%s\n%s\n' "$ctg" "$seq"
      else
        printf '>%s\n' "$ctg"
      fi >> "$out/${ref}_${category}.fasta"
    done < "$out/$ref.togrep"
  done
}

#######################################
# Write the categorized transcriptomes according to the output level.
# Globals:   out (read, via write_categorized_fastas)
# Arguments: $1 - output level: 1 = nothing written, 2 = clean + lowcov,
#                 3 = all categories
# Outputs:   progress / warning messages on stdout
#######################################
categorize_transcriptomes() {
  local level=${1-}
  case "$level" in
    1)
      printf '\n%s\n' "fasta files will not be written (output level set to '1')"
      ;;
    2)
      printf '\n%s\n' "Writing categorized transcriptomes - clean and low coverage only (it might take some time)"
      # NOTE(review): only lowcov is tested at this level, so contigs listed in
      # .contam/.dubious/.overexp are written to <ref>_clean.fasta. This matches
      # the previous behaviour; confirm it is intended.
      write_categorized_fastas lowcov
      ;;
    3)
      printf '\n%s\n' "Writing categorized transcriptomes - all categories (it might take some time)"
      write_categorized_fastas lowcov contam dubious overexp
      ;;
    *)
      printf '\n%s\n' "warning : output level value must be set to either '1', '2' or '3' (default = '2')"
      ;;
  esac
}

categorize_transcriptomes "${OUTPUTLEVEL-}"