### OUTPUT : basic statistics (CroCo_summary and CroCo_profiles files)

# Print NUM * 100 / DEN with exactly two decimals.
# The value is truncated, not rounded (2/3 -> 66.66), which is what the
# former `bc` call with scale=2 produced. A zero denominator (empty
# assembly) yields 0.00 instead of a division error and an empty field.
# Arguments: $1 - numerator, $2 - denominator (integers)
# Outputs:   the formatted percentage on stdout, without a newline
croco_percent() {
  local -i num=$1 den=$2 hundredths
  local sign=''

  if (( den == 0 )); then
    printf '0.00'
    return 0
  fi

  hundredths=$(( num * 10000 / den ))
  if (( hundredths < 0 )); then
    sign='-'
    hundredths=$(( -hundredths ))
  fi
  printf '%s%d.%02d' "$sign" $(( hundredths / 100 )) $(( hundredths % 100 ))
}

# Count the lines of a file that match a grep expression.
# Arguments: $1 - file to scan; remaining arguments are handed to `grep -c`
# Outputs:   the count on stdout; an unreadable file is reported on stderr
#            and counted as 0 so the arithmetic in the caller stays valid
# Returns:   always 0 (`grep -c` exits 1 when nothing matches, which is
#            not an error here)
croco_count() {
  local file=$1
  shift

  if [[ ! -r $file ]]; then
    printf 'CroCo: cannot read %s; counting it as 0\n' "$file" >&2
    printf '0'
    return 0
  fi

  grep -c "$@" "$file"
  return 0
}

# Write the per-sample summary table and the pasted ratio profiles.
# Globals:   out   (read) - directory holding the *.all and *.fasta_suspect
#                           files; CroCo_summary and CroCo_profiles are
#                           written there
#            INDIR (read) - directory holding the <sample>.fasta assemblies
# Outputs:   a progress message on stdout, diagnostics on stderr
# Returns:   0 on success, 1 when $out or $INDIR is not a usable directory
croco_write_stats() {
  local all_file sample profile summary
  local clean_pct lowcov_pct overexp_pct dubious_pct contam_pct
  local -i total suspects never clean contam dubious lowcov overexp ratio_col
  local -a suspect_files profile_files

  # Refuse to run with an empty $out: the paths below would resolve to "/".
  if [[ -z ${out:-} || ! -d $out ]]; then
    printf 'CroCo: output directory "%s" is not usable; statistics skipped\n' \
      "${out:-}" >&2
    return 1
  fi
  if [[ -z ${INDIR:-} || ! -d $INDIR ]]; then
    printf 'CroCo: input directory "%s" is not usable; statistics skipped\n' \
      "${INDIR:-}" >&2
    return 1
  fi

  # removing unnecessary files
  rm -f -- "$out/ALL_transcripts.all" "$out/ALL_transcripts.fasta_suspect"

  # computing stats for every sample
  printf '\nWriting decontamination summary\n\tCroCo_summary\n\tCroCo_profiles\n'

  # The profile column is located at (number of *.fasta_suspect files) + 3.
  # Counting glob matches replaces the former `ls | wc -l`.
  suspect_files=("$out"/*.fasta_suspect)
  [[ -e ${suspect_files[0]:-} ]] || suspect_files=()
  ratio_col=$(( ${#suspect_files[@]} + 3 ))

  summary="$out/CroCo_summary"
  printf 'species\ttotal_contigs\tnever_suspected\tnb_suspects\tnb_clean\tnb_lowcov\tnb_overexp\tnb_dubious\tnb_contam\tclean_percent\tlow_cov_percent\toverexp_percent\tdubious_percent\tcontam_percent\n' \
    > "$summary"

  for all_file in "$out"/*.all; do
    [[ -e $all_file ]] || continue   # the glob matched nothing

    sample=${all_file##*/}
    sample=${sample%.all}

    total=$(croco_count "$INDIR/$sample.fasta" '>')
    suspects=$(croco_count "$out/$sample.fasta_suspect" '>')
    never=$(( total - suspects ))

    clean=$(croco_count "$all_file" -w 'clean')
    contam=$(croco_count "$all_file" -w 'contam')
    dubious=$(croco_count "$all_file" -w 'dubious')
    lowcov=$(croco_count "$all_file" -w 'lowcov')
    overexp=$(croco_count "$all_file" -w 'overexp')

    # Contigs that were never suspected count as clean in the percentage.
    clean_pct=$(croco_percent $(( clean + never )) "$total")
    contam_pct=$(croco_percent "$contam" "$total")
    dubious_pct=$(croco_percent "$dubious" "$total")
    lowcov_pct=$(croco_percent "$lowcov" "$total")
    overexp_pct=$(croco_percent "$overexp" "$total")

    # Decimal points are kept as-is: the dot-to-comma conversion that used
    # to follow this step was already disabled.
    printf '%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\n' \
      "$sample" "$total" "$never" "$suspects" \
      "$clean" "$lowcov" "$overexp" "$dubious" "$contam" \
      "$clean_pct" "$lowcov_pct" "$overexp_pct" "$dubious_pct" "$contam_pct" \
      >> "$summary"

    # Per-sample profile: the sample name as header, then the selected
    # column sorted numerically, with every "inf" replaced by 40.
    profile="$out/$sample.profile"
    {
      printf '%s\n' "$sample"
      sed 's/inf/40/g' "$all_file" | cut -f"$ratio_col" | sort -n
    } > "$profile"
  done

  # Merge the per-sample profiles side by side, then drop the pieces.
  profile_files=("$out"/*.profile)
  if [[ -e ${profile_files[0]:-} ]]; then
    paste -- "${profile_files[@]}" > "$out/CroCo_profiles"
    rm -f -- "${profile_files[@]}"
  else
    : > "$out/CroCo_profiles"
  fi
}

croco_write_stats