output_stats.sh 2.28 KB
Newer Older
khalid's avatar
khalid committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
### OUTPUT : basic statistics (CroCo_summary and CroCo_profiles files)

# Remove the aggregate ALL_transcripts intermediates. Once deleted they are
# no longer matched by the per-sample globs used further down
# ("$out"/*.fasta_suspect and "$out"/*.all).
# Expects $out (output directory) to be set by the caller. Paths are quoted so
# an output directory containing spaces or glob characters is handled.
rm -f -- "$out/ALL_transcripts.all"
rm -f -- "$out/ALL_transcripts.fasta_suspect"

# computing stats for every sample
# Expects $out (output directory) to be set by the caller.
printf '\nWriting decontamination summary\n\tCroCo_summary\n\tCroCo_profiles\n'

# col_ratio = (number of *.fasta_suspect files in $out) + 3. It is used later
# as the `cut -f` field index when building the per-sample profiles.
# NOTE(review): presumably the .all tables have one column per sample preceded
# by fixed leading columns, which is where the "+3" comes from -- confirm.
# The files are counted with a glob instead of parsing `ls` output, so paths
# with spaces are counted correctly and an unmatched glob counts as zero.
n_suspect_files=0
for suspect_file in "$out"/*.fasta_suspect; do
	# An unmatched glob is left as the literal pattern; do not count it.
	[ -e "$suspect_file" ] || [ -L "$suspect_file" ] || continue
	n_suspect_files=$((n_suspect_files + 1))
done
col_ratio=$((n_suspect_files + 3))

# Start (truncate) the summary table with its tab-separated header row.
printf 'species\ttotal_contigs\tnever_suspected\tnb_suspects\tnb_clean\tnb_lowcov\tnb_overexp\tnb_dubious\tnb_contam\tclean_percent\tlow_cov_percent\toverexp_percent\tdubious_percent\tcontam_percent\n' > "$out/CroCo_summary"
# Print NUM * 100 / DEN with exactly two decimals, truncated toward zero.
# This matches what the former `bc` (scale=2) | awk '%.2f' pipeline produced,
# but uses shell integer arithmetic only (no external processes).
#   $1 - numerator   (integer count; empty is treated as 0)
#   $2 - denominator (integer count; empty is treated as 0)
# Prints nothing when the denominator is 0, so the field stays empty instead
# of triggering a divide-by-zero error.
_croco_percent() {
	local num=${1:-0} den=${2:-0} scaled sign=''
	[ "$den" -ne 0 ] || return 0
	# Work in hundredths of a percent so integer division keeps two decimals.
	scaled=$((num * 10000 / den))
	if [ "$scaled" -lt 0 ]; then
		sign='-'
		scaled=$((-scaled))
	fi
	printf '%s%d.%02d' "$sign" "$((scaled / 100))" "$((scaled % 100))"
}

# For every per-sample table "$out"/<sample>.all:
#   * append one row of contig counts and percentages to "$out/CroCo_summary";
#   * write "$out"/<sample>.profile: the sample name followed by the
#     numerically sorted values of column $col_ratio of the table.
# Expects $out, $INDIR and $col_ratio to be set before this point.
# The per-sample variables keep their original (global) names in case code
# running after this section reads them.
for f in "$out"/*.all; do
	# An unmatched glob is left as the literal pattern; skip it rather than
	# feeding a non-existent path to grep/sed.
	[ -e "$f" ] || continue

	# Sample name = file name without directory and without the .all suffix.
	ff=${f##*/}
	ff=${ff%.all}

	# Contig counts: lines containing '>' in the input FASTA and in the
	# sample's suspect file.
	tot_ctgs=$(grep -c '>' "$INDIR/$ff.fasta")
	tot_suspects=$(grep -c '>' "$out/$ff.fasta_suspect")
	never_suspected=$((tot_ctgs - tot_suspects))

	# Per-category counts: lines of the table containing the category word.
	clean_ctgs=$(grep -c -w 'clean' "$f")
	contam_ctgs=$(grep -c -w 'contam' "$f")
	dubious_ctgs=$(grep -c -w 'dubious' "$f")
	lowcov_ctgs=$(grep -c -w 'lowcov' "$f")
	overexp_ctgs=$(grep -c -w 'overexp' "$f")

	if [ "${tot_ctgs:-0}" -eq 0 ]; then
		printf 'Warning: no contigs counted in %s; percentages for %s are left empty\n' \
			"$INDIR/$ff.fasta" "$ff" >&2
	fi

	# Never-suspected contigs are counted as clean in clean_percent.
	clean_percent=$(_croco_percent "$((clean_ctgs + never_suspected))" "$tot_ctgs")
	contam_percent=$(_croco_percent "$contam_ctgs" "$tot_ctgs")
	dubious_percent=$(_croco_percent "$dubious_ctgs" "$tot_ctgs")
	lowcov_percent=$(_croco_percent "$lowcov_ctgs" "$tot_ctgs")
	overexp_percent=$(_croco_percent "$overexp_ctgs" "$tot_ctgs")

	# Column order must match the header row of CroCo_summary.
	printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
		"$ff" "$tot_ctgs" "$never_suspected" "$tot_suspects" \
		"$clean_ctgs" "$lowcov_ctgs" "$overexp_ctgs" "$dubious_ctgs" "$contam_ctgs" \
		"$clean_percent" "$lowcov_percent" "$overexp_percent" \
		"$dubious_percent" "$contam_percent" >> "$out/CroCo_summary"

	# Profile: sample name, then the sorted values of column $col_ratio.
	# Every "inf" is rewritten to 40 before sorting.
	# NOTE(review): presumably 40 is a finite stand-in so infinite ratios can
	# be sorted and plotted -- confirm.
	{
		printf '%s\n' "$ff"
		sed 's/inf/40/g' "$f" | cut -f"$col_ratio" | sort -n
	} > "$out/$ff.profile"
done
# Merge the per-sample profiles side by side (one column per *.profile file,
# in glob order) into "$out/CroCo_profiles", then delete the per-sample files.
# Paths are quoted so an output directory containing spaces is handled.
paste -- "$out"/*.profile > "$out/CroCo_profiles"
rm -f -- "$out"/*.profile