Commit 90e52ae7 authored by psimion's avatar psimion
Browse files

very minor 1.1 changes

parent 9df31e3c
......@@ -304,7 +304,7 @@ CroCo will create a directory containing all results which will be placed within
```bash
Usage :
CroCo_v0.1.sh [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR]
CroCo_v1.1.sh [--mode p|u] [--in STR] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR]
--mode p|u :\t\t\t'p' for paired and 'u' for unpaired (default : 'p') [short: -m]
--in STR :\t\t\tName of the directory containing the input files to be analyzed (default : working directory) [short: -i]
......@@ -329,16 +329,16 @@ It is good practice to redirect information about each CroCo run into an output
'2>&1 | tee log_file'
Minimal working example :
CroCo_v0.1.sh --mode p 2>&1 | tee log_file
CroCo_v1.1.sh --mode p 2>&1 | tee log_file
Exhaustive example :
CroCo_v0.1.sh --mode p --in data_folder_name --tool R --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no 2>&1 | tee log_file
CroCo_v1.1.sh --mode p --in data_folder_name --tool R --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no 2>&1 | tee log_file
Exhaustive example using shortcuts :
CroCo_v0.1.sh -m p -i data_folder_name -t R -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no 2>&1 | tee log_file
CroCo_v1.1.sh -m p -i data_folder_name -t R -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no 2>&1 | tee log_file
Example for re-categorizing previous CroCo results :
CroCo_v0.1.sh -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
CroCo_v1.1.sh -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
```
......
No preview for this file type
### OUTPUT : basic statistics (CroCo_summary and CroCo_profiles files)
#
# Reads  : "$out"/<sample>.all, "$out"/<sample>.fasta_suspect and
#          "$INDIR"/<sample>.fasta (one set per sample).
# Writes : "$out"/CroCo_summary  - one tab-separated row of contig counts and
#                                  percentages per sample;
#          "$out"/CroCo_profiles - one column per sample: the sample name
#                                  followed by the sorted values of column
#                                  $col_ratio of its .all file.

# Print the number of lines matched by grep for the given arguments
# (usage: croco_count [grep options] PATTERN FILE).
# grep -c exits with status 1 when nothing matches; here that is simply a
# count of 0, so the status is deliberately ignored. An unreadable file also
# yields 0 (grep's own error message is still shown on stderr).
croco_count() {
  local n
  n=$(grep -c "$@") || true
  printf '%s\n' "${n:-0}"
}

# Print NUMERATOR * 100 / DENOMINATOR with two decimals
# (usage: croco_percent NUMERATOR DENOMINATOR).
# bc truncates the quotient at scale=2; awk only pads the result ("0" or ".5"
# become "0.00" / "0.50"). A zero denominator prints 0.00 instead of letting
# bc fail with a division by zero and leave the field empty.
croco_percent() {
  if [ "$2" -eq 0 ]; then
    printf '0.00'
    return 0
  fi
  echo "scale=2; $1 * 100 / $2" | bc -l | awk '{printf "%.2f", $0}'
}

# removing unnecessary files
rm -f -- "$out/ALL_transcripts.all" "$out/ALL_transcripts.fasta_suspect"

# computing stats for every sample
printf '\nWriting decontamination summary\n\tCroCo_summary\n\tCroCo_profiles\n'

# The profiled column index is the number of *.fasta_suspect files plus 3.
# Count with a glob instead of parsing the output of ls.
n_suspect_files=0
for suspect_file in "$out"/*.fasta_suspect; do
  if [ -e "$suspect_file" ]; then
    n_suspect_files=$((n_suspect_files + 1))
  fi
done
col_ratio=$((n_suspect_files + 3))

printf 'species\ttotal_contigs\tnever_suspected\tnb_suspects\tnb_clean\tnb_lowcov\tnb_overexp\tnb_dubious\tnb_contam\tclean_percent\tlow_cov_percent\toverexp_percent\tdubious_percent\tcontam_percent\n' > "$out/CroCo_summary"

for f in "$out"/*.all; do
  # An unmatched glob stays literal: there is nothing to summarise then.
  [ -e "$f" ] || continue
  ff=$(basename "$f" .all)

  tot_ctgs=$(croco_count '>' "$INDIR/$ff.fasta")
  tot_suspects=$(croco_count '>' "$out/$ff.fasta_suspect")
  never_suspected=$((tot_ctgs - tot_suspects))

  clean_ctgs=$(croco_count -w 'clean' "$f")
  contam_ctgs=$(croco_count -w 'contam' "$f")
  dubious_ctgs=$(croco_count -w 'dubious' "$f")
  lowcov_ctgs=$(croco_count -w 'lowcov' "$f")
  overexp_ctgs=$(croco_count -w 'overexp' "$f")

  # Contigs that were never suspected are counted as clean.
  clean_percent=$(croco_percent "$((clean_ctgs + never_suspected))" "$tot_ctgs")
  contam_percent=$(croco_percent "$contam_ctgs" "$tot_ctgs")
  dubious_percent=$(croco_percent "$dubious_ctgs" "$tot_ctgs")
  lowcov_percent=$(croco_percent "$lowcov_ctgs" "$tot_ctgs")
  overexp_percent=$(croco_percent "$overexp_ctgs" "$tot_ctgs")

  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$ff" "$tot_ctgs" "$never_suspected" "$tot_suspects" \
    "$clean_ctgs" "$lowcov_ctgs" "$overexp_ctgs" "$dubious_ctgs" "$contam_ctgs" \
    "$clean_percent" "$lowcov_percent" "$overexp_percent" \
    "$dubious_percent" "$contam_percent" >> "$out/CroCo_summary"

  # Per-sample profile: sample name, then column $col_ratio of the .all file
  # (header line dropped, "inf" replaced by 30) in ascending numeric order.
  # LC_ALL=C makes sort -g read "." as the decimal separator whatever the
  # user's locale is, so no "." <-> "," round trip is needed.
  {
    printf '%s\n' "$ff"
    sed 's/inf/30/g' "$f" \
      | cut -f"$col_ratio" \
      | sed '1d' \
      | LC_ALL=C sort -g
  } > "$out/$ff.profile"
done

# Merge the per-sample profiles side by side, then drop the intermediates.
paste "$out"/*.profile > "$out/CroCo_profiles"
rm -f -- "$out"/*.profile
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment