Commit 8e1edbcb authored by psimion's avatar psimion
Browse files

readme minor updates

parent 1a94b14a
......@@ -72,6 +72,8 @@ If you use CroCo in your work, please cite:
Simion et al. BMC Biology (2018) 16:28 DOI 10.1186/s12915-018-0486-7
---
<div class="page-break"></div>
......@@ -93,7 +95,7 @@ Simion et al. BMC Biology (2018) 16:28 DOI 10.1186/s12915-018-0486-7
# Quick installation guide
Start by downloading CroCo Repository here : http://xxxxxxxxxxxxxxxxxxxx.
Start by downloading or cloning the CroCo repository here: http://gitlab.mbb.univ-montp2.fr/mbb/CroCo.git
Then go to the section below that corresponds to your Operating System.
---
......@@ -102,13 +104,12 @@ Then go to the section below that corresponds to your Operating System.
CroCo is a BASH script written under Linux that uses BLAST+ and mapping tools (e.g. *bowtie*, *RapMap*) and does not require any installation: **you can use CroCo immediately if you already have both the BLAST+ suite and the mapping tool you want to use installed on your system and present in your PATH**.
If you lack one or several of these tools, we provide a bash script (`install_dependencies.sh`) that will automatically install them for you. To do so, extract CroCo, move into its `utils/` directory and install the dependencies you want.
If you lack one or several of these tools, we provide a bash script (`install_dependencies.sh`) that will automatically install them for you. To do so, extract CroCo (not needed if you used `git clone`), move into its `utils/` directory and install the dependencies you want.
For example, to install all dependencies on Ubuntu:
```bash
unzip XXXXXXXXXXXX
cd XXXXXXXXXXXX/utils
cd CroCo_directory/utils
bash ./install_dependencies.sh --tool all --os ubuntu
```
......@@ -132,8 +133,7 @@ A working image of CroCo, already containing all its dependencies, can then easi
Extract CroCo, move into its `CroCo_dockerbuild` directory and use Docker to build CroCo's image as follows :
```bash
unzip XXXXXXXXXXXX
cd XXXXXXXXXXXX/utils/CroCo_dockerbuild
cd CroCo_directory/utils/CroCo_dockerbuild
docker build -t crocodock .
```
......@@ -167,7 +167,7 @@ This script works under Ubuntu, Debian, Fedora, RedHat, CentOS and on Mac OS X.
Script usage :
```bash
./install_dependencies.sh --tool all|B|B2|K|L|S|H --os ubuntu|debian|fedora|centos|redhat|macosx
./install_dependencies.sh --tool all|B|K|R --os ubuntu|debian|fedora|centos|redhat|macosx
```
If you encounter problems while installing the dependencies, take a look at the [Troubleshooting section](#troubleshooting) and at the `*_install.log` files created in the `utils/bin/` directory.
......
No preview for this file type
......@@ -91,6 +91,6 @@ elif [ $OUTPUTLEVEL == "2" ]; then
# if [ -f $out/$ref"_contam.fasta" ]; then sed -i "s/$ref|//g" $out/$ref"_contam.fasta"; fi
# done
else echo -e "\nwarning : output level value must be set to either '1', '2' or '3' (default = '2')"
else echo -e "\nwarning : output level value must be set to either '1' or '2' (default = '2')"
fi
### OUTPUT : network files - step 4 ("LINKS_gephi_simplified.csv", "LINKS_diagrammer_simplified.tsv")

#######################################
# Write "simplified" copies of a pair of network link files: the header line
# plus every link whose weight (4th field) is strictly greater than
# max_link / division_factor (integer division), where max_link is the largest
# weight found in the gephi file.
#
# Both output files are filtered with the threshold computed from the gephi
# file. Rows are copied verbatim, so they keep the same field delimiter as the
# header line.
#
# Globals:
#   division_factor (read; 50 when unset)
#   max_link, threshold (written, as the top-level script always did)
# Arguments:
#   $1 - comma-separated gephi link file (header on line 1, weight in field 4)
#   $2 - whitespace-separated diagrammer link file (header on line 1, weight in field 4)
#   $3 - network label used at the start of the progress message
#   $4 - link label used at the end of the progress message
# Outputs:
#   progress message on stdout; "<name>_simplified.csv" / "<name>_simplified.tsv"
#   written next to the inputs
# Returns:
#   0 on success, 1 when one of the input files is missing
#######################################
simplify_links() {
  local gephi_csv=$1 diagrammer_tsv=$2 network_label=$3 link_label=$4
  local gephi_out="${gephi_csv%.csv}_simplified.csv"
  local diagrammer_out="${diagrammer_tsv%.tsv}_simplified.tsv"

  if [[ ! -f "$gephi_csv" || ! -f "$diagrammer_tsv" ]]; then
    printf 'warning : network simplification skipped, cannot read %s and/or %s\n' \
      "$gephi_csv" "$diagrammer_tsv" >&2
    return 1
  fi

  # Largest weight among the data rows; prints 0 for a header-only file so the
  # integer division below never sees an empty operand.
  max_link=$(awk -F',' 'NR > 1 && $4 + 0 > max { max = $4 + 0 } END { printf "%d\n", max }' "$gephi_csv")
  threshold=$(( max_link / ${division_factor:-50} ))

  printf '\t%s network simplification : removal of links below %s %s\n' \
    "$network_label" "$threshold" "$link_label"

  # One awk pass per file: line 1 is the header, later lines are kept only when
  # their weight is strictly above the threshold.
  awk -F',' -v min="$threshold" 'NR == 1 || $4 + 0 > min + 0' "$gephi_csv" > "$gephi_out"
  awk -v min="$threshold" 'NR == 1 || $4 + 0 > min + 0' "$diagrammer_tsv" > "$diagrammer_out"
}

division_factor=50
#echo -e "\tLINKS_gephi_simplified.csv\n\tLINKS_diagrammer_simplified.tsv"
simplify_links LINKS_gephi.csv LINKS_diagrammer.tsv \
  "Cross contamination" "cross-contaminations"
#echo -e "\tLINKS_gephi_dubious_simplified.csv\n\tLINKS_diagrammer_dubious_simplified.tsv"
simplify_links LINKS_gephi_dubious.csv LINKS_diagrammer_dubious.tsv \
  "Dubious" "dubious cross-contaminations"
......@@ -27,7 +27,7 @@ for f in $out"/"*".all"
cat $f > $out"/"$ff".profile_tmp"
sed 's/inf/30/g' $out"/"$ff".profile_tmp" > $out"/"$ff".profile_tmp2"
echo -e "$ff" > $out"/"$ff".profile"
cat $out"/"$ff".profile_tmp2" | cut -f$col_ratio | sed '1d' | sort -g >> $out"/"$ff".profile"
cat $out"/"$ff".profile_tmp2" | cut -f$col_ratio | sed '1d' | sed 's/./,/g' | sort -g >> $out"/"$ff".profile"
rm -f $out"/"$ff".profile_tmp" $out"/"$ff".profile_tmp2"
done
paste $out"/"*".profile" > $out"/CroCo_profiles"
......
### OUTPUT : basic statistics (cross_contamination_summary and cross_contamination_profiles files)

#######################################
# Print COUNT as a percentage of TOTAL with exactly two decimals, truncated
# (not rounded), which is what the former `bc` call with scale=2 produced.
# Arguments:
#   $1 - count (integer)
#   $2 - total (integer)
# Outputs:
#   the percentage on stdout, without trailing newline; "0.00" when TOTAL is 0
#   (avoids the division by zero that used to leave the column empty)
#######################################
croco_percent() {
  local count=$1 total=$2 scaled sign=''
  if (( total == 0 )); then
    printf '0.00'
    return 0
  fi
  scaled=$(( count * 10000 / total ))
  if (( scaled < 0 )); then
    sign='-'
    scaled=$(( -scaled ))
  fi
  printf '%s%d.%02d' "$sign" "$(( scaled / 100 ))" "$(( scaled % 100 ))"
}

#######################################
# Write the per-sample summary table and the pasted ratio profiles.
# Globals:
#   out   (read)  - directory holding the *.all and *.fasta_suspect files
#   INDIR (read)  - directory holding the original <sample>.fasta files
#   col_ratio (written) - column of the *.all files used for the profiles:
#                         number of *.fasta_suspect files + 3
# Outputs:
#   progress message on stdout; $out/CroCo_summary and $out/CroCo_profiles
# Returns:
#   1 when $out is unset or not a directory, otherwise the status of the last
#   command
#######################################
write_croco_summary() {
  local f ff suspect_file nb_suspect_files=0
  local tot_ctgs tot_suspects never_suspected
  local clean_ctgs contam_ctgs dubious_ctgs lowcov_ctgs overexp_ctgs
  local clean_percent contam_percent dubious_percent lowcov_percent overexp_percent

  if [[ -z "${out:-}" || ! -d "$out" ]]; then
    printf "error : output directory '%s' not found, summary not written\n" "${out:-}" >&2
    return 1
  fi

  # removing unnecessary files
  rm -f "$out/ALL_transcripts.all" "$out/ALL_transcripts.fasta_suspect"

  # computing stats for every sample
  echo -e "\nWriting decontamination summary\n\tCroCo_summary\n\tCroCo_profiles"
  for suspect_file in "$out"/*.fasta_suspect; do
    if [[ -e "$suspect_file" ]]; then
      nb_suspect_files=$(( nb_suspect_files + 1 ))
    fi
  done
  col_ratio=$(( nb_suspect_files + 3 ))
  echo -e "species\ttotal_contigs\tnever_suspected\tnb_suspects\tnb_clean\tnb_lowcov\tnb_overexp\tnb_dubious\tnb_contam\tclean_percent\tlow_cov_percent\toverexp_percent\tdubious_percent\tcontam_percent" > "$out/CroCo_summary"

  for f in "$out"/*.all; do
    [[ -e "$f" ]] || continue
    ff=$(basename "$f" .all)

    # grep -c prints nothing when the file cannot be read; count that as 0.
    tot_ctgs=$(grep -c '>' "$INDIR/$ff.fasta")
    tot_ctgs=${tot_ctgs:-0}
    tot_suspects=$(grep -c '>' "$out/$ff.fasta_suspect")
    tot_suspects=${tot_suspects:-0}
    never_suspected=$(( tot_ctgs - tot_suspects ))

    clean_ctgs=$(grep -c -w 'clean' "$f")
    contam_ctgs=$(grep -c -w 'contam' "$f")
    dubious_ctgs=$(grep -c -w 'dubious' "$f")
    lowcov_ctgs=$(grep -c -w 'lowcov' "$f")
    overexp_ctgs=$(grep -c -w 'overexp' "$f")

    # Never-suspected contigs are counted as clean in the clean percentage.
    clean_percent=$(croco_percent "$(( clean_ctgs + never_suspected ))" "$tot_ctgs")
    contam_percent=$(croco_percent "$contam_ctgs" "$tot_ctgs")
    dubious_percent=$(croco_percent "$dubious_ctgs" "$tot_ctgs")
    lowcov_percent=$(croco_percent "$lowcov_ctgs" "$tot_ctgs")
    overexp_percent=$(croco_percent "$overexp_ctgs" "$tot_ctgs")

    {
      printf '%s' "$ff"
      printf '\t%s' \
        "$tot_ctgs" "$never_suspected" "$tot_suspects" \
        "$clean_ctgs" "$lowcov_ctgs" "$overexp_ctgs" "$dubious_ctgs" "$contam_ctgs" \
        "$clean_percent" "$lowcov_percent" "$overexp_percent" "$dubious_percent" "$contam_percent"
      printf '\n'
    } >> "$out/CroCo_summary"

    # Profile = sample name followed by the sorted ratio column ('inf' capped
    # at 30). The decimal point is turned into a comma AFTER the numeric sort:
    # the previous 's/./,/g' had an unescaped dot (every character became a
    # comma) and ran before 'sort -g', which cannot order comma decimals in the
    # C locale.
    {
      printf '%s\n' "$ff"
      sed 's/inf/30/g' "$f" \
        | cut -f"$col_ratio" \
        | sed '1d' \
        | LC_ALL=C sort -g \
        | sed 's/\./,/g'
    } > "$out/$ff.profile"
  done

  paste "$out"/*.profile > "$out/CroCo_profiles"
  rm -f "$out"/*.profile
}

write_croco_summary
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment