Commit 543bb272 authored by psimion's avatar psimion
Browse files

various cleaning of scripts prior to v1.2 transition

parent 6009469b
......@@ -62,9 +62,9 @@ if [ -f $CONFFILE ]; then
do
fastaName=`basename $fastaFile .fasta`
fasta_array[$fastaName]=$fastaFile";"$R1";"$R2
orders+=( $fastaName ) #this will keep fastaFileNames in the same order as in config file
orders+=( $fastaName ) #this will keep fastaFileNames in the same order as in config file
InfosCtg=(${fasta_array[$fastaName]//;/ }) #c'est un tableau
InfosCtg=(${fasta_array[$fastaName]//;/ })
len=${#InfosCtg[@]}
if [ $len -gt 2 ]; then
parity="Paired"
......@@ -144,10 +144,8 @@ if [ $RECAT == "no" ]; then
case "$TOOL" in
B) if [ $MODE == "u" ]
then
#fastq=$INDIR"/"${fasta_array[$k]}".fastq"
fastq=$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#fastq=" -1 "$INDIR"/"${fasta_array[$k]}".L.fastq -2 "$INDIR"/"${fasta_array[$k]}".R.fastq"
fastq=" -1 "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`" -2 "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`
fi
bowtie -p $PROCESSORS $ADDOPT -a --trim5 $TRIM5 --trim3 $TRIM3 --chunkmbs 2000 --suppress 1,2,4,5,6,7,8 $out/$toolidx/$toolidx $fastq | \
......@@ -161,14 +159,11 @@ if [ $RECAT == "no" ]; then
then
if [ $FRAGLENGTH == 'none' ] || [ $FRAGSD == 'none' ]; then
echo -e "\nWarning : When using unpaired data with Kallisto, you need to specify mean fragment length and fragment length standard deviation (--frag-length and --frag-sd options)"
#fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"${fasta_array[$k]}".fastq"
fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"${fasta_array[$k]}".fastq"
fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
fi
else
#fastq=$INDIR"/"${fasta_array[$k]}".L.fastq.gz "$INDIR"/"${fasta_array[$k]}".R.fastq.gz"
fastq=$INDIR"/"`echo $InfosCtg | cut -d';' -f2`" "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`
fi
kallisto quant $ADDOPT --threads=$PROCESSORS -i $out/$toolidx -o $fileout.quant $fastq ;
......@@ -179,7 +174,7 @@ if [ $RECAT == "no" ]; then
then
fastq=" -r "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#### gunzip input read files does not work with Rapmap ?
#### gunzip input read files does not work with Rapmap : if needed, see example in "output_cleaned_read_files" to enforce a gunzip step (same for bowtie)
#if [[ "$InfosCtg" == *".gz" ]]; then
#fastq="-1 <(gunzip -c "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`") -2 <(gunzip -c "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`")"
#else
......@@ -199,7 +194,6 @@ if [ $RECAT == "no" ]; then
if [ -f $finalout ]
then
#join -t $'\t' --header $finalout $fileout > $finalout'.tmp'
paste $finalout $fileout | awk -F 'FS' 'BEGIN{FS="\t"}{for (i=1; i<=NF-1; i++) if(i!=NF-1) {printf $i FS};{print $NF}}' > $finalout'.tmp'
mv $finalout'.tmp' $finalout
else
......@@ -213,17 +207,12 @@ if [ $RECAT == "no" ]; then
# care for character ";" in sequence names ?
# splitting "All_transcript.all" file into files corresponding to samples
#for (( j=0; j <i; j++ ))
j=0
for ref in "${orders[@]}"
do
#ref=${fasta_array[$j]}
echo -e "\nCategorization of $ref transcripts"
echo -e `head -n1 $finalout` > $out/$ref".all"
grep "$ref|" $out/All_transcripts.quants >> $out/$ref".all"
#readarray -t suspects < $out/$ref".suspects"
#listeSuspects=$( IFS=';'; echo "${suspects[*]}" );
awk -v outDir=$out -v ref=$ref -v col=$j -v fold=$FOLD -v mincov=$MINCOV -v overexp=$OVEREXP 'BEGIN{OFS="\t";col=col+2;}
{ if (NR == 1) {
print $0, "MaxOtherSpCov", "log2FoldChange", "Status" > outDir"/"ref".tmp"
......@@ -270,10 +259,7 @@ if [ $RECAT == "no" ]; then
elif [ $RECAT != "no" ]; then
i=0
for ref in "${orders[@]}"; do
#for allfile in $RECAT/*.all ; do
echo -e "\n## moving files from :\n$RECAT/utility_files_CroCo/$ref\ninto:\n$out\n"
echo -e "\n## moving files from :\n$RECAT/utility_files_CroCo/$ref\ninto:\n$out\n"
cat $RECAT/$ref.all | sed -r 's/(\t[^\t]*){3}$//' > $out/$ref".all"
cp $RECAT/utility_files_CroCo/$ref".ctgs" $out
cp $RECAT/utility_files_CroCo/$ref".fasta_suspect" $out
......@@ -283,10 +269,8 @@ elif [ $RECAT != "no" ]; then
done
# re-categorizing transcripts (clean, contam, dubious, lowcov, overexp)
#for (( j=0; j <i; j++ )); do
j=0
for ref in "${orders[@]}"; do
#ref=${recatfasta_array[$j]};
refseqs=$out/$ref".ctgs"
finalout=$out/$ref".all"
echo -e "Re-categorizing $ref transcripts"
......
......@@ -62,9 +62,9 @@ if [ -f $CONFFILE ]; then
do
fastaName=`basename $fastaFile .fasta`
fasta_array[$fastaName]=$fastaFile";"$R1";"$R2
orders+=( $fastaName ) #this will keep fastaFileNames in the same order as in config file
orders+=( $fastaName ) #this will keep fastaFileNames in the same order as in config file
InfosCtg=(${fasta_array[$fastaName]//;/ }) #c'est un tableau
InfosCtg=(${fasta_array[$fastaName]//;/ })
len=${#InfosCtg[@]}
if [ $len -gt 2 ]; then
parity="Paired"
......@@ -144,10 +144,8 @@ if [ $RECAT == "no" ]; then
case "$TOOL" in
B) if [ $MODE == "u" ]
then
#fastq=$INDIR"/"${fasta_array[$k]}".fastq"
fastq=$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#fastq=" -1 "$INDIR"/"${fasta_array[$k]}".L.fastq -2 "$INDIR"/"${fasta_array[$k]}".R.fastq"
fastq=" -1 "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`" -2 "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`
fi
bowtie -p $PROCESSORS $ADDOPT -a --trim5 $TRIM5 --trim3 $TRIM3 --chunkmbs 2000 --suppress 1,2,4,5,6,7,8 $out/$toolidx/$toolidx $fastq | \
......@@ -161,14 +159,11 @@ if [ $RECAT == "no" ]; then
then
if [ $FRAGLENGTH == 'none' ] || [ $FRAGSD == 'none' ]; then
echo -e "\nWarning : When using unpaired data with Kallisto, you need to specify mean fragment length and fragment length standard deviation (--frag-length and --frag-sd options)"
#fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"${fasta_array[$k]}".fastq"
fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"${fasta_array[$k]}".fastq"
fastq=" --single -l $FRAGLENGTH -s $FRAGSD "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
fi
else
#fastq=$INDIR"/"${fasta_array[$k]}".L.fastq.gz "$INDIR"/"${fasta_array[$k]}".R.fastq.gz"
fastq=$INDIR"/"`echo $InfosCtg | cut -d';' -f2`" "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`
fi
kallisto quant $ADDOPT --threads=$PROCESSORS -i $out/$toolidx -o $fileout.quant $fastq ;
......@@ -179,7 +174,7 @@ if [ $RECAT == "no" ]; then
then
fastq=" -r "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`
else
#### gunzip input read files does not work with Rapmap ?
#### gunzip input read files does not work with Rapmap : if needed, see example in "output_cleaned_read_files" to enforce a gunzip step (same for bowtie)
#if [[ "$InfosCtg" == *".gz" ]]; then
#fastq="-1 <(gunzip -c "$INDIR"/"`echo $InfosCtg | cut -d';' -f2`") -2 <(gunzip -c "$INDIR"/"`echo $InfosCtg | cut -d';' -f3`")"
#else
......@@ -199,7 +194,6 @@ if [ $RECAT == "no" ]; then
if [ -f $finalout ]
then
#join -t $'\t' --header $finalout $fileout > $finalout'.tmp'
paste $finalout $fileout | awk -F 'FS' 'BEGIN{FS="\t"}{for (i=1; i<=NF-1; i++) if(i!=NF-1) {printf $i FS};{print $NF}}' > $finalout'.tmp'
mv $finalout'.tmp' $finalout
else
......@@ -213,17 +207,12 @@ if [ $RECAT == "no" ]; then
# care for character ";" in sequence names ?
# splitting "All_transcript.all" file into files corresponding to samples
#for (( j=0; j <i; j++ ))
j=0
for ref in "${orders[@]}"
do
#ref=${fasta_array[$j]}
echo -e "\nCategorization of $ref transcripts"
echo -e `head -n1 $finalout` > $out/$ref".all"
grep "$ref|" $out/All_transcripts.quants >> $out/$ref".all"
#readarray -t suspects < $out/$ref".suspects"
#listeSuspects=$( IFS=';'; echo "${suspects[*]}" );
awk -v outDir=$out -v ref=$ref -v col=$j -v fold=$FOLD -v mincov=$MINCOV -v overexp=$OVEREXP 'BEGIN{OFS="\t";col=col+2;}
{ if (NR == 1) {
print $0, "MaxOtherSpCov", "log2FoldChange", "Status" > outDir"/"ref".tmp"
......@@ -270,10 +259,7 @@ if [ $RECAT == "no" ]; then
elif [ $RECAT != "no" ]; then
i=0
for ref in "${orders[@]}"; do
#for allfile in $RECAT/*.all ; do
echo -e "\n## moving files from :\n$RECAT/utility_files_CroCo/$ref\ninto:\n$out\n"
echo -e "\n## moving files from :\n$RECAT/utility_files_CroCo/$ref\ninto:\n$out\n"
cat $RECAT/$ref.all | sed -r 's/(\t[^\t]*){3}$//' > $out/$ref".all"
cp $RECAT/utility_files_CroCo/$ref".ctgs" $out
cp $RECAT/utility_files_CroCo/$ref".fasta_suspect" $out
......@@ -283,10 +269,8 @@ elif [ $RECAT != "no" ]; then
done
# re-categorizing transcripts (clean, contam, dubious, lowcov, overexp)
#for (( j=0; j <i; j++ )); do
j=0
for ref in "${orders[@]}"; do
#ref=${recatfasta_array[$j]};
refseqs=$out/$ref".ctgs"
finalout=$out/$ref".all"
echo -e "Re-categorizing $ref transcripts"
......@@ -370,7 +354,7 @@ fi
### miscellaneous
if [ $RECAT == "no" ]; then
mv *.outblast *.suspects *.blastdb.* *_index *.ctgs *.out *.all_quants *.fasta_mod *.fasta_suspect *.reads_to_discard utility_files_CroCo/
mv *.outblast *.suspects *.blastdb.* *_index *.ctgs *.out *.all_quants *.fasta_mod *.fasta_suspect utility_files_CroCo/
fi
cd ../
......
......@@ -94,25 +94,3 @@ rm -f $out/*.contigstotrim
rm -f $out/*.badcontigs
#cat "$INDIR/"$fastqfile | while read line; do
# k=$((k+1))
# if [[ $((k%2)) -ne 0 ]]; then
# readID=`echo $line | cut -d' ' -f1 | cut -d'.' -f2`
# if [[ "${readindex[${readID}]}" == "bad" ]]; then
# echo -e "\t\t$readID (contam !)"
# contamflag=1
# elif [[ "${readindex[${readID}]}" != "bad" ]]; then
# echo $line >> $fastqfile.clean
# contamflag=0
# else
# echo -e "\t\tproblem with read name !"
# fi
# elif [[ $((k%2)) -eq 0 ]] && [[ $contamflag -eq 0 ]]; then
# echo $line >> $fastqfile.clean
# fi
#done
function printUsage(){
echo -e "\n`basename $0` is a program that can detect potential cross-contaminations in assembled transcriptomes using sequencing reads to find true origin of transcripts.
echo -e "\n`basename $0` is a program that can detect potential cross-contamination in assembled transcriptomes using sequencing reads to find true origin of transcripts.
Usage :
$0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2|3] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR]
$0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2|3] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR] [--readclean yes|no]
--cnf configFile : a text filename containg a liste of contigs assemblies to test and their associated fastq reads files [short: -k]
--cnf configFile : a text filename containg a list of transcriptome assemblies to analyze and their associated fastq reads files [short: -k]
--mode p|u :\t\t\t'p' for paired and 'u' for unpaired (default : 'p') [short: -m]
--in STR :\t\t\tName of the directory containing the input files to be analyzed (DEFAULT : working directory) [short: -i]
--tool B|K|R :\t\t'B' for bowtie, 'K' for kallisto, 'R' for rapmap (DEFAULT : 'R') [short: -t]
......@@ -15,6 +15,7 @@ $0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [-
--output-prefix STR :\t\tPrefix of output directory that will be created (DEFAULT : empty) [short: -p]
--output-level 1|2 :\t\tSelect whether or not to output fasta files. '1' for none, '2' for all (DEFAULT : 2) [short: -l]
--graph yes|no :\t\tProduce graphical output using R (DEFAULT : no) [short: -g]
--readclean yes|no :\t\tSelect whether or not to output fastq files devoid of reads that mapped onto contaminant transcripts (DEFAULT : no) [short: -z]
--add-option 'STR' :\t\tThis text string will be understood as additional options for the mapper/quantifier used (DEFAULT : empty) [short: -a]
--recat SRT :\t\t\tName of a previous CroCo output directory you wish to use to re-categorize transcripts (DEFAULT : no) [short: -r]
--trim5 INT :\t\t\tnb bases trimmed from 5' (DEFAULT : 0) [short: -x]
......@@ -31,13 +32,13 @@ Minimal working example :
CroCo_v0.1.sh --cnf sampleconfig.txt --mode p 2>&1 | tee log_file
Exhaustive example :
CroCo_v0.1.sh --cnf configFile --mode p --in data_folder_name --tool R --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no 2>&1 | tee log_file
CroCo_v0.1.sh --cnf configFile --mode p --in data_folder_name --tool K --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no --readclean no 2>&1 | tee log_file
Exhaustive example using shortcuts :
CroCo_v0.1.sh -k configFile -m p -i data_folder_name -t R -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no 2>&1 | tee log_file
CroCo_v0.1.sh -k configFile -m p -i data_folder_name -t K -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no -z no 2>&1 | tee log_file
Example for re-categorizing previous CroCo results
CroCo_v0.1.sh --cnf configFile -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
CroCo_v0.1.sh -k configFile -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
"
}
......@@ -55,7 +56,7 @@ function printAndUsageAndExit(){
number_re='^[0-9]+$'
float_re='^[0-9]+([.][0-9]+)?$'
ARGS=$(getopt -o k:m:i:f:x:y:c:t:n:p:l:g:a:u:v:s:r:w:d: --long cnf:,mode:,in:,fold-threshold:,trim5:,trim3:,minimum-coverage:,tool:,threads:,output-prefix:,output-level:,graph:,add-option:,frag-length:,frag-sd:,suspect-id:,recat:,suspect-len:,overexp:,readclean: -n "$0" -- "$@");
ARGS=$(getopt -o k:m:i:f:x:y:c:t:n:p:l:g:a:u:v:s:r:w:d:z: --long cnf:,mode:,in:,fold-threshold:,trim5:,trim3:,minimum-coverage:,tool:,threads:,output-prefix:,output-level:,graph:,add-option:,frag-length:,frag-sd:,suspect-id:,recat:,suspect-len:,overexp:,readclean: -n "$0" -- "$@");
#Bad arguments
if [ $? -ne 0 ] || [ $# -eq 0 ];
......@@ -69,64 +70,51 @@ eval set -- "$ARGS";
while true; do
case "$1" in
-k|--cnf)
-a|--add-option)
shift;
if [ -n "$1" ]; then
CONFFLAG=1
CONFFILE=$1
ADDOPT=$1
else
printAndUsageAndExit "You have to set a non-empty value for option --cnf to list the files to be analyzed"
printAndUsageAndExit "You have to set a non-empty value for option --add-option if you decide to use it"
fi
shift;
;;
-m|--mode)
-c|--minimum-coverage)
shift;
if [ -n "$1" ]; then
if [[ "$1" == "p" ]] || [[ "$1" == "u" ]]; then
MODEFLAG=1
MODE=$1
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --minimum-coverage must be float ('$1' was given)"
else
printAndUsageAndExit "'$1' is an incorrect value for --mode option"
MINCOV=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --mode"
fi
shift;
;;
-i|--in)
shift;
if [ -n "$1" ]; then
opt=$1
tmp_indir=$(readlink -f "$opt")
INDIR=${tmp_indir/ /_}
else
printAndUsageAndExit "You have to set a non-empty value for option --in if you decide to use it"
printAndUsageAndExit "You have to set a non-empty value for option --minimum-coverage"
fi
shift;
;;
-u|--frag-length)
-d|--overexp)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --frag-length must be float ('$1' was given)"
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --overexp must be float ('$1' was given)"
else
FRAGLENGTH=$1
OVEREXP=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --frag-length"
printAndUsageAndExit "You have to set a non-empty value for option --overexp"
fi
shift;
;;
-v|--frag-sd)
-f|--fold-threshold)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --frag-sd must be float ('$1' was given)"
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --fold-threshold must be float ('$1' was given)"
else
FRAGSD=$1
FOLD=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --frag-sd"
printAndUsageAndExit "You have to set a non-empty value for option --fold-threshold"
fi
shift;
;;
......@@ -143,6 +131,27 @@ while true; do
fi
shift;
;;
-i|--in)
shift;
if [ -n "$1" ]; then
opt=$1
tmp_indir=$(readlink -f "$opt")
INDIR=${tmp_indir/ /_}
else
printAndUsageAndExit "You have to set a non-empty value for option --in if you decide to use it"
fi
shift;
;;
-k|--cnf)
shift;
if [ -n "$1" ]; then
CONFFLAG=1
CONFFILE=$1
else
printAndUsageAndExit "You have to set a non-empty value for option --cnf to list the files to be analyzed"
fi
shift;
;;
-l|--output-level)
shift;
if [ -n "$1" ]; then
......@@ -156,22 +165,17 @@ while true; do
fi
shift;
;;
-p|--output-prefix)
shift;
if [ -n "$1" ]; then
opt=$1
OUTPUTPREFIX=${opt/ /_}
else
printAndUsageAndExit "You have to set a non-empty value for option --output-prefix"
fi
shift;
;;
-a|--add-option)
-m|--mode)
shift;
if [ -n "$1" ]; then
ADDOPT=$1
if [[ "$1" == "p" ]] || [[ "$1" == "u" ]]; then
MODEFLAG=1
MODE=$1
else
printAndUsageAndExit "'$1' is an incorrect value for --mode option"
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --add-option if you decide to use it"
printAndUsageAndExit "You have to set a non-empty value for option --mode"
fi
shift;
;;
......@@ -191,73 +195,79 @@ while true; do
fi
shift;
;;
-t|--tool)
-p|--output-prefix)
shift;
if [ -n "$1" ]; then
if [[ "$1" == "B" ]] || [[ "$1" == "B2" ]] || [[ "$1" == "K" ]] || [[ "$1" == "R" ]] || [[ "$1" == "S" ]] || [[ "$1" == "H" ]]; then
TOOL=$1
case "$TOOL" in
B) which bowtie > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find bowtie in utils/bin/bowtie-1.1.2/ or in PATH" ;fi ;;
K) which kallisto > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find kallisto in utils/bin/kallisto/ or in PATH" ;fi ;;
R) which rapmap > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find rapmap in utils/bin/rapmap/bin/ or in PATH" ;fi ;;
esac
else
printAndUsageAndExit "'$1' is an incorrect value for --tool option (B, K and R accepted)"
fi
opt=$1
OUTPUTPREFIX=${opt/ /_}
else
printAndUsageAndExit "You have to set a non-empty value for option --tool"
printAndUsageAndExit "You have to set a non-empty value for option --output-prefix"
fi
shift;
;;
-c|--minimum-coverage)
-r|--recat)
shift;
if [ -n "$1" ]; then
RECAT=$1
else
printAndUsageAndExit "You have to set a non-empty value for option --recat if you decide to use it"
fi
shift;
;;
-s|--suspect-id)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --minimum-coverage must be float ('$1' was given)"
printAndUsageAndExit "Value for option --suspect-id must be float ('$1' was given)"
else
MINCOV=$1
SUSPID=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --minimum-coverage"
printAndUsageAndExit "You have to set a non-empty value for option --suspect-id"
fi
shift;
;;
-d|--overexp)
-t|--tool)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --overexp must be float ('$1' was given)"
if [[ "$1" == "B" ]] || [[ "$1" == "B2" ]] || [[ "$1" == "K" ]] || [[ "$1" == "R" ]] || [[ "$1" == "S" ]] || [[ "$1" == "H" ]]; then
TOOL=$1
case "$TOOL" in
B) which bowtie > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find bowtie in utils/bin/bowtie-1.1.2/ or in PATH" ;fi ;;
K) which kallisto > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find kallisto in utils/bin/kallisto/ or in PATH" ;fi ;;
R) which rapmap > /dev/null; ret=$?; if ((ret!=0)); then printAndUsageAndExit "Could not find rapmap in utils/bin/rapmap/bin/ or in PATH" ;fi ;;
esac
else
OVEREXP=$1
printAndUsageAndExit "'$1' is an incorrect value for --tool option (B, K and R accepted)"
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --overexp"
printAndUsageAndExit "You have to set a non-empty value for option --tool"
fi
shift;
;;
-f|--fold-threshold)
-u|--frag-length)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --fold-threshold must be float ('$1' was given)"
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --frag-length must be float ('$1' was given)"
else
FOLD=$1
FRAGLENGTH=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --fold-threshold"
printAndUsageAndExit "You have to set a non-empty value for option --frag-length"
fi
shift;
;;
-s|--suspect-id)
-v|--frag-sd)
shift;
if [ -n "$1" ]; then
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --suspect-id must be float ('$1' was given)"
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --frag-sd must be float ('$1' was given)"
else
SUSPID=$1
FRAGSD=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --suspect-id"
printAndUsageAndExit "You have to set a non-empty value for option --frag-sd"
fi
shift;
;;
......@@ -300,16 +310,7 @@ while true; do
fi
shift;
;;
-r|--recat)
shift;
if [ -n "$1" ]; then
RECAT=$1
else
printAndUsageAndExit "You have to set a non-empty value for option --recat if you decide to use it"
fi
shift;
;;
--readclean)
-z|--readclean)
shift;
if [ -n "$1" ]; then
CLEANING=$1
......
function printUsage(){
echo -e "\n`basename $0` is a program that can detect potential cross-contaminations in assembled transcriptomes using sequencing reads to find true origin of transcripts.
echo -e "\n`basename $0` is a program that can detect potential cross-contamination in assembled transcriptomes using sequencing reads to find true origin of transcripts.
Usage :
$0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2|3] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR]
$0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [--minimum-coverage FLOAT] [--threads INT] [--output-prefix STR] [--output-level 1|2|3] [--graph yes|no] [--trim5 INT] [--trim3 INT] [--frag-length FLOAT] [--frag-sd FLOAT] [--suspect-id INT] [--suspect-len INT] [--add-option STR] [--recat STR] [--readclean yes|no]
--cnf configFile : a text filename containg a liste of contigs assemblies to test and their associated fastq reads files [short: -k]
--cnf configFile : a text filename containg a list of transcriptome assemblies to analyze and their associated fastq reads files [short: -k]
--mode p|u :\t\t\t'p' for paired and 'u' for unpaired (default : 'p') [short: -m]
--in STR :\t\t\tName of the directory containing the input files to be analyzed (DEFAULT : working directory) [short: -i]
--tool B|K|R :\t\t'B' for bowtie, 'K' for kallisto, 'R' for rapmap (DEFAULT : 'R') [short: -t]
......@@ -15,6 +15,7 @@ $0 [--cnf configFile] [--mode p|u] [--tool B|B2|K|R|S] [--fold-threshold INT] [-
--output-prefix STR :\t\tPrefix of output directory that will be created (DEFAULT : empty) [short: -p]
--output-level 1|2 :\t\tSelect whether or not to output fasta files. '1' for none, '2' for all (DEFAULT : 2) [short: -l]
--graph yes|no :\t\tProduce graphical output using R (DEFAULT : no) [short: -g]
--readclean yes|no :\t\tSelect whether or not to output fastq files devoid of reads that mapped onto contaminant transcripts (DEFAULT : no) [short: -z]
--add-option 'STR' :\t\tThis text string will be understood as additional options for the mapper/quantifier used (DEFAULT : empty) [short: -a]
--recat SRT :\t\t\tName of a previous CroCo output directory you wish to use to re-categorize transcripts (DEFAULT : no) [short: -r]
--trim5 INT :\t\t\tnb bases trimmed from 5' (DEFAULT : 0) [short: -x]
......@@ -31,13 +32,13 @@ Minimal working example :
CroCo_v0.1.sh --cnf sampleconfig.txt --mode p 2>&1 | tee log_file
Exhaustive example :
CroCo_v0.1.sh --cnf configFile --mode p --in data_folder_name --tool R --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no 2>&1 | tee log_file
CroCo_v0.1.sh --cnf configFile --mode p --in data_folder_name --tool K --fold-threshold 2 --minimum-coverage 0.2 --overexp 300 --threads 8 --output-prefix test1_ --output-level 2 --graph yes --add-option '-v 0' --trim5 0 --trim3 0 --suspect-id 95 --suspect-len 40 --recat no --readclean no 2>&1 | tee log_file
Exhaustive example using shortcuts :
CroCo_v0.1.sh -k configFile -m p -i data_folder_name -t R -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no 2>&1 | tee log_file
CroCo_v0.1.sh -k configFile -m p -i data_folder_name -t K -f 2 -c 0.2 -d 300 -n 8 -p test1_ -l 2 -g yes -a '-v 0' -x 0 -y 0 -s 95 -w 40 -r no -z no 2>&1 | tee log_file
Example for re-categorizing previous CroCo results
CroCo_v0.1.sh --cnf configFile -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
CroCo_v0.1.sh -k configFile -i data_folder_name -r previous_CroCo_results_folder_name -f 10 -c 0.5 -g yes 2>&1 | tee log_file
"
}
......@@ -55,7 +56,7 @@ function printAndUsageAndExit(){
number_re='^[0-9]+$'
float_re='^[0-9]+([.][0-9]+)?$'
ARGS=$(getopt -o k:m:i:f:x:y:c:t:n:p:l:g:a:u:v:s:r:w:d: --long cnf:,mode:,in:,fold-threshold:,trim5:,trim3:,minimum-coverage:,tool:,threads:,output-prefix:,output-level:,graph:,add-option:,frag-length:,frag-sd:,suspect-id:,recat:,suspect-len:,overexp:,readclean: -n "$0" -- "$@");
ARGS=$(getopt -o k:m:i:f:x:y:c:t:n:p:l:g:a:u:v:s:r:w:d:z: --long cnf:,mode:,in:,fold-threshold:,trim5:,trim3:,minimum-coverage:,tool:,threads:,output-prefix:,output-level:,graph:,add-option:,frag-length:,frag-sd:,suspect-id:,recat:,suspect-len:,overexp:,readclean: -n "$0" -- "$@");
#Bad arguments
if [ $? -ne 0 ] || [ $# -eq 0 ];
......@@ -69,64 +70,51 @@ eval set -- "$ARGS";
while true; do
case "$1" in
-k|--cnf)
-a|--add-option)
shift;
if [ -n "$1" ]; then
CONFFLAG=1
CONFFILE=$1
ADDOPT=$1
else
printAndUsageAndExit "You have to set a non-empty value for option --cnf to list the files to be analyzed"
printAndUsageAndExit "You have to set a non-empty value for option --add-option if you decide to use it"
fi
shift;
;;
-m|--mode)
-c|--minimum-coverage)
shift;
if [ -n "$1" ]; then
if [[ "$1" == "p" ]] || [[ "$1" == "u" ]]; then
MODEFLAG=1
MODE=$1
if ! [[ $1 =~ $float_re ]] ; then
printAndUsageAndExit "Value for option --minimum-coverage must be float ('$1' was given)"
else
printAndUsageAndExit "'$1' is an incorrect value for --mode option"
MINCOV=$1
fi
else
printAndUsageAndExit "You have to set a non-empty value for option --mode"
fi
shift;
;;
-i|--in)
shift;
if [ -n "$1" ]; then
opt=$1
tmp_indir=$(readlink -f "$opt")
INDIR=${tmp_indir/ /_}