Commit a0e243ef authored by eortega's avatar eortega
Browse files

scripts/procedure.sh updated and scripts/00_create_py_env.sh and...

scripts/procedure.sh updated and scripts/00_create_py_env.sh and 01_quality_check.sh updated to new architecture
parent c4017642
......@@ -27,7 +27,7 @@ source ${py_env_dir}/bin/activate
pip install --upgrade pip
##
pip install -r requirements_py-env.txt
pip install -r scripts/requirements_py-env.txt
# pip install biopython pandas matplotlib multiqc pyvcf
......
#!/bin/bash
## Launch example from coevolution/phages/
## ./scripts/01_quality_check.sh $PWD/ qc_raw_data
## PATH TO WORKING DIRECTORY
path=$1
#path=/home/enrique/work/Gandon/coevolution/phages/
step_name=$2
......@@ -11,6 +16,9 @@ path=$1
source ~/envs/coev/bin/activate
## Some temporary files are created next to the source files.
## Using symbolic links from a temporary directory avoids errors when
## the directory containing the raw data is not writable
## SEPARATE THE FILES BY NAME
......@@ -20,47 +28,47 @@ W_seq=$(mktemp -d)
R_seq=$(mktemp -d)
Other_seq=$(mktemp -d)
echo $W_seq $R_seq $Other_seq
### CREATE SYMBOLIC LINKS TO SEQUENCE FILES
### DIVIDED INTO 3 DIRECTORIES
echo "CREATE SYMBOLIC LINKS"
for i in $(ls ${path}raw_data/sequences/W*);
## Link every W* sequence file into the temporary W_seq directory.
## The files are globbed directly instead of parsing `ls` output, so names
## containing spaces or other special characters are not word-split.
for i in "${path}data/sequences/"W*; do
    ## An unmatched glob stays literal; skip it rather than create a dangling link
    [ -e "$i" ] || continue
    ln -s "$i" "$W_seq"
done
for i in $(ls ${path}raw_data/sequences/R*);
## Link every R* sequence file into the temporary R_seq directory.
## The files are globbed directly instead of parsing `ls` output, so names
## containing spaces or other special characters are not word-split.
for i in "${path}data/sequences/"R*; do
    ## An unmatched glob stays literal; skip it rather than create a dangling link
    [ -e "$i" ] || continue
    ln -s "$i" "$R_seq"
done
for i in $(ls ${path}raw_data/sequences/ | grep -v ^W | grep -v ^R);
## Link every sequence file whose name starts with neither W nor R into
## the temporary Other_seq directory.
## The link target is taken from the same directory that is iterated
## (data/sequences/); the previous version listed data/sequences/ but linked
## from raw_data/sequences/, which produced dangling links.
for i in "${path}data/sequences/"*; do
    ## An unmatched glob stays literal; skip it rather than create a dangling link
    [ -e "$i" ] || continue
    ## Filter on the base name only, as `ls | grep -v ^W | grep -v ^R` did
    case "${i##*/}" in
        W*|R*) continue ;;
    esac
    #echo "$i";
    ln -s "$i" "$Other_seq"
done
echo "CREATE SYMBOLIC LINKS -- DONE"
### RUN FASTQC ON EACH GROUP
mkdir -p ${path}qual/fastqc/{W_seq,R_seq,Other_seq}
mkdir -p ${path}steps/${step_name}/fastqc/{W_seq,R_seq,Other_seq}
fastqc -t 35 --noextract -o ${path}qual/fastqc/W_seq $W_seq/*
fastqc -t 35 --noextract -o ${path}steps/$step_name/fastqc/W_seq $W_seq/*
# multiqc -f -i W_seq -o ${path}qual/multiqc/ ${path}qual/fastqc/W_seq
fastqc -t 35 --noextract -o ${path}qual/fastqc/R_seq $R_seq/*
fastqc -t 35 --noextract -o ${path}steps/$step_name/fastqc/R_seq $R_seq/*
# multiqc -f -i R_seq -o ${path}qual/multiqc/ ${path}qual/fastqc/R_seq/
fastqc -t 35 --noextract -o ${path}qual/fastqc/Other_seq $Other_seq/*
fastqc -t 35 --noextract -o ${path}steps/$step_name/fastqc/Other_seq $Other_seq/*
# multiqc -f -i Other_seq -o ${path}qual/multiqc/ ${path}qual/fastqc/Other_seq
......@@ -71,17 +79,17 @@ fastqc -t 35 --noextract -o ${path}qual/fastqc/Other_seq $Other_seq/*
## LOOP MULTIQC DEPENDING ON THE INPUTS
## MAKE DIRECTORIES FOR THE DIFFERENT SAMPLES:
mkdir -p ${path}qual/multiqc/{W,R,Other}
mkdir -p ${path}steps/$step_name/multiqc/{W_seq,R_seq,Other_seq}
for i in $(seq 8)
do
multiqc -f -i W${i}_seq -n W${i} -o ${path}qual/multiqc/W ${path}qual/fastqc/W_seq/W${i}*
multiqc -f -i R${i}_seq -n R${i} -o ${path}qual/multiqc/R ${path}qual/fastqc/R_seq/R${i}*
multiqc -f -i W${i}_seq -n W${i} -o ${path}steps/$step_name/multiqc/W_seq ${path}steps/$step_name/fastqc/W_seq/W${i}*
multiqc -f -i R${i}_seq -n R${i} -o ${path}steps/$step_name/multiqc/R_seq ${path}steps/$step_name/fastqc/R_seq/R${i}*
done
for i in 2972 A B C D CTRL T Undetermined
do
multiqc -f -i ${i}_seq -n ${i} -o ${path}qual/multiqc/Other ${path}qual/fastqc/Other_seq/${i}*
multiqc -f -i ${i}_seq -n ${i} -o ${path}steps/$step_name/multiqc/Other_seq ${path}steps/$step_name/fastqc/Other_seq/${i}*
done
......
......@@ -191,7 +191,7 @@ To run from ipython:
The commands used to launch the scripts above, as well as the supplementary commands used to separate the data, extract it, and perform all other actions, are written here.
It contains a pre-treatment of data to create sub-groups using symbolic links
It contains a pre-treatment of data to create sub-groups using symbolic links.
### README.md
......
......@@ -5,7 +5,7 @@
# cd /home/user/work/coev/phages
## PREPARE DATA
## PREPARE DATA SUBGROUPS
## Uncompress raw data into data folder
tar -xzvf data/sequences.tar.gz -C data
......@@ -17,6 +17,8 @@ chmod -w data/sequences.tar.gz
## Make links to data to make sub-groups
## It makes it easier to handle groups of files
## This step is also done in scripts/01_quality_check.sh
mkdir -p data/fastq_ln/{R_seq,W_seq,Other_seq}
for i in $PWD/data/sequences/R*
......@@ -39,5 +41,13 @@ done
###################################
## CREATE PYTHON ENVIRONMENT -- With virtualenv
./scripts/00_create_py_env.sh
## environment located in:
## ~/envs/coev/
###################################
## QUALITY CHECK
./scripts/01_quality_check.sh $PWD/ qc_raw_data
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment