Commit b156e910 authored by Bastien MACE's avatar Bastien MACE
Browse files

initial commit

parent 5be03d12
.Rproj.user
.Rhistory
.RData
.Ruserdata
*fasta
*fastq
*csv
*ods
*txt
\ No newline at end of file
# swarm_and_obitools
**Bastien Macé, 2020**
## Introduction
This project is based on the idea that grouping similar sequences together makes it possible to study them faithfully, by eliminating the sequences generated by PCR or NGS errors.
\ No newline at end of file
## Once conda is installed, close the shell, reopen it and paste the
## following lines.
## A bare `cd` moves to the home directory, so the relative paths used below
## are resolved from there :
cd
## Do not activate the conda "base" environment automatically :
conda config --set auto_activate_base false
########################################################################
#STEP 1 : Create a new environment obitools
## Path of the YAML file describing the conda environment to create :
ENVYAML=./dada2_and_obitools/obitools_env_conda.yaml
## The expansion is quoted so the path is always passed as one single
## argument, even if it is later edited to contain spaces or glob characters :
conda env create -f "$ENVYAML"
########################################################################
#STEP 2 : Pair-end sequencing
## Extract the raw data archive if it is still zipped :
unzip mullus_surmuletus_data.zip
## Switch to the obitools environment :
conda activate obitools
## Merge the R1 and R2 read files of each library (199 and 200) with
## illuminapairedend. One AIMI-<lib>.fastq file is written per library.
## --score-min=40 is the score threshold given to the tool : pairs that do
## not reach it are reported with mode="joined", the others with
## mode="aligned" (see the OBITools documentation for the exact definition
## of the score).
for lib in 199 200
do
  illuminapairedend --score-min=40 \
    -r "mullus_surmuletus_data/200221_SN234_A_L001_AIMI-${lib}_R1.fastq" \
    "mullus_surmuletus_data/200221_SN234_A_L001_AIMI-${lib}_R2.fastq" \
    > "AIMI-${lib}.fastq"
done
## Keep only the sequences that were actually aligned. obigrep -p takes a
## python expression; here it rejects every record flagged mode="joined",
## so the new .ali.fastq files only hold the "aligned" sequences.
for lib in 199 200
do
  obigrep -p 'mode!="joined"' "AIMI-${lib}.fastq" > "AIMI-${lib}.ali.fastq"
done
########################################################################
#STEP 3 : Demultiplexing
## Tags and primers have to be removed before the sequences can be compared;
## this is done with ngsfilter. For each library it writes :
##   AIMI-<lib>.unidentified.fastq : sequences not assigned to a correct tag
##   AIMI-<lib>.ali.assigned.fastq : sequences assigned to a correct tag,
##                                   i.e. only the barcode sequences
for lib in 199 200
do
  ngsfilter -t "mullus_surmuletus_data/AIMI_${lib}_corr_tags.txt" \
    -u "AIMI-${lib}.unidentified.fastq" \
    "AIMI-${lib}.ali.fastq" > "AIMI-${lib}.ali.assigned.fastq"
done
## Put the assigned files in a dedicated folder (useful for the next steps)
## and work from inside it :
mkdir samples
mv -t samples AIMI-199.ali.assigned.fastq AIMI-200.ali.assigned.fastq
cd samples
## Split each assigned file into one .fastq file per sample, using the
## attribute named by -t (here "experiment") :
for lib in 199 200
do
  obisplit -t experiment --fastq "AIMI-${lib}.ali.assigned.fastq"
done
## Take the original (unsplit) files out of the samples folder.
## NOTE(review): this runs from inside samples/, so the target resolves to
## samples/dada2_and_obitools, which nothing above creates — confirm the
## intended destination.
mv -t ./dada2_and_obitools AIMI-199.ali.assigned.fastq AIMI-200.ali.assigned.fastq
## Processing of a single sample file, Aquarium_2.fastq (presumably one of the
## files written by obisplit — confirm). Each command reads the file written
## by the previous one.
## Dereplicate the reads; -m sample presumably keeps track of the sample each
## read came from — see the obiuniq documentation :
obiuniq -m sample Aquarium_2.fastq > Aquarium_2.uniq.fasta
## Keep (-k) only the count and merged_sample attributes :
obiannotate -k count -k merged_sample Aquarium_2.uniq.fasta > Aquarium_2.1.uniq.fasta
## Keep the sequences whose count is at least 10; -l 20 is presumably a
## minimum length of 20 — confirm in the obigrep documentation :
obigrep -l 20 -p 'count>=10' Aquarium_2.1.uniq.fasta > Aquarium_2.grep.fasta
## Run obiclean on the filtered sequences (see the obiclean documentation for
## the exact meaning of -s merged_sample, -r 0.05 and -H) :
obiclean -s merged_sample -r 0.05 -H Aquarium_2.grep.fasta > Aquarium_2.clean.fasta
## Keep the sequences whose obiclean_internalcount attribute equals 0 :
obigrep -p 'obiclean_internalcount==0' Aquarium_2.clean.fasta > Aquarium_2.clean.grep.fasta
## Keep (-k) only the count attribute :
obiannotate -k count Aquarium_2.clean.grep.fasta > Aquarium_2.1.clean.grep.fasta
## Taxonomic assignment with ecotag, using the reference database of the red
## mullet individuals and a laboratory taxonomic database gathering the
## species of the infraclass Teleostei (presumably -R and -d respectively).
## NOTE(review): both paths are absolute and machine-specific; adapt them.
ecotag -m 0.5 -d /media/bmace/LaCie/projets/only_obitools/embl_std -R /media/bmace/LaCie/projets/only_obitools/base_ref_finale_formated.fasta Aquarium_2.1.clean.grep.fasta > Aquarium_2.tag.fasta
## Keep (-k) only the count attribute :
obiannotate -k count Aquarium_2.tag.fasta > Aquarium_2.1.tag.fasta
## Manual step : BioEdit.
## NOTE(review): pipeline1_Aq2.fasta is not written by any command in this
## script; presumably it is prepared by hand from Aquarium_2.1.tag.fasta — confirm.
## Export pipeline1_Aq2.fasta as a table with ";" as field separator :
obitab -o --output-field-separator=";" pipeline1_Aq2.fasta > pipeline1_Aq2.csv
## Compare pipeline1_Aq2.fasta with the reference database using sumatra
## (-t 1 is presumably the score threshold — confirm) :
sumatra -t 1 pipeline1_Aq2.fasta /media/bmace/LaCie/projets/only_obitools/base_ref_finale_formated.fasta > pipeline1_Aq2.txt
## Manual step : edit pipeline1_Aq2.fasta to give it the format expected by
## swarm, and save the result as pipeline1_Aq2.1.fasta
swarm -z -d 0 -w pipeline1_Aq2.2.fasta -o /dev/null pipeline1_Aq2.1.fasta # dereplicate if needed
## Cluster with swarm (-d 1), reading the sequences from stdin; the main
## output goes to stats_Aq2.txt (-o) and the sequences given by -w to
## pipeline3_Aq2.fasta :
swarm -z -d 1 -f -t 10 -o stats_Aq2.txt -w pipeline3_Aq2.fasta < pipeline1_Aq2.2.fasta
## Manual step : modify using Elbrecht's filtering
## NOTE(review): this reference path is under swarm_and_obitools, whereas the
## earlier sumatra call uses only_obitools — confirm both files are the same.
sumatra -t 1 pipeline3_Aq2.fasta /media/bmace/LaCie/projets/swarm_and_obitools/base_ref_finale_formated.fasta > pipeline3_Aq2.txt
\ No newline at end of file
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment