Commit 4dcb0b04 authored by peguerin's avatar peguerin
Browse files

mitofish

parent 628e8d35
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+2 −0
Original line number Diff line number Diff line
# exclude everything downloaded or generated inside the mitofish/ working directory
mitofish/*
+4 −6
Original line number Diff line number Diff line
@@ -68,10 +68,8 @@ In addition, it includes `EMBL` folder which contains all the sequences and `TAX

Now, your reference database can be used for taxonomic assignment in our pipeline to generate species environmental presence from raw eDNA data.

You can use the absolute path of the folder of your reference database as the `/path/to/reference_database` argument in  [only_obitools](http://gitlab.mbb.univ-montp2.fr/edna/only_obitools) and [snakemake_only_obitools](http://gitlab.mbb.univ-montp2.fr/edna/snakemake_only_obitools)





You can use the absolute path of the folder of your reference database as the `/path/to/reference_database` argument in the following pipelines:
* [only_obitools](http://gitlab.mbb.univ-montp2.fr/edna/only_obitools)
* [nextflow_obitools](https://gitlab.mbb.univ-montp2.fr/edna/nextflow_obitools)
* [snakemake_only_obitools](http://gitlab.mbb.univ-montp2.fr/edna/snakemake_only_obitools)
+10 −0
Original line number Diff line number Diff line
@@ -14,6 +14,16 @@ cd TAXO
wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
tar -zxvf taxdump.tar.gz
cd ..

# Optionally fetch 12S sequences from MitoFish and convert them into an
# ecoPCR database written under mitofish/.
# MITOFISH is expected to be "y" or "n"; any other value (including unset or
# empty) is treated as "n". The expansion is quoted and given a default so
# the test never degenerates into `[ = 'y' ]`, which is a shell error.
if [ "${MITOFISH:-n}" = 'y' ]
then
 echo "adding sequences from mitofish..."
 # Only convert when the helper script succeeded; otherwise obiconvert would
 # be run on a missing or incomplete mitofish/mitogene_12S.fasta.
 if bash scripts/add_sequences_from_mitofish.sh
 then
  obiconvert --skip-on-error --fasta -t ./TAXO --ecopcrdb-output=mitofish/"${rd_prefix}" mitofish/mitogene_12S.fasta
 else
  echo "warning: scripts/add_sequences_from_mitofish.sh failed, mitofish sequences not added" >&2
 fi
else
 echo "skip adding sequences from mitofish"
fi

# format the data
obiconvert --skip-on-error --embl -t ./TAXO --ecopcrdb-output="${rd_prefix}" EMBL/rel_std_*.dat
# ecoPCR to simulate an in silico PCR
+4 −0
Original line number Diff line number Diff line
# argument values for building reference database

## "y" add sequences from mitofish to the reference database
## "n" don't add sequences from mitofish
MITOFISH="n"

## reference database prefix
## (the build script passes it to obiconvert as the --ecopcrdb-output prefix)
rd_prefix="embl_std"

+48 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash
## add sequences of gene from mitofish database
##
## Downloads the MitoFish mitogenome and annotation archives, keeps the
## species found in both, and writes their 12S sequences to
## mitofish/mitogene_12S.fasta.
##
## Run from the directory that contains scripts/ and TAXO/: every path used
## below (mitofish/, scripts/, TAXO/) is relative to the current directory.
## The script can be re-run: directories, downloads, extracted files and
## symlinks are overwritten instead of failing or prompting.

set -euo pipefail
# An unmatched glob expands to nothing instead of to the literal pattern.
shopt -s nullglob

readonly MITOFISH_URL="http://mitofish.aori.u-tokyo.ac.jp/files"
readonly WORKDIR="mitofish"

# Print an error message on stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Download ${MITOFISH_URL}/<name>.zip and unpack it into ${WORKDIR}/<name>.
#   $1 - archive base name (without the .zip extension)
fetch_archive() {
  local name=$1
  mkdir -p "${WORKDIR}/${name}"
  # -O gives a fixed file name, so a re-run replaces the archive rather than
  # saving "<name>.zip.1" next to a stale copy.
  wget -O "${WORKDIR}/${name}.zip" "${MITOFISH_URL}/${name}.zip"
  # -o overwrites already extracted files without prompting.
  unzip -o "${WORKDIR}/${name}.zip" -d "${WORKDIR}/${name}"
}

# Print the sorted, de-duplicated "Genus species" names found in a directory.
# The name is taken from fields 3-4 (underscore separated) of each file name
# after dropping everything from the first "." onwards.
#   $1 - directory to scan
list_species() {
  local dir=$1 path
  for path in "${dir}"/*; do
    printf '%s\n' "${path##*/}"
  done \
    | cut -d '.' -f 1 \
    | cut -d '_' -f 3-4 \
    | sort -u \
    | sed 's/_/ /g'
}

mkdir -p "${WORKDIR}"

## Downloads mitogenomes
fetch_archive mitogenomes

## Downloads gene annotations of mitogenomes
fetch_archive mitoannotations

## list of species with mitogenomes
list_species "${WORKDIR}/mitogenomes" > "${WORKDIR}/species_mitogenomes.list"
## list of species with mitoannotations
list_species "${WORKDIR}/mitoannotations" > "${WORKDIR}/species_mitoannotations.list"

## list of species with mitogenomes AND mitoannotations
# -F -x: compare whole lines as fixed strings. Without them every name is a
# regular expression matched as a substring, and an empty pattern line
# matches every species.
grep -F -x -f "${WORKDIR}/species_mitoannotations.list" \
  "${WORKDIR}/species_mitogenomes.list" \
  > "${WORKDIR}/species_mitogenome_annotations.list" \
  || die "no species has both a mitogenome and an annotation"

## extract 12S sequences
# Gather, as symlinks in one directory, the genome and annotation files of
# every species present in both archives.
mkdir -p "${WORKDIR}/complete"
while read -r genus epithet _; do
  # A blank entry would turn the glob below into "*_*" and link every file.
  [[ -n "${genus}" ]] || continue
  key="${genus}_${epithet}"
  for src in "${WORKDIR}"/mitogenomes/*"${key}"* "${WORKDIR}"/mitoannotations/*"${key}"*; do
    # Absolute target so the link resolves from inside ${WORKDIR}/complete.
    ln -sf -- "${PWD}/${src}" "${WORKDIR}/complete/${src##*/}"
  done
done < "${WORKDIR}/species_mitogenome_annotations.list"

# With nullglob an empty match would make cat read stdin, so check first.
genome_files=("${WORKDIR}"/complete/*fa)
(( ${#genome_files[@]} > 0 )) || die "no mitogenome file found in ${WORKDIR}/complete"
cat -- "${genome_files[@]}" > "${WORKDIR}/mitogenomes.fa"

mkdir -p "${WORKDIR}/mitogenomes_12S"
# NOTE(review): "-5 50 -3 50" presumably keeps 50 bases of flanking sequence
# on each side of the gene - confirm against scripts/extract_gene_from_mito.py.
python2 scripts/extract_gene_from_mito.py \
  -i "${WORKDIR}/mitogenomes.fa" \
  -a "${WORKDIR}/complete" \
  -o "${WORKDIR}/mitogenomes_12S" \
  -g 12S -t rRNA -5 50 -3 50 \
  -T TAXO/names.dmp

gene_files=("${WORKDIR}"/mitogenomes_12S/*fa)
(( ${#gene_files[@]} > 0 )) || die "no 12S sequence was extracted"
cat -- "${gene_files[@]}" > "${WORKDIR}/mitogene_12S.fasta"
Loading