detect_suspect_trans_with_blast.sh 2.44 KB
Newer Older
khalid's avatar
khalid committed
1
# add sample name to all sequence names and build BLASTdb
2
#for (( j=0; j <i; j++ )); do
3
for ref in "${orders[@]}"; do
4
5
6
7
8
9
10
11
	#ref=${fasta_array[$j]}
	#awk '/^>/{ print $1 } ; /^[^>]/{ print $0 }' < $INDIR/${fasta_array[$j]}.fasta > $out/${fasta_array[$j]}".fasta_mod"
	#sed -i "s/>/>$ref|/g" $out/${fasta_array[$j]}".fasta_mod"
	#makeblastdb -in $out/${fasta_array[$j]}".fasta_mod" -parse_seqids -dbtype nucl -out $out/${fasta_array[$j]}".blastdb"

	awk '/^>/{ print $1 } ; /^[^>]/{ print $0 }' < $INDIR/${ref}.fasta > $out/${ref}".fasta_mod"
	sed -i "s/>/>$ref|/g" $out/${ref}".fasta_mod"
	makeblastdb -in $out/${ref}".fasta_mod" -parse_seqids -dbtype nucl -out $out/${ref}".blastdb"
12
13
done

khalid's avatar
khalid committed
14

15
16
### initial version of the section :
# "all pairwise BLAST and listing suspects (see $SUSPID and $SUSPLEN)"
khalid's avatar
khalid committed
17
 echo -e "\n"
18
 #for (( j=0; j <i; j++ )); do
19
 for ref in "${orders[@]}"; do
20
  #ref=${fasta_array[$j]};
khalid's avatar
khalid committed
21
  suspects=$out/$ref".suspects"
22
23
  
  #for (( k=0; k <i; k++ )); do
24
  for target in "${orders[@]}"; do
25
   #target=${fasta_array[$k]};
khalid's avatar
khalid committed
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
   if [ "$ref" != "$target" ]; then
    outblast=$out/$ref"v"$target".outblast"
    query=$out/$ref".fasta_mod"
    db=$out/$target".blastdb"
    echo -e "Blasting $ref vs. $target"
    blastn -num_threads $PROCESSORS -query $query -db $db -perc_identity $SUSPID -soft_masking true -max_target_seqs 5000 -outfmt "6 qseqid sseqid evalue pident bitscore qstart qend qlen sstart send slen" -out $outblast
   fi
  done
  cat $out/$ref"v"*".outblast" > $out/$ref".outblast" ; rm -f $out/$ref"v"*".outblast"
  cat $out/$ref".outblast" | awk -v susplen=$SUSPLEN '{ if($5>=susplen){print $1} }' > $out/$ref".suspects_tmp"
  cat $out/$ref".suspects_tmp" | sort | uniq > $out/$ref".suspects" ; rm -f $suspects"_tmp"
  blastdbcmd -db $out/$ref".blastdb" -entry_batch $out/$ref".suspects" -outfmt %f -line_length 20000 -out $out/$ref".fasta_suspect_tmp"
  cat $out/$ref".fasta_suspect_tmp" | sed 's/lcl|//g' | awk '{if($0 ~ /^>/){print $1} else{print}}' > $out/$ref".fasta_suspect"
  rm -f $out/$ref".fasta_suspect_tmp"
  echo -e "\ttotal suspects transcripts in $ref : "`cat $out/$ref".suspects" | wc -l`
 done

43

khalid's avatar
khalid committed
44
 # regroup all samples
45
46
47
echo '' > $out/ALL_transcripts.fasta
echo '' > $out/ALL_transcripts.suspects
echo '' > $out/ALL_transcripts.fasta_suspects
khalid's avatar
khalid committed
48

49
50
51
52
53
for ref in "${orders[@]}"; do 
 cat $out/$ref.fasta_mod >> $out/ALL_transcripts.fasta
 cat $out/$ref.suspects >> $out/ALL_transcripts.suspects
 cat $out/$ref.fasta_suspect >> $out/ALL_transcripts.fasta_suspects
done