detect_suspect_trans_with_blast.sh~ 2.44 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# add sample name to all sequence names and build BLASTdb
#for (( j=0; j <i; j++ )); do
for ref in "${orders[@]}"; do
	#ref=${fasta_array[$j]}
	#awk '/^>/{ print $1 } ; /^[^>]/{ print $0 }' < $INDIR/${fasta_array[$j]}.fasta > $out/${fasta_array[$j]}".fasta_mod"
	#sed -i "s/>/>$ref|/g" $out/${fasta_array[$j]}".fasta_mod"
	#makeblastdb -in $out/${fasta_array[$j]}".fasta_mod" -parse_seqids -dbtype nucl -out $out/${fasta_array[$j]}".blastdb"

	awk '/^>/{ print $1 } ; /^[^>]/{ print $0 }' < $INDIR/${ref}.fasta > $out/${ref}".fasta_mod"
	sed -i "s/>/>$ref|/g" $out/${ref}".fasta_mod"
	makeblastdb -in $out/${ref}".fasta_mod" -parse_seqids -dbtype nucl -out $out/${ref}".blastdb"
done


### initial version of the section :
# "all pairwise BLAST and listing suspects (see $SUSPID and $SUSPLEN)"
 echo -e "\n"
 #for (( j=0; j <i; j++ )); do
 for ref in "${orders[@]}"; do
  #ref=${fasta_array[$j]};
  suspects=$out/$ref".suspects"
  
  #for (( k=0; k <i; k++ )); do
  for target in "${orders[@]}"; do
   #target=${fasta_array[$k]};
   if [ "$ref" != "$target" ]; then
    outblast=$out/$ref"v"$target".outblast"
    query=$out/$ref".fasta_mod"
    db=$out/$target".blastdb"
    echo -e "Blasting $ref vs. $target"
    blastn -num_threads $PROCESSORS -query $query -db $db -perc_identity $SUSPID -soft_masking true -max_target_seqs 5000 -outfmt "6 qseqid sseqid evalue pident bitscore qstart qend qlen sstart send slen" -out $outblast
   fi
  done
  cat $out/$ref"v"*".outblast" > $out/$ref".outblast" ; rm -f $out/$ref"v"*".outblast"
  cat $out/$ref".outblast" | awk -v susplen=$SUSPLEN '{ if($5>=susplen){print $1} }' > $out/$ref".suspects_tmp"
  cat $out/$ref".suspects_tmp" | sort | uniq > $out/$ref".suspects" ; rm -f $suspects"_tmp"
  blastdbcmd -db $out/$ref".blastdb" -entry_batch $out/$ref".suspects" -outfmt %f -line_length 20000 -out $out/$ref".fasta_suspect_tmp"
  cat $out/$ref".fasta_suspect_tmp" | sed 's/lcl|//g' | awk '{if($0 ~ /^>/){print $1} else{print}}' > $out/$ref".fasta_suspect"
  rm -f $out/$ref".fasta_suspect_tmp"
  echo -e "\ttotal suspects transcripts in $ref : "`cat $out/$ref".suspects" | wc -l`
 done


 # regroup all samples
psimion's avatar
psimion committed
45
46
47
echo '' > $out/ALL_transcripts.fasta
echo '' > $out/ALL_transcripts.suspects
echo '' > $out/ALL_transcripts.fasta_suspects
48

psimion's avatar
psimion committed
49
50
51
52
53
for ref in "${orders[@]}"; do 
 cat $out/$ref.fasta_mod >> $out/ALL_transcripts.fasta
 cat $out/$ref.suspects >> $out/ALL_transcripts.suspects
 cat $out/$ref.fasta_suspect >> $out/ALL_transcripts.fasta_suspects
done