Commit c4ec68b0 authored by Enrique Ortega (enrique.ortega@cefe.cnrs.fr)
Browse files

Removed older versions of scripts

parent b0f115f0
phage R7R3_S65 -> /home/enrique/work/Gandon/coevolution/phages/results/mapping/R_seq/R7R3_S65.sam
/home/enrique/work/Gandon/coevolution/phages/data/trimmed/R_seq/R7R3_S65_R1.fq.gz /home/enrique/work/Gandon/coevolution/phages/data/trimmed/R_seq/R7R3_S65_R2.fq.gz
/home/enrique/work/Gandon/coevolution/phages/data/refs/indexes_Sv/Sv
phage R4T2_S46 -> /home/enrique/work/Gandon/coevolution/phages/results/mapping/R_seq/R4T2_S46.sam
/home/enrique/work/Gandon/coevolution/phages/data/trimmed/R_seq/R4T2_S46_R1.fq.gz /home/enrique/work/Gandon/coevolution/phages/data/trimmed/R_seq/R4T2_S46_R2.fq.gz
/home/enrique/work/Gandon/coevolution/phages/data/refs/indexes_Sv/Sv
#! /home/enrique/envs/biopython/bin/python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import vcf
# Hard-coded absolute path to a VCF file (presumably the experimental
# sample, going by the name -- confirm against the calling code).
vcf_file = '/mnt/alpha_raid/work/Gandon/phages/test_freebayes/results/W5T4_S71_phag.vcf'
# Hard-coded absolute path to a second VCF file (presumably the control
# sample, going by the name -- confirm against the calling code).
ctrl_file = '/mnt/alpha_raid/work/Gandon/phages/test_freebayes/results/TO-WT_S83_phag.vcf'
# Module-level registry: ImportVCF.load_vcf appends every file name it is
# asked to load to this list.
vcf_file_list = []
class Dog:
    """Toy class kept around to experiment with per-instance attributes."""

    def __init__(self):
        # Built inside __init__ so that every dog owns its own list
        # instead of sharing a single class-level one.
        self.tricks = list()

    def add_trick(self, trick):
        """Append ``trick`` to this dog's list of tricks."""
        self.tricks.append(trick)
class ImportVCF:
    """Load a VCF file and index its records by ``"<CHROM>;<POS>"``.

    Attributes:
        vcf_index: list of record keys, in the order they were read.
        vcf_dico: dict mapping key -> full record object.
        vcf_CHROM: dict mapping key -> ``record.CHROM``.
        vcf_POS: dict mapping key -> ``record.POS``.
        vcf_ALT: dict mapping key -> ``record.ALT``.
        vcf_REF: dict mapping key -> ``record.REF``.
    """

    def __init__(self):
        self.vcf_index = []
        self.vcf_dico = {}
        self.vcf_CHROM = {}
        self.vcf_POS = {}
        self.vcf_ALT = {}
        self.vcf_REF = {}

    def load_vcf(self, file_name):
        """Read ``file_name`` and add every record to the instance tables.

        The file name is also appended to the module-level
        ``vcf_file_list``. Calling this method several times accumulates
        records; when two records share the same key, the key is appended
        to ``vcf_index`` again while the dict entries are overwritten by
        the later record.

        Args:
            file_name: path of the VCF file to read.
        """
        vcf_file_list.append(file_name)
        # The reader pulls lines lazily, so the records must be consumed
        # while the file is still open; ``with`` guarantees the handle is
        # closed afterwards, even if parsing fails half-way.
        with open(file_name, 'r') as handle:
            for record in vcf.Reader(handle):
                key = record.CHROM + ';' + str(record.POS)
                self.vcf_index.append(key)
                self.vcf_dico[key] = record
                self.vcf_CHROM[key] = record.CHROM
                self.vcf_POS[key] = record.POS
                self.vcf_ALT[key] = record.ALT
                self.vcf_REF[key] = record.REF
class ImportControl:
    """Load a control VCF file and index its records by ``"<CHROM>;<POS>"``.

    Attributes:
        ctrl_index: list of record keys, in the order they were read.
        ctrl_dico: dict mapping key -> full record object.
        ctrl_CHROM: dict mapping key -> ``record.CHROM`` (chromosome).
        ctrl_POS: dict mapping key -> ``record.POS`` (position).
        ctrl_REF: dict mapping key -> ``record.REF`` (reference allele).
        ctrl_ALT: dict mapping key -> ``record.ALT`` (alternative allele).
    """

    def __init__(self):
        self.ctrl_index = []
        self.ctrl_dico = {}
        self.ctrl_CHROM = {}
        self.ctrl_POS = {}
        self.ctrl_ALT = {}
        self.ctrl_REF = {}

    def load_vcf(self, file_name):
        """Read ``file_name`` and add every record to the instance tables.

        Calling this method several times accumulates records; when two
        records share the same key, the key is appended to ``ctrl_index``
        again while the dict entries are overwritten by the later record.

        Args:
            file_name: path of the control VCF file to read.
        """
        # The reader pulls lines lazily, so the records must be consumed
        # while the file is still open; ``with`` guarantees the handle is
        # closed afterwards, even if parsing fails half-way.
        with open(file_name, 'r') as handle:
            for record in vcf.Reader(handle):
                key = record.CHROM + ';' + str(record.POS)
                self.ctrl_index.append(key)
                self.ctrl_dico[key] = record
                self.ctrl_CHROM[key] = record.CHROM
                self.ctrl_POS[key] = record.POS
                self.ctrl_ALT[key] = record.ALT
                self.ctrl_REF[key] = record.REF
class RemoveCtrlMutations:
    """Intersect an experimental VCF with a control VCF.

    ``check_ctrl_in_exp`` walks the control keys and copies into the
    ``cln_*`` tables every experimental entry that sits at the same
    ``"<CHROM>;<POS>"`` key as a control entry AND carries the same ALT.

    NOTE(review): the original description of this class said that
    mutations found in the control are removed and that only the differing
    ones are reported. The code does the opposite: the ``cln_*`` tables
    hold the shared mutations. Behaviour is left untouched here -- confirm
    which one is intended.

    Attributes:
        var: empty list; not used by the methods of this class.
        cln_index: list of the retained keys, in control order.
        cln_dico, cln_CHROM, cln_POS, cln_ALT, cln_REF: dicts mapping a
            retained key to the matching value of the experimental object.
    """

    def __init__(self):
        self.var = []
        self.cln_index = []
        self.cln_dico = {}
        self.cln_CHROM = {}
        self.cln_POS = {}
        self.cln_ALT = {}
        self.cln_REF = {}

    def check_ctrl_in_exp(self, ctrl_object, exp_object):
        """Collect the experimental entries shared with the control.

        Prints a short report (common keys, their count, and the control
        keys missing from the experiment) and fills the ``cln_*`` tables.
        Results accumulate if the method is called more than once.

        Args:
            ctrl_object: object exposing ``ctrl_index`` and ``ctrl_ALT``.
            exp_object: object exposing ``vcf_index``, ``vcf_dico``,
                ``vcf_CHROM``, ``vcf_POS``, ``vcf_ALT`` and ``vcf_REF``.
        """
        print("\nThese are the common indexes between control and experimental VCFs")
        # Built once: testing membership against the raw list inside the
        # loop would rescan it for every control key.
        exp_keys = set(exp_object.vcf_index)
        counter = 0
        ctrl_not_in_exp = []
        for key in ctrl_object.ctrl_index:
            if key not in exp_keys:
                ctrl_not_in_exp.append(key)
                continue
            counter += 1
            print("{0} in exp_object".format(key))
            if self.compare_ALT(ctrl_object, exp_object, key):
                self.cln_index.append(key)
                self.cln_dico[key] = exp_object.vcf_dico[key]
                self.cln_CHROM[key] = exp_object.vcf_CHROM[key]
                self.cln_POS[key] = exp_object.vcf_POS[key]
                self.cln_ALT[key] = exp_object.vcf_ALT[key]
                self.cln_REF[key] = exp_object.vcf_REF[key]
        print("\n{0} occurrences of control in explerimental (len = {1})".format(
            counter, len(exp_object.vcf_index)))
        print("### List of control not in experimental: {}".format(ctrl_not_in_exp))

    def compare_ALT(self, ctrl_object, exp_object, iteration):
        """Tell whether control and experiment share the same ALT.

        Args:
            ctrl_object: object exposing a ``ctrl_ALT`` mapping.
            exp_object: object exposing a ``vcf_ALT`` mapping.
            iteration: the ``"<CHROM>;<POS>"`` key to look up in both.

        Returns:
            True when both ALT values compare equal, False otherwise.

        Raises:
            KeyError: if ``iteration`` is missing from either mapping.
        """
        return bool(ctrl_object.ctrl_ALT[iteration] == exp_object.vcf_ALT[iteration])
class Patate:
    """Presence/absence matrix of mutations across several experiments.

    On construction the class merges the keys of all experiments, drops
    the ones listed in the control, and builds a 0/1 DataFrame with one
    row per remaining key and one column per experiment.

    NOTE(review): every merged key is re-labelled with the hard-coded
    chromosome ``NC_007019.1``; keys on any other chromosome will therefore
    never match the control list or the experiment lists again.

    Attributes:
        potatoe: the constant string ``'potatoe'``.
        list_experiments: list of key lists, one per experiment.
        list_headers: column names, parallel to ``list_experiments``.
        control_list: keys to exclude.
        sorted_single_full_list: unique merged keys, sorted by position.
        no_control_list: ``sorted_single_full_list`` minus the control keys.
        binary_df: DataFrame of 0/1 values (rows = ``no_control_list``,
            columns = ``list_headers``).
    """

    def __init__(self, list_experiments, list_headers, control_list):
        self.potatoe = 'potatoe'
        self.list_experiments = list_experiments
        self.list_headers = list_headers
        self.control_list = control_list
        # Order matters: each step reads the attribute set by the previous.
        self.sorted_single_full_list = self.make_full_list()
        self.no_control_list = self.remove_control()
        self.binary_df = self.binary_lists()

    def potatoes(self):
        """Print a placeholder message."""
        print("Do potatoes potate?")

    def make_full_list(self):
        """Merge all experiments into one sorted list of unique keys."""
        # Flattening in a single pass avoids re-copying the accumulated
        # list for every experiment, as repeated ``+`` would.
        merged = [key for experiment in self.list_experiments for key in experiment]
        return self.list_sorter_and_replace_strings(merged)

    def list_sorter_and_replace_strings(self, li):
        """Deduplicate keys and sort them numerically by position.

        Each element of ``li`` is a string of the form ``"CHROM;POS"``.
        A plain string sort would order positions alphabetically, so the
        position is extracted, converted to ``int``, deduplicated and
        sorted; each position is then turned back into a key.

        Args:
            li: list of ``"CHROM;POS"`` strings.

        Returns:
            List of ``"NC_007019.1;<POS>"`` strings in ascending position
            order, without duplicates. The chromosome part of the input is
            discarded.

        Raises:
            IndexError: if an element contains no ``;``.
            ValueError: if the part after the first ``;`` is not an integer.
        """
        positions = sorted({int(entry.split(';')[1]) for entry in li})
        return ["NC_007019.1;" + str(position) for position in positions]

    def remove_control(self):
        """Return the merged keys that are absent from the control list."""
        # Hashed once so each lookup is O(1) instead of a list scan.
        control_keys = set(self.control_list)
        no_control_list = [
            key for key in self.sorted_single_full_list
            if key not in control_keys
        ]
        print(">> {0} elements in the list, {1} elements in the control. {2} elements remaining".format(len(self.sorted_single_full_list), len(self.control_list), len(no_control_list) ))
        return no_control_list

    def binary_lists_draft(self):
        """Earlier, list-based version of ``binary_lists``.

        Returns:
            DataFrame with one row per key of ``no_control_list`` (default
            integer index) and one column per experiment, holding 1 when
            the key is in the experiment and 0 otherwise.
        """
        bin_list_lists = []
        for experiment in self.list_experiments:
            present = set(experiment)
            bin_list_lists.append(
                [1 if key in present else 0 for key in self.no_control_list]
            )
        mydf = pd.DataFrame(bin_list_lists)
        mydf = mydf.transpose()
        # NOTE(review): the headers are wrapped in an extra list, as in the
        # original code; this looks unintended -- confirm before relying on
        # the column labels of this draft.
        mydf.columns = [self.list_headers]
        return mydf

    def binary_lists(self):
        """Build the presence/absence DataFrame.

        Returns:
            DataFrame indexed by ``no_control_list`` with one column per
            header; a cell is 1 when the key is in that experiment, else 0.
        """
        bin_dico = {}
        for experiment, head in zip(self.list_experiments, self.list_headers):
            # One set per experiment replaces a list scan per key.
            present = set(experiment)
            bin_dico[head] = [
                1 if key in present else 0 for key in self.no_control_list
            ]
        return pd.DataFrame(index=self.no_control_list, data=bin_dico)

    def plot_binary_matrix(self, outfile, data_name):
        """Draw ``binary_df`` as a black/white image and save it.

        Also prints the number of rows and the DataFrame itself.

        Args:
            outfile: destination passed to ``Figure.savefig``.
            data_name: title displayed above the plot.
        """
        print("{0} Y labels, listed here".format(len(self.no_control_list) ))
        # Only the position part of each key is shown on the Y axis.
        ytick_labels = [key.split(";")[1] for key in self.no_control_list]
        xtick_positions = list(range(len(self.list_headers)))
        print(self.binary_df)

        fig = plt.figure(figsize=(4, 35))
        try:
            fig.suptitle("Presence/Absence of mutations")
            # Axes rectangle as fractions of the figure:
            # (left, bottom, width, height).
            ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
            ax.set_title(data_name)

            ax.set_xlabel('Time')
            ax.set_xticks(xtick_positions)
            ax.set_xticklabels(self.list_headers)

            ax.set_ylabel('Coordinate by experiment')
            ax.set_yticks(range(0, len(self.no_control_list)))
            ax.set_yticklabels(ytick_labels)

            # Draw on the explicit axes rather than through pyplot's
            # implicit "current axes" state.
            ax.imshow(self.binary_df, cmap='binary')

            fig.savefig(outfile)
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment