Commit 440525db authored by peguerin's avatar peguerin
Browse files

check dat files integrity

parent 7dae9a42
......@@ -43,10 +43,27 @@ def mkdir_results(results_subfolders):
if not os.path.exists(os.path.join("results",subfolder)):
os.mkdir(os.path.join("results",subfolder))
def is_dna_sequence(dna_string, alphabet):
    """Tell whether every symbol of ``dna_string`` belongs to ``alphabet``.

    Args:
        dna_string: The sequence to check, iterated symbol by symbol.
        alphabet: Container of allowed symbols (e.g. the string "ATGC").
            Membership is tested with ``in``, so the check is case-sensitive.

    Returns:
        True when all symbols are in ``alphabet`` (an empty sequence is
        vacuously valid), False as soon as one symbol is not, and False when
        ``dna_string`` cannot be iterated at all.
    """
    try:
        bases = iter(dna_string)
    except TypeError:
        # Not a sequence at all, e.g. the float NaN pandas yields for an
        # empty cell: report it as invalid instead of crashing the caller.
        return False
    # all() inspects the whole sequence and stops at the first bad symbol,
    # so the verdict never depends on the first base only.
    return all(base in alphabet for base in bases)
def read_dat(filedat):
    """Load a tab-separated, header-less ``.dat`` file and check its integrity.

    Args:
        filedat: Path (or any object accepted by ``pandas.read_csv``) of the
            ``.dat`` file to read.

    Returns:
        A pandas DataFrame whose columns are named 'experiment', 'plaque',
        'barcode', 'primer5', 'primer3' and 'F', in that order.

    Raises:
        AssertionError: If the file does not have exactly 6 columns, if the
            columns do not all have the same size, or if a value of
            'primer5' / 'primer3' contains a symbol outside the allowed
            nucleotide alphabet.
    """
    # The checks below raise explicitly instead of using ``assert`` so they
    # still run under ``python -O``; AssertionError is kept so callers see
    # the same exception type as before.
    expected_columns = ['experiment', 'plaque', 'barcode', 'primer5', 'primer3', 'F']
    # Standard bases plus the IUPAC ambiguity codes accepted in primers.
    primer_alphabet = "ATGCRYSWKMBDHVN"

    dfdat = pandas.read_csv(filedat, sep="\t", header=None)

    ## check dat file must have 6 columns
    if dfdat.shape[1] != len(expected_columns):
        raise AssertionError(
            "dat file {!r}: expected {} columns, found {}".format(
                filedat, len(expected_columns), dfdat.shape[1]
            )
        )
    dfdat.columns = expected_columns

    ## check dat file: all columns must have the same size
    # Columns of one DataFrame share a single index, so this is a sanity
    # guard that is not expected to fail.
    reference_size = dfdat['experiment'].size
    for column in dfdat.columns:
        if dfdat[column].size != reference_size:
            raise AssertionError(
                "dat file {!r}: column '{}' has size {}, expected {}".format(
                    filedat, column, dfdat[column].size, reference_size
                )
            )

    ## check primers are dna strings
    for column in ['primer5', 'primer3']:
        for sequence in dfdat[column]:
            if not is_dna_sequence(sequence, primer_alphabet):
                raise AssertionError(
                    "dat file {!r}: invalid sequence {!r} in column '{}'".format(
                        filedat, sequence, column
                    )
                )
    return dfdat
......@@ -175,7 +192,7 @@ for run in uniqRuns:
selectedRow=runMarkerDfrm[(runMarkerDfrm.plaque == plaque)]
projet=selectedRow['projet'].values[0]
sample=selectedRow['sample'].values[0]
plaque_dat=all_dat[marker][(all_dat[marker].plaque == plaque)]
plaque_dat=all_dat[marker].loc[(all_dat[marker]['plaque'] == plaque)]
barcode5=plaque_dat["barcode"].values[0]
barcode3=str(Seq(barcode5).reverse_complement())
primer5=plaque_dat["primer5"].values[0]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment