Commit 8ecc9a05 authored by peguerin
Browse files

add flag args to edit taxdump

parent e8db4031
......@@ -47,12 +47,12 @@ from curegen import generate_curation_df
def main():
args = parse_args()
## load taxonomy ncbi
ncbi = load_taxdump(args.ncbi_taxdump)
ncbi = load_taxdump(args.ncbi_taxdump, args.ncbi_taxdump_load)
taxDic = taxdump2dic(ncbi, rootTaxon=7742)
if args.command == 'validate':
print('validate records...')
rawResults = validate_fasta(args.fasta, taxDic, ncbi)
results = curation(args.curate, rawResults, taxDic, ncbi)
results = curation(args.curate, rawResults, taxDic, ncbi, args.ncbi_taxdump_edition, args.ncbi_taxdump)
SeqIO.write(results['valide'], str(args.output_prefix)+'_valide.fasta', 'fasta')
SeqIO.write(results['faultyFormat'], str(args.output_prefix)+'_faulty_format.fasta', 'fasta')
SeqIO.write(results['faultyTaxon'], str(args.output_prefix)+'_faulty_taxon.fasta', 'fasta')
......
......@@ -34,9 +34,12 @@ def parse_args(usage=HELPER_TEXT):
validate = subprasers.add_parser('validate', help='check format and taxonomy')
validate.add_argument("-f","--fasta", type=str, help='path of the barcodes sequences FASTA file', required=True)
validate.add_argument("-c","--curate", type=str, help='path of the taxonomy curation CSV file. Header must be current_name;ncbi_name;genus;family. A curation CSV file can be generated with the command curegen', required=False, default="NA")
validate.add_argument("-n","--ncbi_taxdump", type=str, help='path of NCBI taxdump.tar.gz file', required=False, default="NA")
validate.add_argument("-n","--ncbi_taxdump", type=str, help='path of NCBI taxonomy folder', required=False, default="NA")
validate.add_argument("-l","--ncbi_taxdump_load", action='store_true', help='load NCBI taxonomy folder')
validate.add_argument("-e","--ncbi_taxdump_edition", action='store_true', help='allow edition of ncbi taxdump files to add new taxonomy nodes')
validate.add_argument("-o","--output_prefix", type=str, help='prefix of the output FASTA such as [PREFIX].fasta')
curegen = subprasers.add_parser('curegen', help='try to correct wrong taxonomy')
curegen.add_argument("-f","--fasta", type=str, help='path of the barcodes sequences FASTA file', required=True)
curegen.add_argument("-n","--ncbi_taxdump", type=str, help='path of NCBI taxdump.tar.gz file', required=False, default="NA")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment