arguments.py 4.86 KB
Newer Older
peguerin's avatar
peguerin committed
1
2
import argparse
import sys
peguerin's avatar
peguerin committed
3
4
import os
import pandas
peguerin's avatar
peguerin committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18

# Banner and short usage text printed when the command line is missing or faulty.
# Raw string: the ASCII art contains backslashes that must be emitted verbatim
# and would otherwise be parsed as (invalid) escape sequences.
HELPER_TEXT = r"""
.___  ___.  __  ___ .______    _______  .______      
|   \/   | |  |/  / |   _  \  |       \ |   _  \     
|  \  /  | |  '  /  |  |_)  | |  .--.  ||  |_)  |    
|  |\/|  | |    <   |   _  <  |  |  |  ||      /     
|  |  |  | |  .  \  |  |_)  | |  '--'  ||  |\  \----.
|__|  |__| |__|\__\ |______/  |_______/ | _| `._____|
                                                     
_______________________________________________________________________________


Pierre-Edouard GUERIN, Laetitia MATHON, Virginie MARQUES, Stephanie MANEL
CNRS, EPHE, Sorbonne University, Montpellier, France
version 0.2 "Tete de veau" March 2021
Usage:
> python3 mkbdr [options]
For help:
> python3 mkbdr --help
"""


peguerin's avatar
peguerin committed
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def check_taxdumpdir(ncbiTaxdumpDir):
    """Check that an NCBI taxdump directory exists and holds the expected files.

    An ``ARGUMENTS ERROR`` message is printed for the first problem found.

    Args:
        ncbiTaxdumpDir: path of the directory expected to contain
            ``nodes.dmp`` and ``names.dmp``.

    Returns:
        True when the path exists and both files are present, False otherwise.
    """
    if not os.path.exists(ncbiTaxdumpDir):
        print('ARGUMENTS ERROR: arguments ncbi_taxdump {0}. This path not exists.'.format(ncbiTaxdumpDir))
        return False
    # nodes.dmp is checked before names.dmp; only the first missing file is reported.
    for dumpName in ('nodes.dmp', 'names.dmp'):
        dumpPath = str(ncbiTaxdumpDir) + '/' + dumpName
        if not os.path.isfile(dumpPath):
            print('ARGUMENTS ERROR: arguments ncbi_taxdump {0}. {1} is missing.'.format(ncbiTaxdumpDir, dumpPath))
            return False
    return True


def check_curatecsv(curateCsvFile):
    """Check that a taxonomy curation CSV file exists and has the required header.

    The file is read with ``;`` as delimiter. An ``ARGUMENTS ERROR`` message
    is printed when the file is missing, cannot be parsed, or lacks one or
    more of the required columns (all missing columns are listed at once).

    Args:
        curateCsvFile: path of the curation CSV file.

    Returns:
        True when the file can be read and contains every required column,
        False otherwise.
    """
    requiredColumns = ['current_name', 'ncbi_name', 'genus', 'family', 'ncbi_rank']
    if not os.path.isfile(curateCsvFile):
        print('ARGUMENTS ERROR: arguments curate {0}. {0} file not exists.'.format(curateCsvFile))
        return False
    try:
        dfCure = pandas.read_csv(curateCsvFile, sep=";")
    except ValueError:
        # pandas' ParserError and EmptyDataError, as well as UnicodeDecodeError,
        # are all subclasses of ValueError.
        print("ARGUMENTS ERROR: arguments curate {0}. {0} could not be parsed as a CSV file with ';' as delimiter.".format(curateCsvFile))
        return False
    missingColumns = [column for column in requiredColumns if column not in dfCure.columns]
    if missingColumns:
        print("ARGUMENTS ERROR: arguments curate {0}. Missing column(s): {1}.".format(curateCsvFile, ", ".join(missingColumns)))
        return False
    return True
    
peguerin's avatar
peguerin committed
60
61
62
63
64
65
66
67

def parse_args(usage=HELPER_TEXT, argv=None):
    """Parse and validate the mkbdr command line.

    Two sub-commands are defined: ``validate`` and ``curegen``. After parsing,
    the ``validate`` arguments are checked: the curation CSV (when given) must
    pass ``check_curatecsv``, and ``--ncbi_taxdump`` must be given and pass
    ``check_taxdumpdir`` whenever ``--ncbi_taxdump_load`` or
    ``--ncbi_taxdump_edition`` is set.

    Args:
        usage: banner/usage text printed when no sub-command is given or when
            an argument fails validation.
        argv: optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``; ``args.command`` is ``'validate'``
        or ``'curegen'``.

    Raises:
        SystemExit: status 0 after printing ``usage`` when no sub-command is
            given; status 1 after printing ``usage`` when a ``validate``
            argument is faulty. argparse itself also exits on malformed
            command lines.
    """
    parser = argparse.ArgumentParser(description='mkbdr - to build a custom metabarcoding reference database.')

    subparsers = parser.add_subparsers(dest='command')

    validate = subparsers.add_parser('validate', help='check format and taxonomy')
    validate.add_argument("-f","--fasta", type=str, help='path of the barcodes sequences FASTA file', required=True)
    # "NA" is the sentinel meaning "option not provided".
    validate.add_argument("-c","--curate", type=str, help='path of the taxonomy curation CSV file. Header must be current_name;ncbi_name;genus;family;ncbi_rank. A curation CSV file can be generated with the command curegen', required=False, default="NA")
    validate.add_argument("-n","--ncbi_taxdump", type=str, help='path of NCBI taxonomy folder', required=False, default="NA")
    validate.add_argument("-l","--ncbi_taxdump_load", action='store_true', help='load NCBI taxonomy folder')
    validate.add_argument("-e","--ncbi_taxdump_edition", action='store_true', help='allow edition of ncbi taxdump files to add new taxonomy nodes')
    validate.add_argument("-o","--output_prefix", type=str, help='prefix of the output FASTA such as [PREFIX].fasta')

    curegen = subparsers.add_parser('curegen', help='try to correct wrong taxonomy')
    curegen.add_argument("-f","--fasta", type=str, help='path of the barcodes sequences FASTA file', required=True)
    curegen.add_argument("-n","--ncbi_taxdump", type=str, help='path of NCBI taxdump.tar.gz file', required=False, default="NA")
    curegen.add_argument("-d","--database_globalnames", type=str, help="Name of the selected database from GlobalNames i.e. 'Catalogue of Life' or 'Fishbase Cache'", required=False, default='FishBase Cache')
    curegen.add_argument("-o","--output_prefix", type=str, help='prefix of the output curated taxonomy CSV such as [PREFIX].csv')
    curegen.add_argument("-l","--ncbi_taxdump_load", action='store_true', help='load NCBI taxonomy folder')

    args = parser.parse_args(argv)

    # No sub-command given: show the banner and leave without an error status.
    if args.command not in ['validate', 'curegen']:
        print(usage)
        sys.exit(0)

    print("Checking arguments...", end='')
    # Faulty arguments exit with a non-zero status so that calling scripts
    # can detect the failure.
    if args.command == 'validate':
        if args.curate != 'NA':
            if check_curatecsv(args.curate) is False:
                print(usage)
                sys.exit(1)
        if args.ncbi_taxdump_edition or args.ncbi_taxdump_load:
            if args.ncbi_taxdump == 'NA':
                print('ARGUMENT ERROR: ncbi_taxdump argument is needed.')
                print(usage)
                sys.exit(1)
            else:
                if check_taxdumpdir(args.ncbi_taxdump) is False:
                    print(usage)
                    sys.exit(1)
    print("done.")
    return args