"""Convert a delimited CSV file into a single SQL ``INSERT INTO`` statement.

Each CSV cell is rendered as a SQL literal, tested in this order:
empty cell -> ``NULL``; cell matching the date pattern -> ``TO_DATE(...)``;
integer / decimal -> emitted verbatim; ``true`` / ``false`` (any case) ->
``True`` / ``False``; anything else -> single-quoted string.

Sample input (``examples/example_1.csv``, ``;`` delimited)::

    header_1;header_2;header_3;header_4
    test1;0;1353;-16543
    test1;0.01;1353.13;-16543.132
    test3;08/05/1998T18:01:00;08/05/2022;08/05/1998 18:01:00
    test4;True;TRUE;FALSE
    test5;;;
"""
import argparse
import csv
import logging
import os
import re
import sys

# Value kinds the converter distinguishes: varchar integer float date bool

DEFAULT_CSV_FILE = None
DEFAULT_DELIMITER = ';'
DEFAULT_TABLE = None
DEFAULT_OUTPUT_FILE = 'output.sql'
DEFAULT_HEADERS = True
DEFAULT_DATE_FORMAT = 'DD/MM/YYYY'
# The raw-string prefix belongs outside the quotes; inside them the regex
# would require a literal "r" before the digits and never match a date.
DEFAULT_DATE_PATTERN = r'(\d+/\d+/\d+)'

# Optionally signed integer or decimal, ASCII digits only.
_NUMBER_RE = re.compile(r'[+-]?[0-9]+(?:\.[0-9]+)?')
_TRUE_WORDS = frozenset({'true', 't', 'yes', 'y', '1'})
_FALSE_WORDS = frozenset({'false', 'f', 'no', 'n', '0'})


def _str_to_bool(text):
    """Parse a command-line boolean such as ``true`` / ``no`` / ``1``.

    Raises:
        argparse.ArgumentTypeError: if *text* is not a recognised boolean word.
    """
    if isinstance(text, bool):
        return text
    word = text.strip().lower()
    if word in _TRUE_WORDS:
        return True
    if word in _FALSE_WORDS:
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {text!r}')


def _sql_literal(value, date_regex, date_format):
    """Return the SQL literal for one CSV cell (see module docstring for rules)."""
    if value == '':
        return 'NULL'
    # Double embedded single quotes so the value cannot terminate the literal.
    escaped = value.replace("'", "''")
    if date_regex.search(value):
        return f"TO_DATE('{escaped}','{date_format}')"
    if _NUMBER_RE.fullmatch(value):
        return value
    if value.lower() in ('true', 'false'):
        return value.capitalize()
    return f"'{escaped}'"


def main():
    """Command-line entry point: parse the arguments, convert, exit with 0."""
    args = set_argparse()
    csv_to_file(
        args.file,
        args.delimiter,
        args.table,
        args.output,
        args.headers,
        args.date_format,
        args.date_pattern,
    )
    sys.exit(0)


def set_argparse(argv=None):
    """Build the argument parser and parse the command line.

    Args:
        argv: argument list to parse; ``None`` (the default) parses
            ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser(description='Data from CSV file to SQL INSERT INTO table')
    # Drop argparse's built-in "options" section so that only the two custom
    # groups below are listed in --help (relies on a private attribute).
    parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')
    optional = parser.add_argument_group('optional arguments')

    required.add_argument('-f', '--file',
                          help=f'Csv file to transform (default: {DEFAULT_CSV_FILE})',
                          default=DEFAULT_CSV_FILE, required=True)

    optional.add_argument('-del', '--delimiter',
                          help=f'Csv delimiter (default: {DEFAULT_DELIMITER})',
                          default=DEFAULT_DELIMITER)
    optional.add_argument('-t', '--table', help='Table (default: filename)',
                          default=DEFAULT_TABLE)
    optional.add_argument('-o', '--output',
                          help=f'Output file (default: {DEFAULT_OUTPUT_FILE})',
                          default=DEFAULT_OUTPUT_FILE)
    # Without an explicit type every supplied value (even "False") would be a
    # non-empty, therefore truthy, string.
    optional.add_argument('-head', '--headers',
                          help=f'Headers in csv file (default: {DEFAULT_HEADERS})',
                          default=DEFAULT_HEADERS, type=_str_to_bool)
    optional.add_argument('-df', '--date_format',
                          help=f'Date format to consider (default: {DEFAULT_DATE_FORMAT})',
                          default=DEFAULT_DATE_FORMAT)
    optional.add_argument('-dp', '--date_pattern',
                          help=f'Date pattern to recognize (default: {DEFAULT_DATE_PATTERN})',
                          default=DEFAULT_DATE_PATTERN)

    optional.add_argument('-i', '--info', help='Info mode (default: True)',
                          default=True, action='store_false')
    optional.add_argument('-d', '--debug', help='Debug mode (default: False)',
                          default=False, action='store_true')

    args = parser.parse_args(argv)

    # NOTE(review): the reviewed diff elides the lines between the --debug
    # option and `return args`. The logging setup below is an assumed
    # reconstruction from the --info / --debug flags; confirm against the
    # complete file before merging.
    if args.debug:
        level = logging.DEBUG
    elif args.info:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(level=level)

    return args


def csv_to_file(csv_file, delimiter, table, output_file, headers, date_format, date_pattern):
    """Write the rows of *csv_file* to *output_file* as one INSERT statement.

    Args:
        csv_file: path of the CSV file to read.
        delimiter: single-character field separator of the CSV file.
        table: target table name; ``None`` uses the CSV file name up to its
            first dot.
        output_file: path of the SQL file to write (overwritten).
        headers: when true, the first CSV row is skipped as a header row.
        date_format: format string passed as second argument of ``TO_DATE``.
        date_pattern: regular expression; a cell in which it is found
            (``re.search``) is emitted as ``TO_DATE('<cell>','<date_format>')``.
            Anchor the pattern if cells that merely contain a date (for
            example date-times) must not be treated as dates.

    Raises:
        OSError: if either file cannot be opened.
        re.error: if *date_pattern* is not a valid regular expression.
    """
    if table is None:
        table = os.path.basename(csv_file).split('.')[0]

    logging.info('CSV file : %s', csv_file)
    logging.info('Delimiter : %s', delimiter)
    logging.info('Table : %s', table)
    logging.info('Output file : %s', output_file)
    logging.info('Headers : %s', headers)
    logging.info('Date Format : %s', date_format)
    logging.info('Date Pattern : %s', date_pattern)

    date_regex = re.compile(date_pattern)

    # newline='' lets the csv module do its own newline handling.
    with open(csv_file, 'r', newline='') as source:
        reader = csv.reader(source, delimiter=delimiter)

        if headers:
            # A default avoids StopIteration on a completely empty file.
            header_row = next(reader, None)
            logging.info('Headers : %s', header_row)

        tuples = [
            '(' + ','.join(_sql_literal(cell, date_regex, date_format) for cell in row) + ')'
            for row in reader
            if row  # blank lines yield [], which would render as "()"
        ]

    with open(output_file, 'w') as target:
        if tuples:
            target.write(f'INSERT INTO {table}\n VALUES ' + ',\n'.join(tuples) + ';')
        else:
            # An INSERT without any tuple is not valid SQL; leave the file empty.
            logging.warning('No data rows found in %s; nothing to insert', csv_file)


if __name__ == '__main__':
    main()