From 9e39c9329b409a74e5818e5a695d71e20f804324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Y=C3=BBki=20Vachot?= Date: Sun, 8 May 2022 20:25:38 +0200 Subject: [PATCH] Add csv_to_sql_file --- csv_to_sql_file.py | 108 +++++++++++++++++++++++++++++------------ examples/example_1.csv | 6 +++ requirements.txt | Bin 0 -> 4278 bytes 3 files changed, 83 insertions(+), 31 deletions(-) create mode 100644 examples/example_1.csv diff --git a/csv_to_sql_file.py b/csv_to_sql_file.py index bc12ad6..1c90627 100644 --- a/csv_to_sql_file.py +++ b/csv_to_sql_file.py @@ -1,52 +1,53 @@ -import logging, argparse, sys, os +import argparse import csv +import logging import re +import sys +import os -# python csv_to_sql_file.py -file file.csv -table sql_table -output output.sql -headers -auto_type -date_format -# varchar integer float date bool +# varchar integer float date bool -DEFAULT_DATE_FORMAT = "DD/MM/YYYY" - - -def csv_to_file(csv_file, table, output_file, headers, auto_type, date_format): - print(csv_file, table, output_file, headers, auto_type, date_format) +DEFAULT_CSV_FILE = None +DEFAULT_DELIMITER = ';' +DEFAULT_TABLE = None +DEFAULT_OUTPUT_FILE = 'output.sql' +DEFAULT_HEADERS = True +DEFAULT_DATE_FORMAT = 'DD/MM/YYYY' +DEFAULT_DATE_PATTERN = 'r(\d+/\d+/\d+)' def main(): + args = set_argparse() + csv_to_file( + args.file, + args.delimiter, + args.table, + args.output, + args.headers, + args.date_format, + args.date_pattern + ) - CSV_FILE = args.file - TABLE = args.table - OUTPUT_FILE = args.output - HEADERS = args.headers - AUTO_TYPE = args.auto_type - DATE_FORMAT = args.date_format - - logging.info('CSV file : %s', CSV_FILE) - logging.info('Table : %s', TABLE) - logging.info('Output file : %s', OUTPUT_FILE) - logging.info('Headers : %s', HEADERS) - logging.info('Auto type : %s', AUTO_TYPE) - logging.info('Date Format : %s', DATE_FORMAT) - - csv_to_file(CSV_FILE, TABLE, OUTPUT_FILE, HEADERS, AUTO_TYPE, DATE_FORMAT) exit(0) def set_argparse(): - parser = argparse.ArgumentParser(description='') - parser.action_groups.pop() + parser = argparse.ArgumentParser(description='Data from CSV file to SQL INSERT INTO table') + parser._action_groups.pop() required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') - required.add_argument('-f', '--file', help=' (default: None)', default=None, required=True) + required.add_argument('-f', '--file', help=f' (default: {DEFAULT_CSV_FILE})', default=DEFAULT_CSV_FILE, required=True) - optional.add_argument('-t', '--table', help=' (default: filename)', default=None) - optional.add_argument('-o', '--output', help=' (default: output.sql)', default='output.sql') - optional.add_argument('-h', '--headers', help=' (default: True)', default=True) - optional.add_argument('-at', '--auto_type', help=' (default: True)', default=True) - optional.add_argument('-df', '--date_format', help=f' (default: {DEFAULT_DATE_FORMAT})', + optional.add_argument('-del', '--delimiter', help=f'Csv file to transform (default: {DEFAULT_DELIMITER})', default=DEFAULT_DELIMITER) + optional.add_argument('-t', '--table', help=f'Table (default: filename)', default=DEFAULT_TABLE) + optional.add_argument('-o', '--output', help=f'Output file (default: {DEFAULT_OUTPUT_FILE})', default=DEFAULT_OUTPUT_FILE) + optional.add_argument('-head', '--headers', help=f'Headers in csv file (default: {DEFAULT_HEADERS})', default=DEFAULT_HEADERS) + optional.add_argument('-df', '--date_format', help=f'Date format to consider (default: {DEFAULT_DATE_FORMAT})', default=DEFAULT_DATE_FORMAT) + optional.add_argument('-dp', '--date_pattern', help=f'Date pattern to recognize (default: {DEFAULT_DATE_PATTERN})', + default=DEFAULT_DATE_PATTERN) optional.add_argument('-i', '--info', help='Info mode (default: True)', default=True, action='store_false') optional.add_argument('-d', '--debug', help='Debug mode (default: False)', default=False, action='store_true') @@ -67,5 +68,50 @@ def set_argparse(): return args +def csv_to_file(csv_file, delimiter, table, output_file, headers, date_format, date_pattern): + if table is None: + table = os.path.basename(csv_file).split('.')[0] + + logging.info('CSV file : %s', csv_file) + logging.info('Delimiter : %s', delimiter) + logging.info('Table : %s', table) + logging.info('Output file : %s', output_file) + logging.info('Headers : %s', headers) + logging.info('Date Format : %s', date_format) + logging.info('Date Pattern : %s', date_pattern) + + openFile = open(csv_file, 'r') + csvFile = csv.reader(openFile, delimiter=delimiter) + + if headers: + headersRow = next(csvFile) + logging.info('Headers : %s', headersRow) + + insert = f'INSERT INTO {table}\n VALUES ' + with open(output_file, 'w') as outputFile: + valuesString = "" + for row in csvFile: + values = [] + for value in map((lambda x: x), row): + if value == "": + values.append("NULL") + elif re.search(date_pattern, value): + values.append(f"TO_DATE('{value}','{date_format}')") + elif value.isnumeric() or value.isdecimal() or value.isdigit(): + values.append(value) + elif value.lower() in ['true', 'false', 'True', 'False', 'TRUE', 'FALSE']: + values.append(value.capitalize()) + else: + values.append(f"'{value}'") + print(re.search(date_pattern, value)) + valuesString += f"({','.join(values)}),\n" + + print(valuesString) + valuesString = insert + valuesString[0:-2] + ";" + outputFile.writelines(valuesString) + outputFile.close() + openFile.close() + + if __name__ == '__main__': main() diff --git a/examples/example_1.csv b/examples/example_1.csv new file mode 100644 index 0000000..ad795f0 --- /dev/null +++ b/examples/example_1.csv @@ -0,0 +1,6 @@ +header_1;header_2;header_3;header_4 +test1;0;1353;-16543 +test1;0.01;1353.13;-16543.132 +test3;08/05/1998T18:01:00;08/05/2022;08/05/1998 18:01:00 +test4;True;TRUE;FALSE +test5;;; \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2640300ff8ae8ec36f8648dce089c66535021384 100644 GIT binary patch literal 4278 zcmZvgOK%%T5QOI(AU}nHNJ?>h@F57`gAoG`WWeX;(-g@S#fNF>$0w<8YI|pwBm`Nu zrlz~Px~6C7|NgDgCe70}P0}h&(+7PG({JgWz8})B>6cWceOjjHRHsoI2(i%DD&t1! zPAJfK4e}`5Hpe+@*D4(q`OwC^?;(#ucx{wpjM~+Sa!%7)-*DUH{bcGQ=2|-`{z`04 z!W@+Oly>PpqsjDM-{Z_{k}D+U_jZ+dW6cY%tL14Y+e0OZYaj<UJ+_Dr#kH<(UU2R&NnH8@g*YlPhcOW}gjb3)XZk(asGe1F zQ1K||nQIT#pw6=t=3jOuIP1Gmt!wcZG<>Liseglwi0-FZeH7P9QQRv#H}aLWLHbJ> z;~b2Yjf^ObAG)~D;^nS!rrfh`gm&b3f2Tji74I8nr&)r?BZwsO39jr}I{#}+f}M5L0rzUu5TD`@pXqm?>dOTp4hYeHVm0DWc-nM3R~ z2P%Knew{~NGYi%O#N2wARpXQSJS_2q@vb&kohpZS~MG2;9 zOy_VV@t(9g6YI$2S!o?U>G!n|mH4q<{Op}n!AYerp2Xqy(D_Munbs@)Nd%wZ@Ya)d z)AvK`R{MiIMY$(2_cWjmZY<)JlS~S6@x9vvaS#sP(A7FO-2O zRx;ufmkC*CbvTF5`tr;t zTTzJ9&ip8MpY zg1h~oQwXwj2e13p_a*0`>*uTt5sK?}THWQHFAW2-z##4}9K-Fu>{JXUs$s$~510gV zArCiBD)?bOtNmTvYXg0w^+u{Oz33kNyvOec82W9;OJvbvhWTyi^RrWC-a5PumeMFr z0I$bTtdAI|%k&84%5B-jmmc@olP7vZ?;;mYU_yRTyvs?#Y3#ndz{= zOK~r^p0!{NzAsv{4u$oonI&Kj;^fydcNcTky(bDY#Cme8tr|BEK=-W!Xwi#g`+-Ib-J!ZsAQNJn+^UXIhu}Y6|!Ou3wFlPz2>l++n z&7Z~CJRb8dMH`hyXR-GiqgyX^#5(sC>N5R!(=j(nu5!D56N;C@n|?CWqnl<# rUH!KVH|t!!aqn;eGl^aJup5`p2fl%w*JnLcewBv&-$UtVd@}qWm5*uf literal 0 HcmV?d00001