diff --git a/account_statement_base_import/parser/file_parser.py b/account_statement_base_import/parser/file_parser.py index ac8dfbe0..706e0b2d 100644 --- a/account_statement_base_import/parser/file_parser.py +++ b/account_statement_base_import/parser/file_parser.py @@ -41,7 +41,7 @@ class FileParser(BankStatementImportParser): """ def __init__(self, parse_name, ftype='csv', extra_fields=None, header=None, - **kwargs): + dialect=None, **kwargs): """ :param char: parse_name: The name of the parser :param char: ftype: extension of the file (could be csv, xls or @@ -64,6 +64,7 @@ class FileParser(BankStatementImportParser): self._datemode = 0 # used only for xls documents, # 0 means Windows mode (1900 based dates). # Set in _parse_xls, from the contents of the file + self.dialect = dialect def _custom_format(self, *args, **kwargs): """No other work on data are needed in this parser.""" @@ -111,7 +112,8 @@ class FileParser(BankStatementImportParser): csv_file.write(self.filebuffer) csv_file.flush() with open(csv_file.name, 'rU') as fobj: - reader = UnicodeDictReader(fobj, fieldnames=self.fieldnames) + reader = UnicodeDictReader(fobj, fieldnames=self.fieldnames, + dialect=self.dialect) return list(reader) def _parse_xls(self): diff --git a/account_statement_base_import/parser/parser.py b/account_statement_base_import/parser/parser.py index 9618157d..80cfbd0d 100644 --- a/account_statement_base_import/parser/parser.py +++ b/account_statement_base_import/parser/parser.py @@ -29,11 +29,15 @@ def UnicodeDictReader(utf8_data, **kwargs): pos = utf8_data.tell() sample_data = utf8_data.read(2048) utf8_data.seek(pos) - dialect = sniffer.sniff(sample_data, delimiters=',;\t') + if not kwargs.get('dialect'): + dialect = sniffer.sniff(sample_data, delimiters=',;\t') + del kwargs['dialect'] + else: + dialect = kwargs.pop('dialect') csv_reader = csv.DictReader(utf8_data, dialect=dialect, **kwargs) for row in csv_reader: - yield dict([(key, unicode(value, 'utf-8')) for key, value in - row.iteritems()]) + yield dict([(unicode(key, 'utf-8'), unicode(value, 'utf-8')) + for key, value in row.iteritems() if key]) class BankStatementImportParser(object):