#!/usr/bin/env python
# -*- encoding: utf-8 -*-
##############################################################################
#
#  Copyright (C) 2011 credativ Ltd.
#  All Rights Reserved
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Affero General Public License as
#  published by the Free Software Foundation, either version 3 of the
#  License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Affero General Public License for more details.
#
#  You should have received a copy of the GNU Affero General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
"""
Parser for HSBC UK MT940 format files
Based on fi_patu's parser
"""

import re
from datetime import datetime
import logging

_logger = logging.getLogger(__name__)


class HSBCParser(object):
    """Regular-expression based parser for MT940 statement records.

    Each supported record tag (``:20:``, ``:25:``, ``:28:``/``:28C:``,
    ``:60F:``, ``:61:``, ``:86:``, ``:64:``/``:62F:``/``:62M:``) has one
    compiled pattern in ``self.recparse``; ``parse_record`` tries them in
    turn and post-processes the captured fields.
    """

    # Captured fields whose surrounding whitespace is removed.
    _STRIP_FIELDS = frozenset([
        "transref", "accnum", "statementnr", "custrefno", "bankref",
        "furtherinfo", "infoline1", "infoline2", "infoline3", "infoline4",
        "infoline5", "startingbalance", "endingbalance",
    ])

    # Captured amounts converted to float (comma is the decimal separator).
    _FLOAT_FIELDS = frozenset(["startingbalance", "endingbalance", "amount"])

    # Date fields, in conversion order: "valuedate" must be converted before
    # "bookingdate", because a 4-digit booking date borrows its year.
    _DATE_FIELDS = ("prevstmtdate", "valuedate", "bookingdate")

    def __init__(self):
        """Build and compile the per-record regular expressions."""
        recparse = dict()
        patterns = {'ebcdic': r"\w/\?:\(\).,'+{} -"}

        # NOTE(review): the group names that this module does not reference
        # itself (recordid, sortcode, creditmarker, currencycode, currency,
        # bookingcode) were reconstructed after the original names were lost;
        # confirm them against the code consuming the parsed dicts.

        # MT940 header
        recparse["20"] = r":(?P<recordid>20):(?P<transref>.{1,16})"
        recparse["25"] = (r":(?P<recordid>25):(?P<sortcode>\d{6})"
                          r"(?P<accnum>\d{1,29})")
        recparse["28"] = r":(?P<recordid>28C?):(?P<statementnr>.{1,8})"

        # Opening balance 60F
        recparse["60F"] = (r":(?P<recordid>60F):(?P<creditmarker>[CD])"
                           r"(?P<prevstmtdate>\d{6})(?P<currencycode>.{3})"
                           r"(?P<startingbalance>[\d,]{1,15})")

        # Transaction
        # NOTE(review): "furtherinfo" captures a single character only, as in
        # the original pattern; a length quantifier may have been intended.
        recparse["61"] = (
            r":(?P<recordid>61):"
            r"(?P<valuedate>\d{6})(?P<bookingdate>\d{4})?"
            r"(?P<creditmarker>R?[CD])"
            r"(?P<currency>[A-Z])?"
            r"(?P<amount>[\d,]{1,15})"
            r"(?P<bookingcode>[A-Z][A-Z0-9]{3})"
            r"(?P<custrefno>[%(ebcdic)s]{1,16})"
            r"(?://)"
            r"(?P<bankref>[%(ebcdic)s]{1,16})?"
            r"(?:\n(?P<furtherinfo>[%(ebcdic)s]))?"
        ) % patterns

        # Further info
        recparse["86"] = (r":(?P<recordid>86):"
                          r"(?P<infoline1>.{1,80})?"
                          r"(?:\n(?P<infoline2>.{1,80}))?"
                          r"(?:\n(?P<infoline3>.{1,80}))?"
                          r"(?:\n(?P<infoline4>.{1,80}))?"
                          r"(?:\n(?P<infoline5>.{1,80}))?")

        # Forward available balance (64) / Closing balance (62F)
        # / Interim balance (62M)
        recparse["64"] = (r":(?P<recordid>64|62[FM]):"
                          r"(?P<creditmarker>[CD])"
                          r"(?P<bookingdate>\d{6})(?P<currencycode>.{3})"
                          r"(?P<endingbalance>[\d,]{1,15})")

        for record in recparse:
            recparse[record] = re.compile(recparse[record])
        self.recparse = recparse

    def parse_record(self, line):
        """Parse one record using the regexps and apply post processing.

        :param line: a complete record; continuation lines of a multi-line
            record are joined to the first line with ``"\\n"``.
        :returns: dict of the captured, non-empty fields (strings stripped,
            amounts as float, dates as ``datetime`` or ``None`` when the
            date is invalid), or ``None`` when no pattern matches.
        """
        for pattern in self.recparse.values():
            matchobj = pattern.match(line)
            if matchobj:
                break
        else:
            _logger.warning("failed to match line %r", line)
            return

        # Remove members that did not take part in the match (None) or
        # that matched an empty string.
        matchdict = dict(
            (k, v) for k, v in matchobj.groupdict().items() if v
        )

        # Strip strings
        for field in self._STRIP_FIELDS.intersection(matchdict):
            matchdict[field] = matchdict[field].strip()

        # Convert to float. Comma is decimal separator
        for field in self._FLOAT_FIELDS.intersection(matchdict):
            matchdict[field] = float(matchdict[field].replace(',', '.'))

        # Convert date fields, in a fixed order (see _DATE_FIELDS).
        for field in self._DATE_FIELDS:
            if field in matchdict:
                matchdict[field] = self._convert_date(field, matchdict)

        return matchdict

    @staticmethod
    def _convert_date(field, matchdict):
        """Return ``matchdict[field]`` as a datetime, or None if invalid.

        A 4-digit (MMDD) booking date takes its year from the already
        converted value date; if the result falls after the value date it
        is moved back one year.
        """
        datestring = matchdict[field]
        valuedate = None
        if field == "bookingdate" and len(datestring) == 4:
            valuedate = matchdict.get("valuedate")
            if not isinstance(valuedate, datetime):
                # No usable value date to borrow the year from.
                return None
            datestring = valuedate.strftime('%y') + datestring
        try:
            converted = datetime.strptime(datestring, '%y%m%d')
            if valuedate is not None and converted > valuedate:
                # May itself raise ValueError (29 February).
                converted = converted.replace(year=converted.year - 1)
        except ValueError:
            return None
        return converted

    def parse(self, cr, data):
        """Group lines into records and parse each of them.

        :param cr: unused by this method.
        :param data: iterable of lines, with or without line terminators.
        :returns: list with one ``parse_record`` result per record, in
            input order (``None`` for records that did not match).
        """
        records = []
        # Some records are multiline
        for line in data:
            # Drop the terminator so that joined continuation lines are
            # separated by exactly one "\n", as the patterns expect.
            line = line.rstrip('\r\n')
            if len(line) <= 1:
                continue
            if line[0] == ':':
                records.append(line)
            elif records:
                records[-1] = '\n'.join([records[-1], line])
            else:
                # Continuation line with no record to attach it to.
                _logger.warning(
                    "ignoring line before first record: %r", line)

        return [self.parse_record(rec) for rec in records]


def parse_file(filename):
    """Parse the MT940 file at *filename* and return the parsed records."""
    with open(filename, "r") as hsbcfile:
        return HSBCParser().parse(None, hsbcfile)


def main():
    """The main function, currently just calls a dummy filename

    :returns: description
    """
    parse_file("testfile")


if __name__ == '__main__':
    main()