#!/usr/bin/env python
# encoding: utf-8
"""Parser for PATU format files.

Every PATU line handled here starts with ``T``, a two-character record id
and a three-digit record length; the rest of the line is a sequence of
fixed-width fields.  :class:`PatuParser` turns one such line into a ``dict``
of typed values, :func:`parse_file` does so for every line of a file.
"""
import datetime
import re

# Field-name tables used by PatuParser.parse_record.  They are module level
# constants so that they are built once instead of on every parsed line.

# Text fields whose surrounding padding is removed.
_STRIP_FIELDS = frozenset((
    "bankcontact1", "bankcontact2", "bankcontact3",
    "customerid", "accountowner", "accountname", "refnr", "formnr",
    "recipientname", "eventdesc", "recipientaccount", "message",
    "principalinfo1", "bankinfo1", "bankinfo2", "bankinfo3",
    "bankinfo4", "bankinfo5", "bankinfo6", "bankinfo7", "bankinfo8",
    "bankinfo9", "bankinfo10", "bankinfo11", "bankinfo12",
    "principalinfo2", "paymentdesc", "infoline1", "infoline2",
    "infoline3", "infoline4", "infoline5", "infoline6",
    "recipientname2", "recipientnameiban", "sendername",
))

# Digit-only fields converted to int.
_INT_FIELDS = frozenset((
    "itemcount", "eventid", "record_len", "depositcount", "withdrawcount",
))

# Signed numeric fields converted to float.
_FLOAT_FIELDS = frozenset((
    "startingbalance", "accountlimit", "amount", "destinationamount",
    "balance", "availablefunds", "depositsum", "withdrawsum",
    "avgbalance", "avglimitbalance", "permanentbalance",
))

# Float fields that are expressed in cents and are scaled to euros.
# NOTE(review): "avglimitbalance" is converted to float but was never part
# of this table in the original code; kept that way, confirm it is intended.
_CENT_FIELDS = frozenset((
    "startingbalance", "accountlimit", "amount", "destinationamount",
    "balance", "availablefunds", "depositsum", "withdrawsum",
    "avgbalance", "permanentbalance",
))

# YYMMDD fields converted to datetime.date.
_DATE_FIELDS = frozenset((
    "startdate", "enddate", "creationdate", "balancedate",
    "valuedate", "paymentdate", "recorddate", "perioddate",
))

# HHMM fields converted to datetime.time.
_TIME_FIELDS = frozenset(("creationtime",))


def fixchars(line):
    """Fix the characters mangled in the input

    :param line: Line to rewrite
    :returns: string, fixed line

    """
    # Fix the umlauts in the input
    line = line.replace("{", u"ä")
    line = line.replace("}", u"ö")
    # XXX: There are a whole bunch of these, adding them later
    return line


def _to_date(datestring):
    """Convert a ``YYMMDD`` string to a date, ``'000000'`` to ``None``."""
    if datestring == '000000':
        return None
    # The format only carries a two-digit year; every date is placed in
    # the 2000s.
    return datetime.date(
        int("20" + datestring[0:2]),
        int(datestring[2:4]),
        int(datestring[4:6]))


def _to_time(timestring):
    """Convert an ``HHMM`` string to a :class:`datetime.time`."""
    return datetime.time(int(timestring[0:2]), int(timestring[2:4]))


class PatuParser(object):
    """Parse PATU lines in to structs"""

    def __init__(self):
        """ Initialize PATU parser

        Builds ``self.recparse``, a dict mapping a record key ("00", "10",
        "11", "40", "50", "60", "70") to the compiled regular expression
        that splits a line of that kind into named fields.  The "10" and
        "11" patterns also accept the record ids "80" and "81".
        """
        # NOTE(review): in the revision this was rewritten from, the
        # ``<name>`` part of every ``(?P<name>...)`` group was missing, so
        # none of the patterns could be compiled.  Names that parse_record
        # converts (see the _*_FIELDS tables) were restored from those
        # tables by field width and order; all other names (recordid,
        # version, accountnr, currency, ...) are descriptive best guesses.
        # Confirm them against the PATU record description before other
        # code starts relying on them as dictionary keys.
        recparse = dict()
        recparse["00"] = (
            r"T(?P<recordid>00)(?P<record_len>\d{3})"
            r"(?P<version>\d{3})(?P<accountnr>\d{14})"
            r"(?P<statementnr>\d{3})(?P<startdate>\d{6})"
            r"(?P<enddate>\d{6})"
            r"(?P<creationdate>\d{6})(?P<creationtime>\d{4})"
            r"(?P<customerid>.{17})(?P<balancedate>\d{6})"
            r"(?P<startingbalance>.{19})"
            r"(?P<itemcount>\d{6})(?P<currency>.{3})"
            r"(?P<accountname>.{30})"
            r"(?P<accountlimit>\d{18})(?P<accountowner>.{35})"
            r"(?P<bankcontact1>.{40})(?P<bankcontact2>.{40})"
            r"(?P<bankcontact3>.{30})(?P<ibanswift>.{30})"
        )
        recparse["10"] = (
            r"T(?P<recordid>[18]0)(?P<record_len>\d{3})"
            r"(?P<eventid>\d{6})"
            r"(?P<archivalnr>.{18})(?P<recorddate>\d{6})"
            r"(?P<valuedate>\d{6})"
            r"(?P<paymentdate>\d{6})(?P<eventtype>\d)"
            r"(?P<eventcode>.{3})(?P<eventdesc>.{35})"
            r"(?P<amount>.{19})(?P<receiptcode>.)(?P<creationmethod>.)"
            r"(?P<recipientname>.{35})(?P<recipientsource>.)"
            r"(?P<recipientaccount>.{14})(?P<recipientaccountchanged>.)"
            r"(?P<refnr>.{20})"
            r"(?P<formnr>.{8})(?P<eventlevel>.)"
        )
        # The two characters after the record length select, through the
        # look-behind assertions, which alternative describes the payload.
        recparse["11"] = (
            r"T(?P<recordid>[18]1)(?P<record_len>\d{3})"
            r"(?P<infotype>.{2})"
            r"(?:(?# Match specific info)"
            r"(?<=00)(?P<message>.{35})+"
            r"|"
            r"(?<=01)(?P<transactioncount>\d{8})"
            r"|"
            r"(?<=02)(?P<customerid>.{10})\s(?P<invoicenr>.{15})\s"
            r"(?P<invoicedate>\d{6})"
            r"|"
            r"(?<=03)(?P<cardnumber>.{19})\s(?P<storereference>.{14})"
            r"|"
            r"(?<=04)(?P<origarchiveid>.{18})"
            r"|"
            r"(?<=05)(?P<destinationamount>.{19})\s(?P<currency>.{3})\s"
            r"(?P<exchangerate>.{11})(?P<rateref>.{6})"
            r"|"
            r"(?<=06)(?P<principalinfo1>.{35})(?P<principalinfo2>.{35})"
            r"|"
            r"(?<=07)(?P<bankinfo1>.{35})"
            r"(?P<bankinfo2>.{35})?"
            r"(?P<bankinfo3>.{35})?"
            r"(?P<bankinfo4>.{35})?"
            r"(?P<bankinfo5>.{35})?"
            r"(?P<bankinfo6>.{35})?"
            r"(?P<bankinfo7>.{35})?"
            r"(?P<bankinfo8>.{35})?"
            r"(?P<bankinfo9>.{35})?"
            r"(?P<bankinfo10>.{35})?"
            r"(?P<bankinfo11>.{35})?"
            r"(?P<bankinfo12>.{35})?"
            r"|"
            r"(?<=08)(?P<paymentcode>\d{3})\s(?P<paymentdesc>.{31})"
            r"|"
            r"(?<=09)(?P<recipientname2>.{35})"
            r"|"
            r"(?<=11)(?P<reference>.{35})(?P<recipientiban>.{35})"
            r"(?P<recipientbic>.{35})(?P<recipientnameiban>.{70})"
            r"(?P<sendername>.{70})(?P<senderid>.{35})"
            r"(?P<archivalid>.{70})"
            r")"
        )
        recparse["40"] = (
            r"T(?P<recordid>40)(?P<record_len>\d{3})"
            r"(?P<recorddate>\d{6})(?P<balance>.{19})"
            r"(?P<availablefunds>.{19})"
        )
        recparse["50"] = (
            r"T(?P<recordid>50)(?P<record_len>\d{3})"
            r"(?P<period>\d)(?P<perioddate>\d{6})"
            r"(?P<depositcount>\d{8})(?P<depositsum>.{19})"
            r"(?P<withdrawcount>\d{8})(?P<withdrawsum>.{19})"
        )
        recparse["60"] = (
            r"T(?P<recordid>60)(?P<record_len>\d{3})"
            r"(?P<bankid>.{3})(?P<specialid>01)"
            r"(?P<interestperiodstart>\d{6})-"
            r"(?P<interestperiodend>\d{6})"
            r"(?P<avgbalanceinfo>.)(?P<avgbalance>.{19})"
            r"(?P<interestinfo>.)(?P<interestrate>\d{7})"
            r"(?P<limitbalanceinfo>.)(?P<avglimitbalance>.{19})"
            r"(?P<limitinterestinfo>.)(?P<limitinterestrate>\d{7})"
            r"(?P<limitusageinfo>.)(?P<limitusage>\d{7})"
            r"(?P<permanentbalanceinfo>.)(?P<permanentbalance>.{19})"
            r"(?P<refinterestinfo>.)(?P<refinterestname>.{35})"
            r"(?P<refinterestrate>\d{7})"
            r"(?P<refcreditinfo>.)(?P<refcreditname>.{35})"
            r"(?P<refcreditrate>\d{7})"
        )
        recparse["70"] = (
            r"T(?P<recordid>70)(?P<record_len>\d{3})"
            r"(?P<bankid>\d{3})"
            r"(?P<infoline1>.{80})"
            r"(?P<infoline2>.{80})?"
            r"(?P<infoline3>.{80})?"
            r"(?P<infoline4>.{80})?"
            r"(?P<infoline5>.{80})?"
            r"(?P<infoline6>.{80})?"
        )
        for record in recparse:
            recparse[record] = re.compile(recparse[record])
        self.recparse = recparse

    def parse_record(self, line):
        """Parse a single PATU line into a dict of typed fields

        :param line: one line of PATU input
        :returns: dict mapping field names to converted values, or None
                  (after printing a diagnostic) when no record pattern
                  matches the line

        Text fields are stripped, counters become ``int``, amounts become
        ``float`` euros, ``YYMMDD`` fields become ``datetime.date`` (or
        None for ``000000``) and ``HHMM`` fields become ``datetime.time``.
        When ``ibanswift`` is present, ``iban`` and ``swift`` are added.
        """
        line = fixchars(line)
        for pattern in self.recparse.values():
            matchobj = pattern.match(line)
            if matchobj:
                break
        else:
            print(" **** failed to match line '%s'" % (line,))
            return None

        # Keep only the groups that captured something.  A new dict is
        # built because deleting keys while iterating over the dict raises
        # RuntimeError on Python 3.
        matchdict = dict(
            (field, value)
            for field, value in matchobj.groupdict().items()
            if value)
        matchkeys = set(matchdict)

        for field in matchkeys & _STRIP_FIELDS:
            matchdict[field] = matchdict[field].strip()

        for field in matchkeys & _INT_FIELDS:
            matchdict[field] = int(matchdict[field])

        for field in matchkeys & _FLOAT_FIELDS:
            matchdict[field] = float(matchdict[field])

        # Convert cents to euros
        for field in matchkeys & _CENT_FIELDS:
            matchdict[field] = matchdict[field] / 100.0

        # Split ibanswift into separate fields.  Anything other than
        # exactly two whitespace-separated tokens (e.g. a blank field) is
        # not guessed at: both values are None and the raw "ibanswift"
        # value stays available to the caller.
        if "ibanswift" in matchdict:
            parts = matchdict["ibanswift"].split()
            if len(parts) == 2:
                matchdict["iban"], matchdict["swift"] = parts
            else:
                matchdict["iban"] = matchdict["swift"] = None

        for field in matchkeys & _DATE_FIELDS:
            matchdict[field] = _to_date(matchdict[field])

        for field in matchkeys & _TIME_FIELDS:
            matchdict[field] = _to_time(matchdict[field])

        return matchdict


def parse_file(filename):
    """Parse file with PATU format inside

    :param filename: path of the file to read
    :returns: list with one dict per line that matched a record pattern,
              in file order; lines that do not match are reported by
              PatuParser.parse_record and left out

    """
    parser = PatuParser()
    records = []
    # The context manager closes the file even if a line fails to convert.
    with open(filename, "r") as patufile:
        for line in patufile:
            record = parser.parse_record(line)
            if record is not None:
                records.append(record)
    return records


def main():
    """The main function, currently just parses a dummy filename

    :returns: None

    """
    parse_file("myinput.nda")


if __name__ == '__main__':
    main()