#!/usr/bin/env python
# encoding: utf-8
"""Parser for PATU format files"""

import re
import datetime

# Text fields that are fixed-width and space padded; the padding is stripped.
_STRIP_FIELDS = frozenset([
    "bankcontact1", "bankcontact2", "bankcontact3", "customerid",
    "accountowner", "accountname", "refnr", "formnr", "recipientname",
    "eventdesc", "recipientaccount", "message", "principalinfo1",
    "bankinfo1", "bankinfo2", "bankinfo3", "bankinfo4", "bankinfo5",
    "bankinfo6", "bankinfo7", "bankinfo8", "bankinfo9", "bankinfo10",
    "bankinfo11", "bankinfo12", "principalinfo2", "paymentdesc",
    "infoline1", "infoline2", "infoline3", "infoline4", "infoline5",
    "infoline6", "recipientname2", "recipientnameiban", "sendername",
])

# Fields converted to int.
_INT_FIELDS = frozenset([
    "itemcount", "eventid", "record_len", "depositcount", "withdrawcount",
])

# Fields converted to float.
_FLOAT_FIELDS = frozenset([
    "startingbalance", "accountlimit", "amount", "destinationamount",
    "balance", "availablefunds", "depositsum", "withdrawsum", "avgbalance",
    "avglimitbalance", "permanentbalance",
])

# Float fields that are additionally divided by 100 (cents -> euros).
# NOTE(review): "avglimitbalance" is converted to float but is not in this
# list, unlike "avgbalance" -- kept as found; confirm whether intentional.
_EUR_FIELDS = frozenset([
    "startingbalance", "accountlimit", "amount", "destinationamount",
    "balance", "availablefunds", "depositsum", "withdrawsum", "avgbalance",
    "permanentbalance",
])

# Six-digit YYMMDD fields converted to datetime.date.
_DATE_FIELDS = frozenset([
    "startdate", "enddate", "creationdate", "balancedate", "valuedate",
    "paymentdate", "recorddate", "perioddate",
])

# Four-digit HHMM fields converted to datetime.time.
_TIME_FIELDS = frozenset(["creationtime"])


def fixchars(line):
    """Fix the characters mangled in the input

    :param line: Line to rewrite
    :returns: string, fixed line

    """
    # Fix the umlauts in the input.
    # NOTE(review): in the Finnish 7-bit character set "}" usually stands
    # for a-ring and "|" for o-umlaut; the mapping below is kept exactly as
    # found -- confirm against real input before changing it.
    line = line.replace("{", u"ä")
    line = line.replace("}", u"ö")
    # XXX: There are a whole bunch of these, adding them later
    return line


class PatuParser(object):
    """Parse PATU lines in to structs"""

    def __init__(self):
        """Compile one regular expression per supported record type.

        The compiled patterns are stored in ``self.recparse``, keyed by a
        two-character record id.

        NOTE(review): the group names had been lost from the patterns (they
        read ``(?P...)`` without ``<name>``, which ``re.compile`` rejects).
        Names used by the conversion tables at the top of this module were
        placed by their position and width in each record; the remaining
        names are reconstructed and should be confirmed against consumers
        of the parsed dictionaries.
        """
        recparse = dict()
        recparse["00"] = (
            r"T(?P<recordid>00)(?P<record_len>\d{3})"
            r"(?P<version>\d{3})(?P<accountnr>\d{14})"
            r"(?P<statementnr>\d{3})(?P<startdate>\d{6})"
            r"(?P<enddate>\d{6})"
            r"(?P<creationdate>\d{6})(?P<creationtime>\d{4})"
            r"(?P<customerid>.{17})(?P<balancedate>\d{6})"
            r"(?P<startingbalance>.{19})"
            r"(?P<itemcount>\d{6})(?P<currency>.{3})"
            r"(?P<accountname>.{30})"
            r"(?P<accountlimit>\d{18})(?P<accountowner>.{35})"
            r"(?P<bankcontact1>.{40})(?P<bankcontact2>.{40})"
            r"(?P<bankcontact3>.{30})(?P<ibanswift>.{30})"
        )
        recparse["10"] = (
            r"T(?P<recordid>[18]0)(?P<record_len>\d{3})"
            r"(?P<eventid>\d{6})"
            r"(?P<archiveid>.{18})(?P<recorddate>\d{6})"
            r"(?P<valuedate>\d{6})"
            r"(?P<paymentdate>\d{6})(?P<eventtype>\d)"
            r"(?P<eventcode>.{3})(?P<eventdesc>.{35})"
            r"(?P<amount>.{19})(?P<receiptcode>.)(?P<creationmethod>.)"
            r"(?P<recipientname>.{35})(?P<recipientsource>.)"
            r"(?P<recipientaccount>.{14})(?P<recipientaccountchanged>.)"
            r"(?P<refnr>.{20})"
            r"(?P<formnr>.{8})(?P<eventlevel>.)"
        )
        recparse["11"] = (
            r"T(?P<recordid>[18]1)(?P<record_len>\d{3})"
            r"(?P<infotype>.{2})"
            # The look-behinds select the alternative that belongs to the
            # two-character info type matched just before.
            r"(?:(?# Match specific info)"
            r"(?<=00)(?P<message>.{35})+"
            r"|"
            r"(?<=01)(?P<transactioncount>\d{8})"
            r"|"
            r"(?<=02)(?P<customerid>.{10})\s(?P<invoicenr>.{15})\s"
            r"(?P<invoicedate>\d{6})"
            r"|"
            r"(?<=03)(?P<cardnumber>.{19})\s(?P<storereference>.{14})"
            r"|"
            r"(?<=04)(?P<origarchiveid>.{18})"
            r"|"
            r"(?<=05)(?P<destinationamount>.{19})\s(?P<currency>.{3})\s"
            r"(?P<exchangerate>.{11})(?P<rateref>.{6})"
            r"|"
            r"(?<=06)(?P<principalinfo1>.{35})(?P<principalinfo2>.{35})"
            r"|"
            r"(?<=07)(?P<bankinfo1>.{35})"
            r"(?P<bankinfo2>.{35})?"
            r"(?P<bankinfo3>.{35})?"
            r"(?P<bankinfo4>.{35})?"
            r"(?P<bankinfo5>.{35})?"
            r"(?P<bankinfo6>.{35})?"
            r"(?P<bankinfo7>.{35})?"
            r"(?P<bankinfo8>.{35})?"
            r"(?P<bankinfo9>.{35})?"
            r"(?P<bankinfo10>.{35})?"
            r"(?P<bankinfo11>.{35})?"
            r"(?P<bankinfo12>.{35})?"
            r"|"
            r"(?<=08)(?P<paymentcode>\d{3})\s(?P<paymentdesc>.{31})"
            r"|"
            r"(?<=09)(?P<recipientname2>.{35})"
            r"|"
            r"(?<=11)(?P<reference>.{35})(?P<recipientiban>.{35})"
            r"(?P<recipientbic>.{35})(?P<recipientnameiban>.{70})"
            r"(?P<sendername>.{70})(?P<senderid>.{35})"
            r"(?P<archivalid>.{70})"
            r")"
        )
        recparse["40"] = (
            r"T(?P<recordid>40)(?P<record_len>\d{3})"
            r"(?P<recorddate>\d{6})(?P<balance>.{19})"
            r"(?P<availablefunds>.{19})"
        )
        recparse["50"] = (
            r"T(?P<recordid>50)(?P<record_len>\d{3})"
            r"(?P<period>\d)(?P<perioddate>\d{6})"
            r"(?P<depositcount>\d{8})(?P<depositsum>.{19})"
            r"(?P<withdrawcount>\d{8})(?P<withdrawsum>.{19})"
        )
        recparse["60"] = (
            r"T(?P<recordid>60)(?P<record_len>\d{3})"
            r"(?P<bankid>.{3})(?P<specialid>01)"
            r"(?P<interestperiodstart>\d{6})-"
            r"(?P<interestperiodend>\d{6})"
            r"(?P<avgbalanceinfo>.)(?P<avgbalance>.{19})"
            r"(?P<interestinfo>.)(?P<interestrate>\d{7})"
            r"(?P<limitbalanceinfo>.)(?P<avglimitbalance>.{19})"
            r"(?P<limitinterestinfo>.)(?P<limitinterestrate>\d{7})"
            r"(?P<limitusageinfo>.)(?P<limitusage>\d{7})"
            r"(?P<permanentbalanceinfo>.)(?P<permanentbalance>.{19})"
            r"(?P<refinterestinfo>.)(?P<refinterestname>.{35})"
            r"(?P<refinterestrate>\d{7})"
            r"(?P<refcreditinfo>.)(?P<refcreditname>.{35})"
            r"(?P<refcreditrate>\d{7})"
        )
        recparse["70"] = (
            r"T(?P<recordid>70)(?P<record_len>\d{3})"
            r"(?P<bankid>\d{3})"
            r"(?P<infoline1>.{80})"
            r"(?P<infoline2>.{80})?"
            r"(?P<infoline3>.{80})?"
            r"(?P<infoline4>.{80})?"
            r"(?P<infoline5>.{80})?"
            r"(?P<infoline6>.{80})?"
        )
        for record in recparse:
            recparse[record] = re.compile(recparse[record])
        self.recparse = recparse

    def parse_record(self, line):
        """Parse a single PATU record line into a dictionary.

        :param line: one line of PATU input
        :returns: dict mapping field names to converted values, or None
                  (after printing a diagnostic) when no record pattern
                  matches the line
        :raises ValueError: if a numeric, date or time field holds text
                            that cannot be converted

        """
        line = fixchars(line)

        matchobj = None
        for pattern in self.recparse.values():
            matchobj = pattern.match(line)
            if matchobj:
                break
        if not matchobj:
            print(" **** failed to match line '%s'" % (line))
            return None

        # Drop groups that did not take part in the match.  A new dict is
        # built because deleting keys while iterating over the dict raises
        # RuntimeError on Python 3.
        matchdict = {
            field: value
            for field, value in matchobj.groupdict().items()
            if value
        }
        matchkeys = set(matchdict)

        # Strip strings
        for field in matchkeys & _STRIP_FIELDS:
            matchdict[field] = matchdict[field].strip()

        # Convert to int
        for field in matchkeys & _INT_FIELDS:
            matchdict[field] = int(matchdict[field])

        # Convert to float
        for field in matchkeys & _FLOAT_FIELDS:
            matchdict[field] = float(matchdict[field])

        # Convert cents to euros
        for field in matchkeys & _EUR_FIELDS:
            matchdict[field] = matchdict[field] / 100

        # Split ibanswift into separate fields.  The field may be blank or
        # carry only the first part, so missing parts become None instead
        # of failing the tuple unpacking.
        if "ibanswift" in matchdict:
            parts = matchdict["ibanswift"].split()
            matchdict["iban"] = parts[0] if parts else None
            matchdict["swift"] = parts[1] if len(parts) > 1 else None

        # Convert date fields
        for field in matchkeys & _DATE_FIELDS:
            # Two-digit years are always placed in the 2000s.
            datestring = matchdict[field]
            if datestring == '000000':
                matchdict[field] = None
                continue
            matchdict[field] = datetime.date(
                int("20" + datestring[0:2]),
                int(datestring[2:4]),
                int(datestring[4:6]))

        # Convert time fields
        for field in matchkeys & _TIME_FIELDS:
            timestring = matchdict[field]
            matchdict[field] = datetime.time(
                int(timestring[0:2]),
                int(timestring[2:4]))

        return matchdict


def parse_file(filename):
    """Parse file with PATU format inside

    :param filename: path of the file to read
    :returns: list of the dictionaries produced by
              PatuParser.parse_record, in file order; lines that match no
              record pattern are skipped

    """
    parser = PatuParser()
    records = []
    # The context manager closes the file even if a record fails to parse.
    with open(filename, "r") as patufile:
        for line in patufile:
            record = parser.parse_record(line)
            if record is not None:
                records.append(record)
    return records


def main():
    """The main function, currently just calls a dummy filename

    :returns: None
    """
    parse_file("myinput.nda")


if __name__ == '__main__':
    main()