#!/usr/bin/env python
# encoding: utf-8
"""Parser for PATU format files"""

import datetime
import re


def fixchars(line):
    """Fix the characters mangled in the input

    :param line: Line to rewrite
    :returns: string, fixed line

    """
    # Fix the umlauts in the input
    line = line.replace("{", u"ä")
    line = line.replace("}", u"ö")
    # XXX: There are a whole bunch of these, adding them later
    return line


class PatuParser(object):
    """Parse PATU lines in to structs

    Each record type has one compiled regular expression in
    ``self.recparse`` (keyed by record code).  ``parse_record`` matches a
    line against them and post-processes the named groups into Python
    values.

    NOTE(review): the ``(?P<name>...)`` group names had been stripped from
    the patterns (they read ``(?P\\d{3})``, which does not compile).  Names
    that the conversion sets below refer to were put back according to the
    width/type of the field they describe; the remaining names
    (``recordtype``, ``version``, ``accountnr``, ``archiveid``, ...) are a
    descriptive reconstruction -- confirm them against the PATU
    specification / the upstream revision before relying on them.
    """

    # Fields whose surrounding padding is removed.
    _NEEDS_STRIP = frozenset([
        "bankcontact1", "bankcontact2", "bankcontact3", "customerid",
        "accountowner", "accountname", "refnr", "formnr", "recipientname",
        "eventdesc", "recipientaccount", "message", "principalinfo1",
        "bankinfo1", "bankinfo2", "bankinfo3", "bankinfo4", "bankinfo5",
        "bankinfo6", "bankinfo7", "bankinfo8", "bankinfo9", "bankinfo10",
        "bankinfo11", "bankinfo12", "principalinfo2", "paymentdesc",
        "infoline1", "infoline2", "infoline3", "infoline4", "infoline5",
        "infoline6", "recipientname2", "recipientnameiban", "sendername",
    ])
    # Fields converted to int.
    _NEEDS_INT = frozenset([
        "itemcount", "eventid", "record_len", "depositcount",
        "withdrawcount",
    ])
    # Fields converted to float.
    _NEEDS_FLOAT = frozenset([
        "startingbalance", "accountlimit", "amount", "destinationamount",
        "balance", "availablefunds", "depositsum", "withdrawsum",
        "avgbalance", "avglimitbalance", "permanentbalance",
    ])
    # Fields divided by 100 (cents -> euros) after the float conversion.
    # NOTE(review): "avglimitbalance" is converted to float but is not
    # listed here, unlike the other balances -- confirm whether intended.
    _NEEDS_EUR = frozenset([
        "startingbalance", "accountlimit", "amount", "destinationamount",
        "balance", "availablefunds", "depositsum", "withdrawsum",
        "avgbalance", "permanentbalance",
    ])
    # Fields holding a YYMMDD date.
    _NEEDS_DATE = frozenset([
        "startdate", "enddate", "creationdate", "balancedate", "valuedate",
        "paymentdate", "recorddate", "perioddate",
    ])
    # Fields holding an HHMM time.
    _NEEDS_TIME = frozenset(["creationtime"])

    def __init__(self):
        """Initialize PATU parser

        Builds one pattern per record type and stores the compiled
        patterns in ``self.recparse``.
        """
        recparse = dict()
        recparse["00"] = (
            r"T(?P<recordtype>00)(?P<record_len>\d{3})"
            r"(?P<version>\d{3})(?P<accountnr>\d{14})"
            r"(?P<statementnr>\d{3})(?P<startdate>\d{6})"
            r"(?P<enddate>\d{6})"
            r"(?P<creationdate>\d{6})(?P<creationtime>\d{4})"
            r"(?P<customerid>.{17})(?P<balancedate>\d{6})"
            r"(?P<startingbalance>.{19})"
            r"(?P<itemcount>\d{6})(?P<currency>.{3})"
            r"(?P<accountname>.{30})"
            r"(?P<accountlimit>\d{18})(?P<accountowner>.{35})"
            r"(?P<bankcontact1>.{40})(?P<bankcontact2>.{40})"
            r"(?P<bankcontact3>.{30})(?P<ibanswift>.{30})"
        )
        recparse["10"] = (
            r"T(?P<recordtype>[18]0)(?P<record_len>\d{3})"
            r"(?P<eventid>\d{6})"
            r"(?P<archiveid>.{18})(?P<recorddate>\d{6})"
            r"(?P<valuedate>\d{6})"
            r"(?P<paymentdate>\d{6})(?P<eventtype>\d)"
            r"(?P<eventcode>.{3})(?P<eventdesc>.{35})"
            r"(?P<amount>.{19})(?P<receiptcode>.)(?P<creationmethod>.)"
            r"(?P<recipientname>.{35})(?P<recipientsource>.)"
            r"(?P<recipientaccount>.{14})(?P<recipientaccountchanged>.)"
            r"(?P<refnr>.{20})"
            r"(?P<formnr>.{8})(?P<eventlevel>.)"
        )
        # The two characters after the record length select the layout of
        # the rest of the line; each alternative checks them with a
        # lookbehind.
        recparse["11"] = (
            r"T(?P<recordtype>[18]1)(?P<record_len>\d{3})"
            r"(?P<infotype>.{2})"
            r"(?:(?# Match specific info)"
            r"(?<=00)(?P<message>.{35})+"
            r"|"
            r"(?<=01)(?P<transactioncount>\d{8})"
            r"|"
            r"(?<=02)(?P<customerid>.{10})\s(?P<invoicenr>.{15})\s"
            r"(?P<invoicedate>\d{6})"
            r"|"
            r"(?<=03)(?P<cardnumber>.{19})\s(?P<storereference>.{14})"
            r"|"
            r"(?<=04)(?P<origarchiveid>.{18})"
            r"|"
            r"(?<=05)(?P<destinationamount>.{19})\s(?P<currency>.{3})\s"
            r"(?P<exchangerate>.{11})(?P<rateref>.{6})"
            r"|"
            r"(?<=06)(?P<principalinfo1>.{35})(?P<principalinfo2>.{35})"
            r"|"
            r"(?<=07)(?P<bankinfo1>.{35})"
            r"(?P<bankinfo2>.{35})?"
            r"(?P<bankinfo3>.{35})?"
            r"(?P<bankinfo4>.{35})?"
            r"(?P<bankinfo5>.{35})?"
            r"(?P<bankinfo6>.{35})?"
            r"(?P<bankinfo7>.{35})?"
            r"(?P<bankinfo8>.{35})?"
            r"(?P<bankinfo9>.{35})?"
            r"(?P<bankinfo10>.{35})?"
            r"(?P<bankinfo11>.{35})?"
            r"(?P<bankinfo12>.{35})?"
            r"|"
            r"(?<=08)(?P<paymentcode>\d{3})\s(?P<paymentdesc>.{31})"
            r"|"
            r"(?<=09)(?P<recipientname2>.{35})"
            r"|"
            r"(?<=11)(?P<reference>.{35})(?P<recipientiban>.{35})"
            r"(?P<recipientbic>.{35})(?P<recipientnameiban>.{70})"
            r"(?P<sendername>.{70})(?P<senderid>.{35})"
            r"(?P<archivalid>.{70})"
            r")"
        )
        recparse["40"] = (
            r"T(?P<recordtype>40)(?P<record_len>\d{3})"
            r"(?P<recorddate>\d{6})(?P<balance>.{19})"
            r"(?P<availablefunds>.{19})"
        )
        recparse["50"] = (
            r"T(?P<recordtype>50)(?P<record_len>\d{3})"
            r"(?P<period>\d)(?P<perioddate>\d{6})"
            r"(?P<depositcount>\d{8})(?P<depositsum>.{19})"
            r"(?P<withdrawcount>\d{8})(?P<withdrawsum>.{19})"
        )
        recparse["60"] = (
            r"T(?P<recordtype>60)(?P<record_len>\d{3})"
            r"(?P<bankid>.{3})(?P<specialid>01)"
            r"(?P<interestperiodstart>\d{6})-"
            r"(?P<interestperiodend>\d{6})"
            r"(?P<avgbalanceinfo>.)(?P<avgbalance>.{19})"
            r"(?P<interestinfo>.)(?P<interestrate>\d{7})"
            r"(?P<limitbalanceinfo>.)(?P<avglimitbalance>.{19})"
            r"(?P<limitinterestinfo>.)(?P<limitinterestrate>\d{7})"
            r"(?P<limitusageinfo>.)(?P<limitusage>\d{7})"
            r"(?P<permanentbalanceinfo>.)(?P<permanentbalance>.{19})"
            r"(?P<refinterestinfo>.)(?P<refinterestname>.{35})"
            r"(?P<refinterestrate>\d{7})"
            r"(?P<refcreditinfo>.)(?P<refcreditname>.{35})"
            r"(?P<refcreditrate>\d{7})"
        )
        recparse["70"] = (
            r"T(?P<recordtype>70)(?P<record_len>\d{3})"
            r"(?P<bankid>\d{3})"
            r"(?P<infoline1>.{80})"
            r"(?P<infoline2>.{80})?"
            r"(?P<infoline3>.{80})?"
            r"(?P<infoline4>.{80})?"
            r"(?P<infoline5>.{80})?"
            r"(?P<infoline6>.{80})?"
        )
        self.recparse = dict(
            (record, re.compile(pattern))
            for record, pattern in recparse.items()
        )

    def parse_record(self, line):
        """Parse one PATU line into a dict of converted fields

        :param line: a single line of a PATU file
        :returns: dict mapping field name to converted value, or None
                  (after printing a diagnostic) when no record pattern
                  matches the line
        :raises ValueError: when a numeric, date or time field does not
                  hold a convertible value

        """
        line = fixchars(line)

        matchobj = None
        for pattern in self.recparse.values():
            matchobj = pattern.match(line)
            if matchobj:
                break
        if not matchobj:
            print(" **** failed to match line '%s'" % (line,))
            return None

        # Keep only the groups that actually took part in the match.
        # Built as a new dict: deleting while iterating keys() fails on
        # Python 3.
        matchdict = dict(
            (field, value)
            for field, value in matchobj.groupdict().items()
            if value
        )
        matchkeys = set(matchdict)

        # Strip strings
        for field in matchkeys & self._NEEDS_STRIP:
            matchdict[field] = matchdict[field].strip()

        # Convert to int
        for field in matchkeys & self._NEEDS_INT:
            matchdict[field] = int(matchdict[field])

        # Convert to float
        for field in matchkeys & self._NEEDS_FLOAT:
            matchdict[field] = float(matchdict[field])

        # Convert cents to euros
        for field in matchkeys & self._NEEDS_EUR:
            matchdict[field] = matchdict[field] / 100

        # Convert ibanswift to separate fields.  The field is only split
        # when it holds exactly two whitespace separated tokens; a blank
        # field leaves "iban"/"swift" unset instead of raising.
        if "ibanswift" in matchdict:
            parts = matchdict["ibanswift"].split()
            if len(parts) == 2:
                matchdict["iban"], matchdict["swift"] = parts

        # Convert date fields
        for field in matchkeys & self._NEEDS_DATE:
            # Base all dates on the year 2000, since it's unlikely that this
            # standard will survive to see 2020 due to SEPA
            datestring = matchdict[field]
            if datestring == "000000":
                matchdict[field] = None
                continue
            matchdict[field] = datetime.date(int("20" + datestring[0:2]),
                                             int(datestring[2:4]),
                                             int(datestring[4:6]))

        # Convert time fields
        for field in matchkeys & self._NEEDS_TIME:
            timestring = matchdict[field]
            matchdict[field] = datetime.time(int(timestring[0:2]),
                                             int(timestring[2:4]))

        return matchdict


def parse_file(filename):
    """Parse file with PATU format inside

    :param filename: path of the file to read
    :returns: list of the dicts produced by ``PatuParser.parse_record``
              for every line that matched a record pattern, in file order

    """
    parser = PatuParser()
    records = []
    with open(filename, "r") as patufile:
        for line in patufile:
            record = parser.parse_record(line)
            if record is not None:
                records.append(record)
    return records


def main():
    """The main function, currently just calls a dummy filename

    :returns: None

    """
    parse_file("myinput.nda")


if __name__ == '__main__':
    main()