Files
bank-payment/account_banking_fi_patu/parser.py
Pieter J. Kersten cb48435950 [MERGE] account_banking_fi_patu: added Finnish patu parser - thanks to Sami Haahtinen
[FIX] account_banking_nl_clieop: added forgotten Danish translation file.
2010-06-30 09:11:08 +02:00

228 lines
9.4 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
"""Parser for PATU format files"""
import re, datetime
def fixchars(line):
    """Replace the stand-in characters of a PATU line with umlauts.

    :param line: Line to rewrite

    :returns: string, the line with every "{" replaced by "ä" and every
        "}" replaced by "ö"
    """
    # XXX: There are a whole bunch of these, adding them later
    # NOTE(review): the Finnish 7-bit national character set (ISO 646
    # variant) conventionally uses "|" for "ö" and "}" for "å" -- confirm
    # this table against real bank data before extending it.
    substitutions = (
        ("{", u"ä"),
        ("}", u"ö"),
    )
    for mangled, umlaut in substitutions:
        line = line.replace(mangled, umlaut)
    return line
class PatuParser(object):
    """Parse PATU lines in to structs.

    The constructor builds ``self.recparse``, a dict that maps a record key
    ("00", "10", "11", "40", "50", "60", "70") to a compiled regular
    expression with one named group per fixed-width field.  The "10" and
    "11" patterns also accept the record ids "80" and "81".

    ``parse_record`` matches one line against those patterns and returns a
    dict of the named fields converted to Python values.
    """

    # Text fields whose surrounding padding is stripped.
    _STRIP_FIELDS = frozenset([
        "bankcontact1", "bankcontact2", "bankcontact3",
        "customerid", "accountowner", "accountname", "refnr", "formnr",
        "recipientname", "eventdesc", "recipientaccount", "message",
        "principalinfo1", "bankinfo1", "bankinfo2", "bankinfo3",
        "bankinfo4", "bankinfo5", "bankinfo6", "bankinfo7", "bankinfo8",
        "bankinfo9", "bankinfo10", "bankinfo11", "bankinfo12",
        "principalinfo2", "paymentdesc", "infoline1", "infoline2",
        "infoline3", "infoline4", "infoline5", "infoline6",
        "recipientname2", "recipientnameiban", "sendername"])

    # Counters and lengths, converted to int.
    _INT_FIELDS = frozenset([
        "itemcount", "eventid", "record_len",
        "depositcount", "withdrawcount"])

    # Signed amounts, converted to float.
    _FLOAT_FIELDS = frozenset([
        "startingbalance", "accountlimit", "amount",
        "destinationamount", "balance", "availablefunds", "depositsum",
        "withdrawsum", "avgbalance", "avglimitbalance",
        "permanentbalance"])

    # Amounts that are divided by 100 (cents to euros) after the float
    # conversion.
    # NOTE(review): "avglimitbalance" is converted to float above but is not
    # listed here, so it stays undivided -- confirm whether that is intended.
    _CENT_FIELDS = frozenset([
        "startingbalance", "accountlimit", "amount",
        "destinationamount", "balance", "availablefunds", "depositsum",
        "withdrawsum", "avgbalance", "permanentbalance"])

    # Six digit YYMMDD fields, converted to datetime.date (or None).
    _DATE_FIELDS = frozenset([
        "startdate", "enddate", "creationdate", "balancedate",
        "valuedate", "paymentdate", "recorddate", "perioddate"])

    # Four digit HHMM fields, converted to datetime.time.
    _TIME_FIELDS = frozenset(["creationtime"])

    def __init__(self):
        """Initialize PATU parser: build and compile the record patterns."""
        recparse = dict()
        recparse["00"] = (
            r"T(?P<recordid>00)(?P<record_len>\d{3})"
            r"(?P<version>\d{3})(?P<accountnr>\d{14})"
            r"(?P<statementnr>\d{3})(?P<startdate>\d{6})"
            r"(?P<enddate>\d{6})"
            r"(?P<creationdate>\d{6})(?P<creationtime>\d{4})"
            r"(?P<customerid>.{17})(?P<balancedate>\d{6})"
            r"(?P<startingbalance>.{19})"
            r"(?P<itemcount>\d{6})(?P<currency>.{3})"
            r"(?P<accountname>.{30})"
            r"(?P<accountlimit>\d{18})(?P<accountowner>.{35})"
            r"(?P<bankcontact1>.{40})(?P<bankcontact2>.{40})"
            r"(?P<bankcontact3>.{30})(?P<ibanswift>.{30})"
        )
        recparse["10"] = (
            r"T(?P<recordid>[18]0)(?P<record_len>\d{3})"
            r"(?P<eventid>\d{6})"
            r"(?P<archivalnr>.{18})(?P<recorddate>\d{6})"
            r"(?P<valuedate>\d{6})"
            r"(?P<paymentdate>\d{6})(?P<eventtype>\d)"
            r"(?P<eventcode>.{3})(?P<eventdesc>.{35})"
            r"(?P<amount>.{19})(?P<receiptcode>.)(?P<creationmethod>.)"
            r"(?P<recipientname>.{35})(?P<recipientsource>.)"
            r"(?P<recipientaccount>.{14})(?P<recipientaccountchanged>.)"
            r"(?P<refnr>.{20})"
            r"(?P<formnr>.{8})(?P<eventlevel>.)"
        )
        # The two character infotype selects, through the look-behind
        # assertions, which of the alternative layouts below applies.
        # NOTE(review): "(?P<message>.{35})+" repeats a capturing group, so
        # only the last 35 character repetition ends up in "message" --
        # confirm whether the whole text was meant to be kept.
        recparse["11"] = (
            r"T(?P<recordid>[18]1)(?P<record_len>\d{3})"
            r"(?P<infotype>.{2})"
            r"(?:(?# Match specific info)"
            r"(?<=00)(?P<message>.{35})+"
            r"|"
            r"(?<=01)(?P<transactioncount>\d{8})"
            r"|"
            r"(?<=02)(?P<customerid>.{10})\s(?P<invoicenr>.{15})\s"
            r"(?P<invoicedate>\d{6})"
            r"|"
            r"(?<=03)(?P<cardnumber>.{19})\s(?P<storereference>.{14})"
            r"|"
            r"(?<=04)(?P<origarchiveid>.{18})"
            r"|"
            r"(?<=05)(?P<destinationamount>.{19})\s(?P<currency>.{3})\s"
            r"(?P<exchangerate>.{11})(?P<rateref>.{6})"
            r"|"
            r"(?<=06)(?P<principalinfo1>.{35})(?P<principalinfo2>.{35})"
            r"|"
            r"(?<=07)(?P<bankinfo1>.{35})"
            r"(?P<bankinfo2>.{35})?"
            r"(?P<bankinfo3>.{35})?"
            r"(?P<bankinfo4>.{35})?"
            r"(?P<bankinfo5>.{35})?"
            r"(?P<bankinfo6>.{35})?"
            r"(?P<bankinfo7>.{35})?"
            r"(?P<bankinfo8>.{35})?"
            r"(?P<bankinfo9>.{35})?"
            r"(?P<bankinfo10>.{35})?"
            r"(?P<bankinfo11>.{35})?"
            r"(?P<bankinfo12>.{35})?"
            r"|"
            r"(?<=08)(?P<paymentcode>\d{3})\s(?P<paymentdesc>.{31})"
            r"|"
            r"(?<=09)(?P<recipientname2>.{35})"
            r"|"
            r"(?<=11)(?P<reference>.{35})(?P<recipientiban>.{35})"
            r"(?P<recipientbic>.{35})(?P<recipientnameiban>.{70})"
            r"(?P<sendername>.{70})(?P<senderid>.{35})"
            r"(?P<archivalid>.{70})"
            r")"
        )
        recparse["40"] = (
            r"T(?P<recordid>40)(?P<record_len>\d{3})"
            r"(?P<recorddate>\d{6})(?P<balance>.{19})"
            r"(?P<availablefunds>.{19})"
        )
        recparse["50"] = (
            r"T(?P<recordid>50)(?P<record_len>\d{3})"
            r"(?P<period>\d)(?P<perioddate>\d{6})"
            r"(?P<depositcount>\d{8})(?P<depositsum>.{19})"
            r"(?P<withdrawcount>\d{8})(?P<withdrawsum>.{19})"
        )
        recparse["60"] = (
            r"T(?P<recordid>60)(?P<record_len>\d{3})"
            r"(?P<bankid>.{3})(?P<specialid>01)"
            r"(?P<interestperiodstart>\d{6})-"
            r"(?P<interestperiodend>\d{6})"
            r"(?P<avgbalanceinfo>.)(?P<avgbalance>.{19})"
            r"(?P<interestinfo>.)(?P<interestrate>\d{7})"
            r"(?P<limitbalanceinfo>.)(?P<avglimitbalance>.{19})"
            r"(?P<limitinterestinfo>.)(?P<limitinterestrate>\d{7})"
            r"(?P<limitusageinfo>.)(?P<limitusage>\d{7})"
            r"(?P<permanentbalanceinfo>.)(?P<permanentbalance>.{19})"
            r"(?P<refinterestinfo>.)(?P<refinterestname>.{35})"
            r"(?P<refinterestrate>\d{7})"
            r"(?P<refcreditinfo>.)(?P<refcreditname>.{35})"
            r"(?P<refcreditrate>\d{7})"
        )
        recparse["70"] = (
            r"T(?P<recordid>70)(?P<record_len>\d{3})"
            r"(?P<bankid>\d{3})"
            r"(?P<infoline1>.{80})"
            r"(?P<infoline2>.{80})?"
            r"(?P<infoline3>.{80})?"
            r"(?P<infoline4>.{80})?"
            r"(?P<infoline5>.{80})?"
            r"(?P<infoline6>.{80})?"
        )
        self.recparse = dict(
            (record, re.compile(pattern))
            for record, pattern in recparse.items())

    def parse_record(self, line):
        """Parse one PATU line into a dict of converted fields.

        The line is first passed through ``fixchars`` and then matched
        against the compiled record patterns.

        :param line: one line of a PATU file
        :returns: dict mapping field names to converted values, or None
            (after printing a diagnostic) when no record pattern matches
        """
        line = fixchars(line)
        matchobj = self._match_line(line)
        if matchobj is None:
            print(" **** failed to match line '%s'" % line)
            return None
        return self._convert_fields(matchobj.groupdict())

    def _match_line(self, line):
        """Return the first successful match of *line*, or None."""
        for pattern in self.recparse.values():
            matchobj = pattern.match(line)
            if matchobj:
                return matchobj
        return None

    @staticmethod
    def _convert_fields(matchdict):
        """Convert the raw string groups of a matched record.

        :param matchdict: dict of group name to matched text (or None), as
            returned by ``groupdict()``
        :returns: new dict without the None/empty members, in which text
            fields are stripped, counters are ints, amounts are floats in
            euros, dates are ``datetime.date`` (None for "000000") and
            times are ``datetime.time``
        :raises ValueError: when a numeric, date or time field does not
            hold a valid value
        """
        # Remove members set to None; build a copy instead of deleting
        # while iterating, which Python 3 rejects.
        fields = dict(
            (name, value) for name, value in matchdict.items() if value)
        present = set(fields)

        # Strip strings
        for name in present & PatuParser._STRIP_FIELDS:
            fields[name] = fields[name].strip()

        # Convert to int
        for name in present & PatuParser._INT_FIELDS:
            fields[name] = int(fields[name])

        # Convert to float
        for name in present & PatuParser._FLOAT_FIELDS:
            fields[name] = float(fields[name])

        # Convert cents to euros
        for name in present & PatuParser._CENT_FIELDS:
            fields[name] = fields[name] / 100

        # Convert ibanswift to separate fields.  The field may be blank, so
        # only split it when it really holds two tokens.
        if "ibanswift" in fields:
            parts = fields["ibanswift"].split()
            if len(parts) == 2:
                fields["iban"], fields["swift"] = parts

        # Convert date fields
        for name in present & PatuParser._DATE_FIELDS:
            # Base all dates on the year 2000, since it's unlikely that this
            # standard will survive to see 2020 due to SEPA
            datestring = fields[name]
            if datestring == '000000':
                fields[name] = None
                continue
            fields[name] = datetime.date(
                int("20" + datestring[0:2]),
                int(datestring[2:4]),
                int(datestring[4:6]))

        # Convert time fields
        for name in present & PatuParser._TIME_FIELDS:
            timestring = fields[name]
            fields[name] = datetime.time(
                int(timestring[0:2]), int(timestring[2:4]))

        return fields
def parse_file(filename):
    """Parse file with PATU format inside.

    Every line of the file is handed to ``PatuParser.parse_record``.

    :param filename: path of the PATU file to read
    :returns: list with the value returned by ``parse_record`` for each
        line that produced one (lines for which it returned None are
        left out), in file order
    """
    parser = PatuParser()
    records = []
    # The context manager closes the file even when parsing raises.
    with open(filename, "r") as patufile:
        for line in patufile:
            record = parser.parse_record(line)
            if record is not None:
                records.append(record)
    return records
def main():
    """Script entry point; currently just parses a dummy filename.

    :returns: None
    """
    dummy_input = "myinput.nda"
    parse_file(dummy_input)


if __name__ == "__main__":
    main()