Parsing OpenGeoDB PLZ code TSV files using Python
Problem:
You want to parse the OpenGeoDB Postleitzahl (German ZIP code) TSV files available from http://www.fa-technik.adfc.de/code/opengeodb/.
Solution
Depending on your use case, PyGeoDB, which accesses the OpenGeoDB online service, might be a more suitable solution.
If you need a local solution for your task, here is a simple generator-style Python script that stores the information in a namedtuple. It includes an example.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
An OpenGeoDB PLZ code TSV parser
This parser can be used to parse TSV files found at
http://www.fa-technik.adfc.de/code/opengeodb/
This script is intended to be used with Python3.
It should work with python 2.x, however unicode with CSV
might be an issue.
Tested with DE.tsv. Minor changes might be necessary for other TSVs,
as different data is stored for different countries
Version 1.0
(c) Uli Köhler (2015)
Released under CC0 1.0 Universal (public domain)
"""
import csv
from collections import namedtuple
__author__ = "Uli Köhler"
__version__ = "1.0"
__license__ = "CC0 1.0 Universal"
# One parsed row of an OpenGeoDB PLZ TSV file. The field order mirrors the
# order in which readTSVFile passes the TSV columns.
PLZEntry = namedtuple(
    "PLZEntry",
    (
        "loc_id",     # OpenGeoDB location ID
        "ags",        # AGS - Amtlicher Gemeindeschluessel (official municipality key)
        "ascii",      # Normalized ASCII-only uppercase location name
        "name",       # Actual (unicode) location name
        "lat",        # Latitude in degrees
        "lon",        # Longitude in degrees
        "amt",        # Associated to
        "plz",        # List of PLZ codes
        "vorwahl",    # Telephone prefix
        "einwohner",  # Population figure
        "flaeche",    # Area
        "kz",         # KFZ Kennzeichen (vehicle registration prefix)
        "typ",        # Presumably the kind of location - TODO confirm against OpenGeoDB docs
        "level",      # Presumably the administrative hierarchy level - TODO confirm
        "of",         # Presumably the loc_id of the parent location - TODO confirm
        "invalid",    # Flag from the last TSV column
    ),
)
def zeroOneToBool(x):
    """Convert the string '0' to False and everything else to True.

    Only the exact string '0' maps to False; any other value (including
    '1', the empty string, or a non-string object) maps to True.
    """
    return x != '0'
def toFloatDefault(s, default=float('NaN')):
    """Convert a value to a float, returning a default if conversion fails.

    Args:
        s: The value to convert, typically a string read from a TSV column.
        default: Returned when ``s`` cannot be converted (NaN by default).

    Returns:
        ``float(s)`` on success, otherwise ``default``.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text such as "" or "n/a".
        # TypeError: non-string, non-numeric input such as None.
        return default
def toIntDefault(s, default=0):
    """Convert a value to an int, returning a default if conversion fails.

    Args:
        s: The value to convert, typically a string read from a TSV column.
        default: Returned when ``s`` cannot be converted (0 by default).

    Returns:
        ``int(s)`` on success, otherwise ``default``.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable text such as "" or "12.5".
        # TypeError: non-string, non-numeric input such as None.
        # OverflowError: an infinite float cannot be represented as an int.
        return default
def readTSVFile(filename, encoding="utf-8"):
    """Read an OpenGeoDB PLZ TSV file and lazily yield PLZEntry objects.

    Args:
        filename: Path of the tab-separated file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that the result does not depend on the platform's locale
            (NOTE(review): assumes the OpenGeoDB dumps are UTF-8 - confirm).

    Yields:
        One PLZEntry per data row. The header row (first column "#loc_id")
        and blank lines are skipped. ``loc_id`` and ``einwohner`` are ints
        (0 if unparsable), ``lat``, ``lon`` and ``flaeche`` are floats (NaN
        if unparsable), ``plz`` is a list of the non-empty comma-separated
        codes, ``invalid`` is a bool, and all other fields are raw strings.

    Raises:
        IndexError: If a non-blank data row has fewer than 16 columns.
    """
    # newline="" is what the csv module expects so it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(filename, encoding=encoding, newline="") as infile:
        csvreader = csv.reader(infile, delimiter='\t', quotechar='"')
        for row in csvreader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            if row[0] == "#loc_id":
                continue  # Skip header
            yield PLZEntry(
                loc_id=toIntDefault(row[0]),
                ags=row[1],
                ascii=row[2],
                name=row[3],
                lat=toFloatDefault(row[4]),
                lon=toFloatDefault(row[5]),
                amt=row[6],
                # "".split(",") would give [''], so drop empty codes to get
                # an empty list for locations without any PLZ.
                plz=[code for code in row[7].split(",") if code],
                vorwahl=row[8],
                einwohner=toIntDefault(row[9]),
                flaeche=toFloatDefault(row[10]),
                kz=row[11],
                typ=row[12],
                level=row[13],
                of=row[14],
                invalid=zeroOneToBool(row[15]),
            )
if __name__ == "__main__":
    # Usage example: print the name of every location that carries the
    # PLZ code given on the command line.
    import argparse

    arg_parser = argparse.ArgumentParser(description="Get the name of a location by its PLZ code")
    arg_parser.add_argument("infile", help="The OpenGeoDB TSV file to read")
    arg_parser.add_argument("plz", help="Print only entry names with this PLZ")
    cli = arg_parser.parse_args()

    # Stream the file and report each entry whose PLZ list contains the code
    wanted_plz = cli.plz
    for location in readTSVFile(cli.infile):
        if wanted_plz not in location.plz:
            continue
        print(location.name)