Parsing OpenGeoDB PLZ code TSV files using Python

Problem:

You want to parse the OpenGeoDB Postleitzahl (German ZIP code) TSV files available from http://www.fa-technik.adfc.de/code/opengeodb/.

Solution

Depending on your use case, PyGeoDB, which accesses the OpenGeoDB online service, might be a more suitable solution.

If you need a local solution for your task, here is a simple generator-style Python script that stores the information in a namedtuple. It includes an example.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
An OpenGeoDB PLZ code TSV parser

This parser can be used to parse TSV files found at
http://www.fa-technik.adfc.de/code/opengeodb/

This script is intended to be used with Python3.
It should work with python 2.x, however unicode with CSV
might be an issue.

Tested with DE.tsv. Minor changes might be necessary for other TSVs,
as different data is stored for different countries.

Version 1.0
(c) Uli Köhler (2015)
Released under CC0 1.0 Universal (public domain)
"""
import csv
from collections import namedtuple

__author__  = "Uli Köhler"
__version__ = "1.0"
__license__ = "CC0 1.0 Universal"

# One record of an OpenGeoDB PLZ TSV file. The fields are listed in the same
# order as the columns that readTSVFile reads from each row.
PLZEntry = namedtuple(
    'PLZEntry',
    (
        'loc_id',     # OpenGeoDB location ID (parsed to int by readTSVFile)
        'ags',        # AGS - Amtlicher Gemeindeschluessel
        'ascii',      # Normalized ASCII-only uppercase location name
        'name',       # Actual (unicode) location name
        'lat',        # Latitude in degrees (parsed to float by readTSVFile)
        'lon',        # Longitude in degrees (parsed to float by readTSVFile)
        'amt',        # Associated to
        'plz',        # List of PLZ codes
        'vorwahl',    # Telephone prefix
        'einwohner',  # Population figure (parsed to int by readTSVFile)
        'flaeche',    # Area (parsed to float by readTSVFile)
        'kz',         # KFZ Kennzeichen
        'typ',        # Meaning not documented here; presumably a location type - TODO confirm
        'level',      # Meaning not documented here; presumably a hierarchy level - TODO confirm
        'of',         # Meaning not documented here; presumably a parent location - TODO confirm
        'invalid',    # Boolean flag; readTSVFile sets it to False only when the column is '0'
    ),
)

def zeroOneToBool(x):
    """Convert '0' to False and everything else to True.

    Only the exact string '0' maps to False; any other value (including
    '1', the empty string and non-string objects) maps to True.
    """
    return x != '0'

def toFloatDefault(s, default=float('NaN')):
    """Convert *s* to a float, returning *default* if the conversion fails.

    Args:
        s: The value to convert, typically a string taken from a TSV column.
        default: The value returned when *s* cannot be converted.
            Defaults to NaN.

    Returns:
        ``float(s)`` on success, otherwise *default*.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # ValueError covers unparsable text such as '' or 'abc'.
        # TypeError covers objects float() does not accept at all, such as
        # None, so that every conversion error really yields the default.
        return default

def toIntDefault(s, default=0):
    """Convert *s* to an int, returning *default* if the conversion fails.

    Args:
        s: The value to convert, typically a string taken from a TSV column.
        default: The value returned when *s* cannot be converted.
            Defaults to 0.

    Returns:
        ``int(s)`` on success, otherwise *default*.
    """
    try:
        return int(s)
    except (TypeError, ValueError):
        # ValueError covers unparsable text such as '' or '12.5'.
        # TypeError covers objects int() does not accept at all, such as
        # None, so that every conversion error really yields the default.
        return default

def readTSVFile(filename, encoding="utf-8"):
    """Read an OpenGeoDB PLZ TSV file and yield one PLZEntry per data row.

    The header row (first column ``#loc_id``) and blank lines are skipped.
    Numeric columns are converted with toIntDefault / toFloatDefault, so
    unparsable values become 0 or NaN instead of raising. The PLZ column is
    split on commas into a list; an empty column yields an empty list.

    Requires Python 3, because it passes ``encoding`` to the built-in open().

    Args:
        filename: Path of the TSV file to read.
        encoding: Text encoding of the file. Defaults to UTF-8, which is
            assumed here for the OpenGeoDB files; pass another value if
            your file differs.

    Yields:
        PLZEntry: One entry per data row, in file order.

    Raises:
        IndexError: If a non-empty row has fewer than 16 columns.
    """
    # An explicit encoding keeps umlauts intact regardless of the platform's
    # locale; newline="" lets the csv module handle line endings itself.
    with open(filename, encoding=encoding, newline="") as infile:
        csvreader = csv.reader(infile, delimiter='\t', quotechar='"')
        for row in csvreader:
            if not row:
                continue  # Blank line: csv.reader yields an empty list
            if row[0] == "#loc_id":
                continue  # Skip header
            # "".split(",") would give [''], so drop empty fragments
            plz_codes = [code for code in row[7].split(",") if code]
            yield PLZEntry(
                loc_id=toIntDefault(row[0]),
                ags=row[1],
                ascii=row[2],
                name=row[3],
                lat=toFloatDefault(row[4]),
                lon=toFloatDefault(row[5]),
                amt=row[6],
                plz=plz_codes,
                vorwahl=row[8],
                einwohner=toIntDefault(row[9]),
                flaeche=toFloatDefault(row[10]),
                kz=row[11],
                typ=row[12],
                level=row[13],
                of=row[14],
                invalid=zeroOneToBool(row[15]),
            )


if __name__ == "__main__":
    # Example usage: print the name of every location that has the given PLZ
    import argparse

    cli = argparse.ArgumentParser(description="Get the name of a location by its PLZ code")
    cli.add_argument("infile", help="The OpenGeoDB TSV file to read")
    cli.add_argument("plz", help="Print only entry names with this PLZ")
    options = cli.parse_args()

    # Stream the file lazily and keep only the records listing the wanted PLZ
    wanted_plz = options.plz
    matching = (
        record
        for record in readTSVFile(options.infile)
        if wanted_plz in record.plz
    )
    for record in matching:
        print(record.name)