Parsing the OUI database in Haskell

In this post, we will show a method and provide a Haskell module capable of parsing both the IEEE OUI list and the IEEE IAB list.

While our code only parses the databases into an object form and doesn’t insert them into a tree capable of fast MAC address -> vendor lookup, it is based on Attoparsec, providing good performance and high flexibility for changes.

In order to parse oui.txt, simply call parseOUIFile "oui.txt". Note that while an Either error message is provided for parsing errors, the current version does not provide Either-based errors for file IO errors.

{-# LANGUAGE OverloadedStrings #-}
module OUIParser where
{-
    Parser for the IEEE OUI / IAB list

    Download at
    http://standards.ieee.org/develop/regauth/oui/oui.txt
    http://standards.ieee.org/develop/regauth/iab/iab.txt

    Copyright (c) 2014 Uli Koehler
    Licensed under Apache License v2.0

    Version 1.0
-}

import Prelude hiding (takeWhile)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as LB
import Data.ByteString.Char8 (ByteString)
import Data.Attoparsec.Char8 (skipSpace, endOfLine, isEndOfLine, isDigit_w8, hexadecimal, char, isSpace_w8)
import Data.Attoparsec.ByteString.Lazy
import GHC.Word (Word8)
import Control.Applicative
import Data.Char (isHexDigit)
import Control.Monad
import qualified Data.ByteString.Internal as BI (c2w, w2c)
import Debug.Trace

isHexDigit_w8 = isHexDigit . BI.w2c

skipWhile1 :: (Word8 -> Bool) -> Parser ()
skipWhile1 p = skip p *> skipWhile p

type OUIPrefix = (Int, Int, Int)
type IABRange = (Int, Int)
type CompanyId = ByteString

data OUIInfo = OUIInfo {
                    ouiPrefix :: OUIPrefix,
                    ouiCompanyId :: CompanyId,
                    ouiCompanyName :: ByteString,
                    ouiCompanyAddress :: ByteString
                } deriving (Show)

data IABInfo = IABInfo {
                    iabPrefix :: OUIPrefix,
                    iabRange :: IABRange,
                    iabCompanyName :: ByteString,
                    iabCompanyAddress :: ByteString
                } deriving (Show)

-- Skip space, but not \n
skipSpaceNoNewline = skipWhile (\x -> isSpace_w8 x && not (isEndOfLine x))
skipSpaceNoNewline1 = skipWhile1 (\x -> isSpace_w8 x && not (isEndOfLine x))

parseOUIFile :: FilePath -> IO (Either String [OUIInfo])
parseOUIFile filename = do
    fileContent <- LB.readFile filename
    return $ eitherResult $ parse ouiFileParser fileContent

parseIABFile :: FilePath -> IO (Either String [IABInfo])
parseIABFile filename = do
    fileContent <- LB.readFile filename
    return $ eitherResult $ parse iabFileParser fileContent

ouiFileParser :: Parser [OUIInfo]
ouiFileParser = do
    ouiHeaderParser
    result <- many ouiRecordParser
    return result

iabFileParser :: Parser [IABInfo]
iabFileParser = do
    iabHeaderParser
    result <- many iabRecordParser
    return result

ouiHeaderParser :: Parser ()
ouiHeaderParser = do
    -- Leading line: empty
    skipSpaceNoNewline >> endOfLine
    -- OUI header
    skipSpaceNoNewline >> string "OUI/MA-L"
    skipSpaceNoNewline >> string "Organization"
    endOfLine
    -- company id header
    skipSpaceNoNewline >> string "company_id"
    skipSpaceNoNewline >> string "Organization"
    skipSpaceNoNewline >> endOfLine
    -- Address header
    skipSpaceNoNewline >> string "Address" >> endOfLine
    ---- Two whitespace lines
    skipSpaceNoNewline >> endOfLine
    skipSpaceNoNewline >> endOfLine
    -- Return nothing
    return ()

iabHeaderParser :: Parser ()
iabHeaderParser = do
    -- Leading line: empty
    skipSpaceNoNewline >> endOfLine
    -- OUI header
    skipSpaceNoNewline >> string "OUI"
    skipSpaceNoNewline >> string "Organization"
    endOfLine
    -- company id header
    skipSpaceNoNewline >> string "IAB Range"
    skipSpaceNoNewline >> string "Organization"
    skipSpaceNoNewline >> endOfLine
    -- Address header
    skipSpaceNoNewline >> string "Address" >> endOfLine
    ---- Two whitespace lines
    skipSpaceNoNewline >> endOfLine
    skipSpaceNoNewline >> endOfLine
    -- Return nothing
    return ()

-- Consume a line, return non-empty stripped part
stripLineParser1 :: Parser ByteString
stripLineParser1 = skipSpaceNoNewline *> takeWhile1 (not . isEndOfLine) <* endOfLine

addressLineParser :: Parser ByteString
addressLineParser = skipSpaceNoNewline1 *> takeWhile (not . isEndOfLine) <* endOfLine

ouiPrefixParser :: Parser OUIPrefix
ouiPrefixParser = do
    part1 <- hexadecimal
    char '-'
    part2 <- hexadecimal
    char '-'
    part3 <- hexadecimal
    return $ (part1, part2, part3)

iabRangeParser :: Parser IABRange
iabRangeParser = do
    part1 <- hexadecimal
    char '-'
    part2 <- hexadecimal
    return (part1, part2)

-- Parser for the first OUI record line
ouiLineParser :: Parser (OUIPrefix, ByteString)
ouiLineParser = do
    skipSpace
    ouiPrefix <- ouiPrefixParser
    skipSpace
    string "(hex)"
    -- Company name
    companyName <- stripLineParser1
    return (ouiPrefix, companyName)

-- Parses the line with company ID
-- Ignores the company name, because it's the same as
-- in the company ID line (and PRIVATE reservations don't contain it in this line)
companyLineParser :: Parser ByteString
companyLineParser = do
    skipSpace
    companyId <- takeWhile1 isHexDigit_w8
    skipSpace
    string "(base 16)"
    -- Skip company 
    skipWhile (not . isEndOfLine)
    endOfLine
    return companyId

iabLineParser :: Parser IABRange
iabLineParser = do
    skipSpace
    iabRange <- iabRangeParser
    skipSpace
    string "(base 16)"
    -- Skip company 
    skipWhile (not . isEndOfLine)
    endOfLine
    return iabRange

iabRecordParser :: Parser IABInfo
iabRecordParser = do
    (ouiPrefix, companyName) <- ouiLineParser
    iabRange <- iabLineParser
    companyAddress <- many $ addressLineParser
    -- Empty line
    endOfLine
    return $ IABInfo ouiPrefix iabRange companyName $ B.unlines companyAddress

ouiRecordParser :: Parser OUIInfo
ouiRecordParser = do
    (ouiPrefix, companyName) <- ouiLineParser
    companyId <- companyLineParser
    companyAddress <- many $ addressLineParser
    -- Empty line
    endOfLine
    return $ OUIInfo ouiPrefix companyId companyName $ B.unlines companyAddress