Reading matblas substitution matrices in Python


You want to read a substitution matrix in the matblas format (for example, the BLOSUM62 matrix from NCBI) into a numpy ndarray.


Use this snippet:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import with_statement
import numpy

__author__  = "Uli Köhler"
__license__ = "Apache License v2.0"
__version__ = "1.0"

def readMatblasAlignmentMatrix(filename):
    """
    Read a substitution matrix in matblas format.

    Lines starting with "#" are comments; blank lines are skipped. The first
    remaining line that starts with whitespace is the column header. Every
    other line is one matrix row: the row label followed by one integer
    score per column. Rows must appear in the same order as the columns.

    Keyword arguments:
        filename: The filename to read the matrix from

    Returns a tuple (column/row list, numpy substitution matrix), where
    matrix[i, j] is the score found in row i, column j of the file
    (dtype numpy.int32).

    Raises ValueError if the file is not a well-formed matblas matrix
    (missing or repeated header, wrong row length, wrong row label,
    non-integer score, or wrong number of rows).
    """
    columns = None
    matrix = None
    currentRow = 0
    with open(filename) as infile:
        for lineno, line in enumerate(infile, 1):
            # Skip comments and empty lines (e.g. a trailing newline at EOF)
            if line.startswith("#") or not line.strip():
                continue
            if line[0].isspace():  # Column indicator
                if columns is not None:
                    raise ValueError(
                        "line {0}: duplicate column header".format(lineno))
                columns = line.split()
                size = len(columns)
                matrix = numpy.empty((size, size), dtype=numpy.int32)
                continue
            # Matrix row
            if columns is None:
                raise ValueError(
                    "line {0}: matrix row before column header".format(lineno))
            parts = line.split()
            if len(parts) != len(columns) + 1:
                raise ValueError(
                    "line {0}: expected {1} scores, found {2}".format(
                        lineno, len(columns), len(parts) - 1))
            if currentRow >= len(columns):
                raise ValueError(
                    "line {0}: more rows than columns".format(lineno))
            # Rows are required to be in the same order as the columns
            if columns[currentRow] != parts[0]:
                raise ValueError(
                    "line {0}: expected row label {1!r}, found {2!r}".format(
                        lineno, columns[currentRow], parts[0]))
            # Parse explicitly so a malformed score raises ValueError here,
            # and store the file row as a matrix ROW (not a column) so that
            # asymmetric matrices are not transposed.
            matrix[currentRow, :] = [int(score) for score in parts[1:]]
            currentRow += 1
    if columns is None:
        raise ValueError("no column header found in {0!r}".format(filename))
    if currentRow != len(columns):
        # numpy.empty leaves unwritten rows uninitialised; never return them
        raise ValueError(
            "expected {0} matrix rows, found {1}".format(
                len(columns), currentRow))
    return (columns, matrix)