Reading matblas substitution matrices in Python
Problem:
You want to read a substitution matrix in the matblas format (e.g. the BLOSUM62 matrix from NCBI) into a NumPy ndarray.
Solution:
Use this snippet:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import with_statement
import numpy
__author__ = "Uli Köhler"
__license__ = "Apache License v2.0"
__version__ = "1.0"
def readMatblasAlignmentMatrix(filename):
    """
    Read a substitution matrix in matblas format.

    The file is expected to contain optional comment lines starting with
    "#", one header line listing the column labels, and then one line per
    row consisting of the row label followed by one integer score per
    column. Blank lines are ignored. Rows must appear in the same order
    as the columns.

    Keyword arguments:
        filename: The filename to read the matrix from

    Returns a tuple (column/row list, numpy substitution matrix), where
    the matrix is a square int32 array and matrix[i, j] is the score
    found in row i, column j of the file.

    Raises ValueError if the file has no header line, if a row has the
    wrong number of entries or an unexpected label, or if the number of
    rows does not match the number of columns.
    """
    columns = None
    matrix = None
    row_index = 0
    with open(filename) as infile:
        for line in infile:
            if line.startswith("#"):
                continue
            parts = line.split()
            if not parts:
                # Blank (or whitespace-only) lines carry no data
                continue
            if columns is None:
                # The first data line is the column indicator
                columns = parts
                size = len(columns)
                matrix = numpy.empty((size, size), dtype=numpy.int32)
                continue
            # Everything after the header is a matrix row
            if row_index >= len(columns):
                raise ValueError(
                    "Too many rows: expected %d" % len(columns))
            if len(parts) != len(columns) + 1:
                raise ValueError(
                    "Row %d has %d entries, expected %d"
                    % (row_index, len(parts) - 1, len(columns)))
            # Rows are required to be in the same order as the columns
            if parts[0] != columns[row_index]:
                raise ValueError(
                    "Row %d is labelled %r, expected %r"
                    % (row_index, parts[0], columns[row_index]))
            # Store the file row as a matrix ROW so that asymmetric
            # matrices keep their orientation; convert to int explicitly
            # instead of relying on implicit string casting.
            matrix[row_index, :] = [int(value) for value in parts[1:]]
            row_index += 1
    if columns is None:
        raise ValueError("No column header line found in %r" % (filename,))
    if row_index != len(columns):
        # Otherwise the remaining rows would be uninitialised memory
        raise ValueError(
            "Expected %d rows, found %d" % (len(columns), row_index))
    return (columns, matrix)