Problem:
You want to read a substitution matrix in the matblas format (e.g. the BLOSUM62 matrix from NCBI) into a NumPy ndarray.
Solution:
Use this snippet:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reader for substitution matrices stored in the matblas text format."""
import numpy

__author__ = "Uli Köhler"
__license__ = "Apache License v2.0"
__version__ = "1.0"


def readMatblasAlignmentMatrix(filename):
    """
    Read a substitution matrix in matblas format.

    The file is expected to contain optional comment lines starting with
    "#", one header line listing the column symbols, and then one line per
    row consisting of the row symbol followed by one integer per column.
    Blank lines are ignored.

    Keyword arguments:
        filename: The filename to read the matrix from

    Returns a tuple (column/row list, numpy substitution matrix).
    The matrix is a square numpy.int32 array in which matrix[i, j] is the
    value found in the i-th data row and j-th column of the file.

    Raises ValueError if the header is missing, a row has the wrong number
    of entries, a row label does not match the column order, a value is not
    an integer, or the number of rows differs from the number of columns.
    """
    columns = None
    rows = []
    with open(filename) as infile:
        for lineNumber, line in enumerate(infile, start=1):
            parts = line.split()
            # Skip blank lines and comment lines
            if not parts or parts[0].startswith("#"):
                continue
            # The first meaningful line is the column indicator
            if columns is None:
                columns = parts
                continue
            # Everything else is a matrix row: <symbol> <value> <value> ...
            rowIndex = len(rows)
            if rowIndex >= len(columns):
                raise ValueError(
                    "Line {0}: more rows than the {1} declared columns".format(
                        lineNumber, len(columns)))
            if len(parts) != len(columns) + 1:
                raise ValueError(
                    "Line {0}: expected {1} values, found {2}".format(
                        lineNumber, len(columns), len(parts) - 1))
            # Rows must appear in the same order as the columns
            if parts[0] != columns[rowIndex]:
                raise ValueError(
                    "Line {0}: expected row '{1}', found '{2}'".format(
                        lineNumber, columns[rowIndex], parts[0]))
            try:
                rows.append([int(value) for value in parts[1:]])
            except ValueError as err:
                raise ValueError(
                    "Line {0}: non-integer matrix value ({1})".format(
                        lineNumber, err)) from err
    if columns is None:
        raise ValueError("No column header line found in " + repr(filename))
    # A truncated file must not yield a partially filled matrix
    if len(rows) != len(columns):
        raise ValueError(
            "Expected {0} rows, found {1}".format(len(columns), len(rows)))
    # Each file row becomes one matrix row, so matrix[i, j] mirrors the file
    matrix = numpy.array(rows, dtype=numpy.int32)
    return (columns, matrix)