index-symbols.py to list all symbols for an entire directory of shared objects

This simple script scans an entire directory of shared objects (*.so) and lists all symbols they export.

Run like this:

./index-symbols.py /opt/ros/jazzy/lib/ -o ros-index.txt

This will generate a huge (75 megabytes) file with all symbols from the ROS2 libraries, consisting of self-contained sections such as:

Library: librviz_rendering.so
Type: T
Symbol: _ZN14rviz_rendering5Arrow8setScaleERKN4Ogre6VectorILi3EfEE
Demangled: rviz_rendering::Arrow::setScale(Ogre::Vector<3, float> const&)

Using this file, it’s easy to search for missing symbols using grep or a text editor such as Visual Studio Code.

The script

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Uli Köhler (techoverflow.net)
# License: Apache License 2.0
import argparse
import subprocess
import os
import sys
from tqdm import tqdm
from collections import defaultdict
from typing import Dict, List, Tuple
from concurrent.futures import ThreadPoolExecutor

def get_symbols(library_path: str) -> List[Tuple[str, str]]:
    """
    Extract symbols from a shared library using ``nm -D``.

    Args:
        library_path: Path of the shared object handed to nm.

    Returns:
        A list of ``(symbol, symbol_type)`` tuples, one for every nm output
        line with at least three whitespace-separated fields
        (``<address> <type> <name>``). Shorter lines are skipped; nm prints
        undefined symbols without an address column, so those are not part
        of the result. An empty list is returned when nm cannot be started
        or exits with a non-zero status.
    """
    try:
        raw_output = subprocess.check_output(['nm', '-D', library_path],
                                             stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # Best effort: a file nm rejects, or an nm binary that is missing or
        # not executable (OSError), yields "no symbols" instead of aborting
        # the scan of the remaining libraries.
        return []

    # Replace undecodable bytes instead of raising UnicodeDecodeError, so one
    # odd symbol name cannot take down the whole run.
    nm_output = raw_output.decode(errors='replace')

    symbols = []
    for line in nm_output.splitlines():
        parts = line.split()
        # Blank lines and two-field lines (no address) fall through here.
        if len(parts) >= 3:
            symbol_type = parts[1]
            symbol = ' '.join(parts[2:])
            symbols.append((symbol, symbol_type))

    return symbols

def demangle_cpp_symbol(symbol: str) -> str:
    """
    Demangle a C++ symbol using c++filt.

    Args:
        symbol: The (possibly mangled) symbol name; it is passed to c++filt
            as a single command-line argument.

    Returns:
        The whitespace-stripped output of c++filt, or ``symbol`` unchanged
        when c++filt cannot be started or exits with a non-zero status.
    """
    try:
        raw_output = subprocess.check_output(['c++filt', symbol],
                                             stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable c++filt binary; falling
        # back to the raw name honours the "original if demangling fails"
        # contract instead of raising from a worker thread.
        return symbol

    # Tolerate undecodable bytes rather than raising UnicodeDecodeError.
    return raw_output.decode(errors='replace').strip()

def process_symbol(symbol_tuple):
    """
    Build the output record for one ``(symbol, symbol_type)`` pair.

    The returned dict always has the keys 'symbol' and 'type'. A 'demangled'
    key is added only when the symbol was sent to demangle_cpp_symbol() and
    the result differs from the raw name.
    """
    name, kind = symbol_tuple
    record = {'symbol': name, 'type': kind}

    # Only names containing an underscore or "std::" are handed to the
    # demangler; everything else is returned as-is.
    if '_' not in name and 'std::' not in name:
        return record

    readable = demangle_cpp_symbol(name)
    if readable != name:
        record['demangled'] = readable

    return record

def process_libraries(directory: str) -> Dict[str, List[dict]]:
    """
    Process all shared libraries in the given directory.

    Args:
        directory: A directory to scan (non-recursively), or the path of a
            single file. In both cases only names that end in '.so' or
            contain '.so.' are processed.

    Returns:
        A dictionary mapping each processed filename to the list of records
        produced by process_symbol() for the symbols get_symbols() reported.
        A processed library without symbols maps to an empty list.
    """
    results = defaultdict(list)

    if os.path.isfile(directory):
        filenames = [os.path.basename(directory)]
        directory = os.path.dirname(directory)
    else:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # generated index reproducible between runs.
        filenames = sorted(os.listdir(directory))

    # One thread pool for the whole run instead of creating and tearing down
    # 64 threads for every single library.
    with ThreadPoolExecutor(max_workers=64) as executor:
        for filename in tqdm(filenames):
            if not (filename.endswith('.so') or '.so.' in filename):
                continue

            filepath = os.path.join(directory, filename)
            symbols = get_symbols(filepath)

            # executor.map() yields results in input order, so the records
            # stay in the order nm reported the symbols.
            results[filename].extend(executor.map(process_symbol, symbols))

    return results

def main():
    """
    Command-line entry point.

    Parses the arguments, indexes the given directory (or single file) with
    process_libraries() and writes one text section per symbol to the
    --output file, or to stdout when no output file is given.

    Returns:
        0 on success. Invalid arguments, including a non-existent input
        path, terminate the program through argparse's error handling.
    """
    parser = argparse.ArgumentParser(description='List symbols from shared libraries in a directory')
    parser.add_argument('directory', help='Directory containing shared libraries, or a single .so file')
    parser.add_argument('-o', '--output', help='Output file (optional)')
    args = parser.parse_args()

    # Report a bad input path as a usage error instead of a traceback.
    if not os.path.exists(args.directory):
        parser.error(f"{args.directory}: no such file or directory")

    # Open the destination before the (potentially very slow) scan so that an
    # unwritable output path fails immediately rather than after all the work.
    out_file = open(args.output, 'w', encoding='utf-8') if args.output else sys.stdout

    try:
        results = process_libraries(args.directory)

        for filename, symbols in results.items():
            print(f"\n=== {filename} ===\n", file=out_file)
            for symbol_info in symbols:
                print(f"Library: {filename}", file=out_file)
                print(f"Type: {symbol_info['type']}", file=out_file)
                print(f"Symbol: {symbol_info['symbol']}", file=out_file)
                if 'demangled' in symbol_info:
                    print(f"Demangled: {symbol_info['demangled']}", file=out_file)
                print("", file=out_file)
    finally:
        # Close only a file we opened ourselves; never close stdout.
        if out_file is not sys.stdout:
            out_file.close()

    return 0

if __name__ == '__main__':
    # Use sys.exit() rather than the exit() helper: the latter is injected by
    # the site module for interactive use and is not guaranteed to exist
    # (for example when Python is started with -S).
    sys.exit(main())