How to parse C++ comments from source code using clang

In our previous post, “How to parse C++ ‘struct’ definition from source code using clang”, we learned how to parse a C++ struct definition using the clang C++ parser. In this post, we will show how to extract C++ comments from source code using clang's tokenizer.

#include <clang-c/Index.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>

/**
 * Prints every comment token of a translation unit to standard output.
 *
 * The whole extent of the translation unit cursor is tokenized and each token
 * of kind CXToken_Comment is reported as
 * "Comment at line <line>, column <column>: <comment text>".
 *
 * @param unit  The translation unit whose tokens are scanned.
 */
void extractCommentBlocks(CXTranslationUnit unit) {
    const CXSourceRange range = clang_getCursorExtent(clang_getTranslationUnitCursor(unit));

    // Initialize the out-params so the loop and the dispose call below are
    // well-defined even if tokenization yields nothing.
    CXToken *tokens = nullptr;
    unsigned numTokens = 0;
    clang_tokenize(unit, range, &tokens, &numTokens);

    for (unsigned i = 0; i < numTokens; ++i) {
        if (clang_getTokenKind(tokens[i]) != CXToken_Comment) {
            continue;
        }

        // Only line and column are reported, so the file and offset
        // out-params are passed as null.
        const CXSourceLocation location = clang_getTokenLocation(unit, tokens[i]);
        unsigned line = 0;
        unsigned column = 0;
        clang_getFileLocation(location, nullptr, &line, &column, nullptr);

        CXString tokenSpelling = clang_getTokenSpelling(unit, tokens[i]);
        // Guard against a null C string: streaming a null char pointer is
        // undefined behavior.
        const char *text = clang_getCString(tokenSpelling);
        std::cout << "Comment at line " << line << ", column " << column << ": "
                  << (text != nullptr ? text : "") << std::endl;
        clang_disposeString(tokenSpelling);
    }

    clang_disposeTokens(unit, tokens, numTokens);
}

/**
 * Parses an in-memory C++ snippet with libclang and prints all of its comments.
 *
 * The snippet is handed to libclang as an unsaved file named "test.cpp", so
 * nothing has to exist on disk.
 *
 * @return 0 on success, 1 if the translation unit could not be parsed.
 */
int main() {
    CXIndex index = clang_createIndex(0, 0);
    const char *code = R"(
        // This is a single line comment
        typedef struct {
            double a; /* That weird parameter */
            double b; /* Another weird parameter */
        } myParameters;
        
        /*
         * This is a multi-line
         * comment block
         */
        int function() {
            // Another single line comment
            return 42; /* inline comment */
        }
    )";

    CXUnsavedFile unsavedFile = {"test.cpp", code, static_cast<unsigned long>(std::strlen(code))};
    CXTranslationUnit unit = clang_parseTranslationUnit(index, "test.cpp", nullptr, 0, &unsavedFile, 1, CXTranslationUnit_None);
    if (unit == nullptr) {
        std::cerr << "Failed to parse translation unit." << std::endl;
        // Release the index on the failure path as well, so it is not leaked.
        clang_disposeIndex(index);
        return 1;
    }

    extractCommentBlocks(unit);

    // Dispose in reverse order of creation: translation unit first, then index.
    clang_disposeTranslationUnit(unit);
    clang_disposeIndex(index);
    return 0;
}

How to compile

On Ubuntu, install the required libraries using

sudo apt -y install libclang-19-dev

and compile the code using

g++ parse_comments.cpp -o parse_comments -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang

Example output

Run using ./parse_comments

Comment at line 2, column 9: // This is a single line comment
Comment at line 4, column 23: /* That weird parameter */
Comment at line 5, column 23: /* Another weird parameter */
Comment at line 8, column 9: /*
         * This is a multi-line
         * comment block
         */
Comment at line 13, column 13: // Another single line comment
Comment at line 14, column 24: /* inline comment */

Alternate build using cmake

You might need to adjust the LLVM version (19.1.1) and other parameters here to match your setup.

cmake_minimum_required(VERSION 3.10)
project(parse_comments)

# Require the C++17 standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the LLVM and Clang CMake packages
find_package(LLVM 19.1.1 REQUIRED CONFIG)
find_package(Clang REQUIRED CONFIG)

# Add the include directories
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${CLANG_INCLUDE_DIRS})

# Create the executable
add_executable(parse_comments parse_comments.cpp)

# Add the LLVM definitions. LLVM_DEFINITIONS is a single space-separated
# string of flags, so split it into a proper CMake list before passing it on;
# otherwise the whole string is treated as one malformed definition.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
target_compile_definitions(parse_comments PUBLIC ${LLVM_DEFINITIONS_LIST})

# Link against libclang
target_link_libraries(parse_comments PUBLIC libclang)