How to parse C++ comments from source code using clang
In our previous post, "How to parse C++ ‘struct’ definition from source code using clang", we learned how to parse a C++ struct definition using the clang C++ parser. In this post, we will show how to extract C++ comments from source code using clang.
#include <clang-c/Index.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
/**
 * Print every comment token found in a translation unit to stdout.
 *
 * The full extent of the translation unit's root cursor is tokenized and each
 * token of kind CXToken_Comment is written as one record of the form
 *   "Comment at line <line>, column <column>: <comment text>"
 * where the position is the one reported by clang_getFileLocation() for the
 * start of the token.
 *
 * @param unit Translation unit whose comments are listed.
 */
void extractCommentBlocks(CXTranslationUnit unit) {
    const CXSourceRange range = clang_getCursorExtent(clang_getTranslationUnitCursor(unit));

    // Start from a defined "no tokens" state so the loop and the dispose call
    // below never see indeterminate values.
    CXToken *tokens = nullptr;
    unsigned numTokens = 0;
    clang_tokenize(unit, range, &tokens, &numTokens);

    for (unsigned i = 0; i < numTokens; ++i) {
        if (clang_getTokenKind(tokens[i]) != CXToken_Comment) {
            continue;
        }

        // Only line and column are reported; the file and offset outputs are
        // not needed, so null is passed for them.
        unsigned line = 0;
        unsigned column = 0;
        const CXSourceLocation location = clang_getTokenLocation(unit, tokens[i]);
        clang_getFileLocation(location, nullptr, &line, &column, nullptr);

        // The CXString owns the text; it must stay alive until after printing.
        CXString tokenSpelling = clang_getTokenSpelling(unit, tokens[i]);
        const char *text = clang_getCString(tokenSpelling);
        std::cout << "Comment at line " << line << ", column " << column << ": "
                  << (text != nullptr ? text : "") << '\n';
        clang_disposeString(tokenSpelling);
    }

    clang_disposeTokens(unit, tokens, numTokens);
}
/**
 * Demo entry point: parses an in-memory C++ snippet with libclang and prints
 * every comment it contains.
 *
 * @return 0 on success, 1 if the translation unit could not be parsed.
 */
int main() {
    CXIndex index = clang_createIndex(0, 0);

    // Sample input. Its whitespace is significant: the line/column numbers
    // printed for each comment are positions inside this literal (the literal
    // starts with a newline, so the first comment is on line 2).
    const char *code = R"(
        // This is a single line comment
        typedef struct {
            double a; /* That weird parameter */
            double b; /* Another weird parameter */
        } myParameters;

        /*
         * This is a multi-line
         * comment block
         */
        int function() {
            // Another single line comment
            return 42; /* inline comment */
        }
    )";

    // Hand the snippet to libclang as an unsaved buffer named "test.cpp", so
    // no file of that name has to exist on disk.
    CXUnsavedFile unsavedFile = {"test.cpp", code, static_cast<unsigned long>(std::strlen(code))};
    CXTranslationUnit unit = clang_parseTranslationUnit(index, "test.cpp", nullptr, 0, &unsavedFile, 1, CXTranslationUnit_None);
    if (unit == nullptr) {
        std::cerr << "Failed to parse translation unit." << std::endl;
        // Release the index on the failure path as well.
        clang_disposeIndex(index);
        return 1;
    }

    extractCommentBlocks(unit);

    // Dispose in reverse order of creation.
    clang_disposeTranslationUnit(unit);
    clang_disposeIndex(index);
    return 0;
}
How to compile
On Ubuntu, install the required libraries using
sudo apt -y install libclang-19-dev
and compile the code using
g++ parse_comments.cpp -o parse_comments -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang
Example output
Run using ./parse_comments
Comment at line 2, column 9: // This is a single line comment
Comment at line 4, column 23: /* That weird parameter */
Comment at line 5, column 23: /* Another weird parameter */
Comment at line 8, column 9: /*
* This is a multi-line
* comment block
*/
Comment at line 13, column 13: // Another single line comment
Comment at line 14, column 24: /* inline comment */
Alternate build using cmake
You might need to adjust the version (19.1.1) and other parameters here to get it working properly for your setup.
cmake_minimum_required(VERSION 3.10)
project(parse_comments)

# Require the C++17 standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the LLVM/Clang CMake packages
find_package(LLVM 19.1.1 REQUIRED CONFIG)
find_package(Clang REQUIRED CONFIG)

# Build the executable
add_executable(parse_comments parse_comments.cpp)

# Add the LLVM/Clang include directories for this target only
target_include_directories(parse_comments PRIVATE
    ${LLVM_INCLUDE_DIRS}
    ${CLANG_INCLUDE_DIRS})

# Add the LLVM definitions. LLVM_DEFINITIONS is a space-separated string of
# -D flags, so split it into a CMake list before passing it on.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND "${LLVM_DEFINITIONS}")
target_compile_definitions(parse_comments PRIVATE ${LLVM_DEFINITIONS_LIST})

# Link against libclang
target_link_libraries(parse_comments PRIVATE libclang)
If this post helped you, please consider buying me a coffee or donating via PayPal to support research & publishing of new posts on TechOverflow