Wie man C++-Structs mit vorhergehenden Kommentarzeilen aus Quellcode mit clang parst
Hinweis: Verwandte Posts:
- Wie man C++-Structs & Inline-Feld-Kommentare zu JSON mit clang parst
- Wie man C++-Kommentare aus Quellcode mit clang parst
In unseren vorherigen Posts (siehe oben) haben wir gezeigt, wie man C++-struct-Definitionen aus Quellcode mit dem Clang-C++-Parser extrahiert. In diesem Post erweitern wir diese Funktionalität um vorhergehende Kommentarzeilen.
Das folgende modifizierte Code-Snippet demonstriert, wie man C++-Structs parst und ihre Felder extrahiert, einschließlich der dem Struct vorhergehenden Kommentarzeile. Derzeit werden nur einzeilige vorhergehende Kommentare erkannt; der Code lässt sich jedoch so erweitern, dass er auch mehrzeilige Kommentare behandelt.
Beispieldaten
example_structs.cpp
/* This struct defines my parameters */
typedef struct {
double a; /* That weird parameter */
double b; /* Another weird parameter */
} myParameters;
typedef struct {
double x; /* The x coordinate */
double y; /* The y coordinate */
} myPoint;
// This struct is used to pass parameters to a function
typedef struct {
myParameters params; /* Parameters for the function */
myPoint point; /* Point to evaluate */
} myFunctionInput;
parse_struct_with_comments.cpp
#include <clang-c/Index.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <fstream>
#include <rapidjson/document.h>
#include <rapidjson/writer.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/prettywriter.h>
// Returns the comment that shares a source line with the start of `cursor`.
//
// The line is found by counting '\n' characters in `sourceCode` up to the
// 1-based line number libclang reports for the start of the cursor's extent.
// Whichever comment marker appears first on that line decides the result:
//   - a block comment that is also closed on this line is returned from its
//     opening marker through its closing marker;
//   - a `//` comment is returned from `//` to the end of the line.
// If a block comment opens first but is not closed on the line, a later `//`
// (if any) is returned instead.
//
// Parameters:
//   cursor      cursor whose first line is inspected.
//   sourceCode  NUL-terminated source text, may be nullptr.
//               NOTE(review): assumed to be the text of the file the cursor
//               lives in; this function does not verify that.
// Returns the comment including its delimiters, or "" when `sourceCode` is
// nullptr, the line number is 0, or the line carries no comment.
std::string extractInlineComment(CXCursor cursor, const char* sourceCode) {
    if (sourceCode == nullptr) {
        return "";
    }

    // Only the line number is needed; the other out-parameters may be null.
    unsigned startLine = 0;
    const CXSourceLocation start = clang_getRangeStart(clang_getCursorExtent(cursor));
    clang_getExpansionLocation(start, nullptr, &startLine, nullptr, nullptr);

    // Line numbers are 1-based, so 0 cannot identify a line in the buffer.
    if (startLine == 0) {
        return "";
    }

    // Advance to the first character of the requested line.
    const char* lineStart = sourceCode;
    for (unsigned currentLine = 1; currentLine < startLine && *lineStart != '\0'; ++lineStart) {
        if (*lineStart == '\n') {
            ++currentLine;
        }
    }

    // Find the end of that line (newline or end of buffer).
    const char* lineEnd = lineStart;
    while (*lineEnd != '\0' && *lineEnd != '\n') {
        ++lineEnd;
    }

    std::string line(lineStart, lineEnd);
    // Drop the '\r' of a CRLF line ending so it does not end up in `//` comments.
    if (!line.empty() && line.back() == '\r') {
        line.pop_back();
    }

    const std::size_t lineCommentPos = line.find("//");
    const std::size_t blockPos = line.find("/*");

    // A block comment only counts when it starts before any `//`; otherwise the
    // block markers are just text inside a line comment.
    const bool blockComesFirst =
        blockPos != std::string::npos &&
        (lineCommentPos == std::string::npos || blockPos < lineCommentPos);
    if (blockComesFirst) {
        // Search after the opening marker so that "/*/" is not taken as closed.
        const std::size_t blockEnd = line.find("*/", blockPos + 2);
        if (blockEnd != std::string::npos) {
            return line.substr(blockPos, blockEnd - blockPos + 2);
        }
    }

    if (lineCommentPos != std::string::npos) {
        return line.substr(lineCommentPos);
    }
    return "";
}
// Returns the single-line comment located on the line directly above the line
// where `cursor` starts.
//
// The previous line is looked up in `sourceCode` by counting '\n' characters,
// stripped of leading/trailing spaces, tabs and carriage returns, and returned
// only if it begins with `//`, or begins with a block-comment opener and also
// contains a block-comment closer.
//
// Parameters:
//   cursor      cursor whose preceding line is inspected.
//   sourceCode  NUL-terminated source text, may be nullptr.
// Returns the trimmed comment line, or "" when `sourceCode` is nullptr, the
// cursor starts on line 1 (or 0), the previous line is blank, or it is not a
// comment of one of the two forms above.
std::string extractPrecedingComment(CXCursor cursor, const char* sourceCode) {
    if (!sourceCode) {
        return "";
    }

    unsigned declLine = 0;
    unsigned declColumn = 0;
    unsigned declOffset = 0;
    CXFile declFile;
    clang_getExpansionLocation(clang_getRangeStart(clang_getCursorExtent(cursor)),
                               &declFile, &declLine, &declColumn, &declOffset);

    // Nothing can precede the first line.
    if (declLine < 2) {
        return "";
    }
    const unsigned wantedLine = declLine - 1;

    // Walk to the first character of the line above the declaration.
    const char* begin = sourceCode;
    for (unsigned seen = 1; seen < wantedLine && *begin != '\0'; ++begin) {
        if (*begin == '\n') {
            ++seen;
        }
    }

    // Walk to the newline (or end of buffer) terminating that line.
    const char* finish = begin;
    while (*finish != '\0' && *finish != '\n') {
        ++finish;
    }

    const std::string raw(begin, finish - begin);

    // Trim surrounding blanks; an all-blank line has no comment.
    const char* const blanks = " \t\r";
    const std::size_t first = raw.find_first_not_of(blanks);
    if (first == std::string::npos) {
        return "";
    }
    const std::size_t last = raw.find_last_not_of(blanks);
    const std::string text = raw.substr(first, last - first + 1);

    const bool isLineComment = text.compare(0, 2, "//") == 0;
    const bool isClosedBlock =
        text.compare(0, 2, "/*") == 0 && text.find("*/") != std::string::npos;

    if (isLineComment || isClosedBlock) {
        return text;
    }
    return "";
}
// Context handed to the field-visiting callback through libclang's
// CXClientData pointer. Field order matters: the struct is brace-initialised
// positionally where it is created.
struct VisitorData {
// Source text passed on to extractInlineComment for every field.
const char* sourceCode;
// JSON array the callback appends one object per field to.
rapidjson::Value* fieldsArray;
// Allocator used for every JSON value the callback creates.
rapidjson::Document::AllocatorType* allocator;
};
void extractStructFields(CXCursor cursor, const char* sourceCode, rapidjson::Document& doc) {
CXCursorKind kind = clang_getCursorKind(cursor);
if (kind == CXCursor_StructDecl) {
CXString structName = clang_getCursorDisplayName(cursor);
std::string structNameStr = clang_getCString(structName);
// Extract preceding comment
std::string precedingComment = extractPrecedingComment(cursor, sourceCode);
// Create struct object in JSON
rapidjson::Value structObj(rapidjson::kObjectType);
rapidjson::Value fieldsArray(rapidjson::kArrayType);
clang_disposeString(structName);
VisitorData data = {sourceCode, &fieldsArray, &doc.GetAllocator()};
clang_visitChildren(cursor, [](CXCursor c, CXCursor parent, CXClientData client_data) {
VisitorData* data = static_cast<VisitorData*>(client_data);
const char* sourceCode = data->sourceCode;
rapidjson::Value* fieldsArray = data->fieldsArray;
rapidjson::Document::AllocatorType* allocator = data->allocator;
CXCursorKind kind = clang_getCursorKind(c);
if (kind == CXCursor_FieldDecl) {
CXString fieldName = clang_getCursorDisplayName(c);
CXType fieldType = clang_getCursorType(c);
CXString typeName = clang_getTypeSpelling(fieldType);
std::string comment = extractInlineComment(c, sourceCode);
std::string fieldNameStr = clang_getCString(fieldName);
std::string typeNameStr = clang_getCString(typeName);
// Create field object
rapidjson::Value fieldObj(rapidjson::kObjectType);
rapidjson::Value nameVal(fieldNameStr.c_str(), *allocator);
rapidjson::Value typeVal(typeNameStr.c_str(), *allocator);
rapidjson::Value commentVal(comment.c_str(), *allocator);
fieldObj.AddMember("name", nameVal, *allocator);
fieldObj.AddMember("type", typeVal, *allocator);
fieldObj.AddMember("comment", commentVal, *allocator);
fieldsArray->PushBack(fieldObj, *allocator);
clang_disposeString(fieldName);
clang_disposeString(typeName);
}
return CXChildVisit_Continue;
}, &data);
// Add struct to document
structObj.AddMember("fields", fieldsArray, doc.GetAllocator());
if (!precedingComment.empty()) {
rapidjson::Value precedingCommentVal(precedingComment.c_str(), doc.GetAllocator());
structObj.AddMember("precedingComment", precedingCommentVal, doc.GetAllocator());
}
rapidjson::Value structNameVal(structNameStr.c_str(), doc.GetAllocator());
doc.AddMember(structNameVal, structObj, doc.GetAllocator());
}
}
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " <filename>" << std::endl;
return 1;
}
const char* filename = argv[1];
CXIndex index = clang_createIndex(0, 0);
CXTranslationUnit unit = clang_parseTranslationUnit(index, filename, nullptr, 0, nullptr, 0,
CXTranslationUnit_DetailedPreprocessingRecord |
CXTranslationUnit_SkipFunctionBodies);
if (unit == nullptr) {
std::cerr << "Failed to parse translation unit." << std::endl;
return 1;
}
// Create JSON document
rapidjson::Document doc;
doc.SetObject();
// Read the source file content
std::ifstream file(filename, std::ios::binary);
if (!file) {
std::cerr << "Failed to open file: " << filename << std::endl;
return 1;
}
file.seekg(0, std::ios::end);
size_t fileSize = file.tellg();
file.seekg(0, std::ios::beg);
std::string sourceCode(fileSize, '\0');
file.read(&sourceCode[0], fileSize);
file.close();
struct CallbackData {
rapidjson::Document* doc;
const char* sourceCode;
};
CallbackData callbackData = {&doc, sourceCode.c_str()};
CXCursor cursor = clang_getTranslationUnitCursor(unit);
clang_visitChildren(cursor, [](CXCursor c, CXCursor parent, CXClientData client_data) {
CallbackData* data = static_cast<CallbackData*>(client_data);
extractStructFields(c, data->sourceCode, *(data->doc));
return CXChildVisit_Continue;
}, &callbackData);
// Convert JSON to string and output
rapidjson::StringBuffer buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
doc.Accept(writer);
std::cout << buffer.GetString() << std::endl;
clang_disposeTranslationUnit(unit);
clang_disposeIndex(index);
return 0;
}Wie man kompiliert
Installieren Sie auf Ubuntu die erforderlichen Bibliotheken mit
install_libclang19.sh
sudo apt -y install libclang-19-dev
und kompilieren Sie den Code mit
build_parse_struct.sh
g++ parse_struct_with_comments.cpp -o parse_struct_with_comments -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang
Testlauf
Laden Sie dies als test_struct.h herunter:
test_struct.h
/* This struct defines my parameters */
typedef struct {
double a; /* That weird parameter */
double b; /* Another weird parameter */
} myParameters;
typedef struct {
double x; /* The x coordinate */
double y; /* The y coordinate */
} myPoint;
// This struct is used to pass parameters to a function
typedef struct {
myParameters params; /* Parameters for the function */
myPoint point; /* Point to evaluate */
} myFunctionInput;
Führen Sie dann das Programm aus:
run_parse_struct_test.sh
./parse_struct_with_comments test_struct.h | jq
Beispiel-Ausgabe
parse_struct_output.json
{
"myParameters": {
"fields": [
{
"name": "a",
"type": "double",
"comment": "/* That weird parameter */"
},
{
"name": "b",
"type": "double",
"comment": "/* Another weird parameter */"
}
],
"precedingComment": "/* This struct defines my parameters */"
},
"myPoint": {
"fields": [
{
"name": "x",
"type": "double",
"comment": "/* The x coordinate */"
},
{
"name": "y",
"type": "double",
"comment": "/* The y coordinate */"
}
]
},
"myFunctionInput": {
"fields": [
{
"name": "params",
"type": "myParameters",
"comment": "/* Parameters for the function */"
},
{
"name": "point",
"type": "myPoint",
"comment": "/* Point to evaluate */"
}
],
"precedingComment": "// This struct is used to pass parameters to a function"
}
}
Check out similar posts by category:
C/C++, Clang, Source Introspection
If this post helped you, please consider buying me a coffee or donating via PayPal to support research & publishing of new posts on TechOverflow