Modifying C++ source code using clang: A minimal example
In this post, we will explore how to modify C++ source code using the clang C++ parser. We will create a minimal example that parses a small C++ program, locates a marker comment inside its main() function, and uses clang's Rewriter to insert additional print statements right after that comment.
The example
#include <iostream>
// Parameter set used by the example program: four double members a, b, c, d.
typedef struct {
double a; /* That weird parameter */
double b; /* Another weird parameter */
double c;
double d; // This is documentation for d
} myParameters;
// Example program: zero-initializes a myParameters instance, applies one
// arithmetic operation to each member and prints the resulting values.
int main() {
// Initialize struct to zero
myParameters params = {0.0, 0.0, 0.0, 0.0};
// Arithmetically modify each value
params.a += 10.5;
params.b *= 2.0; // b was initialized to 0.0, so it is still 0 after the multiplication
params.c = params.c + 7.3;
params.d -= 1.2;
// Print the values
std::cout << "a: " << params.a << std::endl;
std::cout << "b: " << params.b << std::endl;
std::cout << "c: " << params.c << std::endl;
std::cout << "d: " << params.d << std::endl;
return 0;
}
When compiled and run, this simple example prints:
a: 10.5
b: 0
c: 7.3
d: -1.2
Modification program source code
#include <clang/AST/AST.h>
#include <clang/AST/ASTConsumer.h>
#include <clang/AST/RecursiveASTVisitor.h>
#include <clang/Frontend/ASTConsumers.h>
#include <clang/Frontend/FrontendActions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Tooling/Tooling.h>
#include <clang/Rewrite/Core/Rewriter.h>
#include <llvm/Support/CommandLine.h>
#include <iostream>
#include <string>
using namespace clang;
using namespace clang::tooling;
using namespace llvm;
// Copy of the example program (x.cpp), embedded as a raw string literal so it
// can be handed to runToolOnCode() directly instead of being read from disk.
// ModificationVisitor searches this text for the comment
// "// Arithmetically modify each value", so that line must stay verbatim.
const std::string SOURCE_CODE = R"(
#include <iostream>
typedef struct {
double a; /* That weird parameter */
double b; /* Another weird parameter */
double c;
double d; // This is documentation for d
} myParameters;
int main() {
// Initialize struct to zero
myParameters params = {0.0, 0.0, 0.0, 0.0};
// Arithmetically modify each value
params.a += 10.5;
params.b *= 2.0;
params.c = params.c + 7.3;
params.d -= 1.2;
// Print the values
std::cout << "a: " << params.a << std::endl;
std::cout << "b: " << params.b << std::endl;
std::cout << "c: " << params.c << std::endl;
std::cout << "d: " << params.d << std::endl;
return 0;
}
)";
/// AST visitor that finds the definition of the program's `main` function and
/// inserts "pre-modification" print statements on the line following the
/// marker comment `// Arithmetically modify each value`.
///
/// The edit is only recorded in the Rewriter passed to the constructor; this
/// class never writes the modified source anywhere itself.
class ModificationVisitor : public RecursiveASTVisitor<ModificationVisitor> {
private:
  ASTContext *Context;
  Rewriter &TheRewriter;

public:
  /// @param Context AST context used to reach the SourceManager and LangOptions.
  /// @param R       Rewriter that receives the inserted text.
  explicit ModificationVisitor(ASTContext *Context, Rewriter &R)
      : Context(Context), TheRewriter(R) {}

  /// Invoked by RecursiveASTVisitor for every function declaration.
  /// Always returns true so that the traversal continues.
  bool VisitFunctionDecl(FunctionDecl *F) {
    // isMain() matches only the program entry point; a plain name comparison
    // would also match e.g. a member function that happens to be called
    // "main". Declarations without a body contain nothing to search.
    if (!F->isMain() || !F->doesThisDeclarationHaveABody())
      return true;

    const SourceLocation Start = F->getBeginLoc();
    const SourceLocation End = F->getEndLoc();
    // The offset arithmetic below is only meaningful for plain file
    // locations, not for invalid locations or macro expansions.
    if (Start.isInvalid() || End.isInvalid() || Start.isMacroID() ||
        End.isMacroID())
      return true;

    // Source text of the whole function, first token through last token.
    SourceManager &SM = Context->getSourceManager();
    const StringRef FuncText = Lexer::getSourceText(
        CharSourceRange::getTokenRange(Start, End), SM, Context->getLangOpts());

    const size_t CommentPos =
        FuncText.find("// Arithmetically modify each value");
    if (CommentPos == StringRef::npos)
      return true;
    std::cout << "Found target comment in main function!" << std::endl;

    // Insert at the start of the line after the comment; if the comment is
    // not followed by a newline there is no such line.
    const size_t LineEnd = FuncText.find('\n', CommentPos);
    if (LineEnd == StringRef::npos)
      return true;

    // FuncText starts at Start, so an offset into it is also an offset from
    // Start in the original buffer.
    const SourceLocation InsertLocation =
        Start.getLocWithOffset(static_cast<int>(LineEnd + 1));

    // Generate pre-modification print statements
    const std::string PreModPrints =
        " // Pre-modification values\n"
        " std::cout << \"Pre-modification a: \" << params.a << std::endl;\n"
        " std::cout << \"Pre-modification b: \" << params.b << std::endl;\n"
        " std::cout << \"Pre-modification c: \" << params.c << std::endl;\n"
        " std::cout << \"Pre-modification d: \" << params.d << std::endl;\n"
        " \n";

    // Rewriter::InsertText returns true when the location is not rewritable,
    // i.e. when nothing was inserted; do not report success in that case.
    if (TheRewriter.InsertText(InsertLocation, PreModPrints)) {
      std::cerr << "Failed to insert pre-modification print statements!"
                << std::endl;
      return true;
    }
    std::cout << "Inserted pre-modification print statements!" << std::endl;
    return true;
  }
};
class ModificationConsumer : public ASTConsumer {
private:
ModificationVisitor Visitor;
public:
explicit ModificationConsumer(ASTContext *Context, Rewriter &R)
: Visitor(Context, R) {}
void HandleTranslationUnit(ASTContext &Context) override {
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
};
class ModificationAction : public ASTFrontendAction {
private:
Rewriter TheRewriter;
public:
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &Compiler,
StringRef InFile) override {
TheRewriter.setSourceMgr(Compiler.getSourceManager(), Compiler.getLangOpts());
return std::make_unique<ModificationConsumer>(&Compiler.getASTContext(), TheRewriter);
}
void EndSourceFileAction() override {
SourceManager &SM = TheRewriter.getSourceMgr();
std::cout << "\n=== Modified Code ===\n";
TheRewriter.getEditBuffer(SM.getMainFileID()).write(llvm::outs());
std::cout << "\n=== End Modified Code ===\n";
}
};
/// Entry point of the modifier tool.
///
/// Runs ModificationAction on the embedded SOURCE_CODE, presented to clang
/// under the virtual file name "input.cpp". The command-line arguments are
/// accepted but not used.
///
/// @return 0 if runToolOnCode reports success, 1 otherwise.
int main([[maybe_unused]] int argc, [[maybe_unused]] const char **argv) {
  std::cout << "Clang-based C++ Code Modifier\n";
  std::cout << "==============================\n\n";

  // Run the tool on the embedded source code; no file on disk and no
  // compilation database are involved.
  const bool Result = runToolOnCode(std::make_unique<ModificationAction>(),
                                    SOURCE_CODE, "input.cpp");
  if (!Result) {
    std::cout << "\nTool execution failed!\n";
    return 1;
  }

  std::cout << "\nTool executed successfully!\n";
  return 0;
}
How to compile
g++ modify_program.cpp -o modify_program -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang -lclang-cpp -lLLVM
How to run
./modify_program
Output
Clang-based C++ Code Modifier
==============================
Found target comment in main function!
Inserted pre-modification print statements!
=== Modified Code ===
#include <iostream>
typedef struct {
double a; /* That weird parameter */
double b; /* Another weird parameter */
double c;
double d; // This is documentation for d
} myParameters;
int main() {
// Initialize struct to zero
myParameters params = {0.0, 0.0, 0.0, 0.0};
// Arithmetically modify each value
// Pre-modification values
std::cout << "Pre-modification a: " << params.a << std::endl;
std::cout << "Pre-modification b: " << params.b << std::endl;
std::cout << "Pre-modification c: " << params.c << std::endl;
std::cout << "Pre-modification d: " << params.d << std::endl;
params.a += 10.5;
params.b *= 2.0;
params.c = params.c + 7.3;
params.d -= 1.2;
// Print the values
std::cout << "a: " << params.a << std::endl;
std::cout << "b: " << params.b << std::endl;
std::cout << "c: " << params.c << std::endl;
std::cout << "d: " << params.d << std::endl;
return 0;
}
=== End Modified Code ===
Tool executed successfully!
If this post helped you, please consider buying me a coffee or donating via PayPal to support research & publishing of new posts on TechOverflow