Modifying C++ source code using clang: A minimal example

In this post, we will explore how to modify C++ source code using the clang C++ parser. We will create a minimal example that parses a small C++ program, locates a marker comment inside its main() function, and uses clang's Rewriter to insert additional print statements directly after that comment.

The example

#include <iostream>

typedef struct {
    double a; /* That weird parameter */
    double b; /* Another weird parameter */
    double c;
    double d; // This is documentation for d
} myParameters;

int main() {
    // Initialize struct to zero
    myParameters params = {0.0, 0.0, 0.0, 0.0};
    
    // Arithmetically modify each value
    params.a += 10.5;
    params.b *= 2.0;
    params.c = params.c + 7.3;
    params.d -= 1.2;
    
    // Print the values
    std::cout << "a: " << params.a << std::endl;
    std::cout << "b: " << params.b << std::endl;
    std::cout << "c: " << params.c << std::endl;
    std::cout << "d: " << params.d << std::endl;
    
    return 0;
}

This simple example prints

a: 10.5
b: 0
c: 7.3
d: -1.2

Modification program source code

#include <clang/AST/AST.h>
#include <clang/AST/ASTConsumer.h>
#include <clang/AST/RecursiveASTVisitor.h>
#include <clang/Frontend/ASTConsumers.h>
#include <clang/Frontend/FrontendActions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Tooling/Tooling.h>
#include <clang/Rewrite/Core/Rewriter.h>
#include <llvm/Support/CommandLine.h>
#include <iostream>
#include <string>

using namespace clang;
using namespace clang::tooling;
using namespace llvm;

// Embedded copy of the example program (x.cpp). It is handed to clang as an in-memory string, so no input file is needed on disk.
const std::string SOURCE_CODE = R"(
#include <iostream>

typedef struct {
    double a; /* That weird parameter */
    double b; /* Another weird parameter */
    double c;   
    double d; // This is documentation for d
} myParameters;

int main() {
    // Initialize struct to zero
    myParameters params = {0.0, 0.0, 0.0, 0.0};
    
    // Arithmetically modify each value
    params.a += 10.5;
    params.b *= 2.0;
    params.c = params.c + 7.3;
    params.d -= 1.2;
    
    // Print the values
    std::cout << "a: " << params.a << std::endl;
    std::cout << "b: " << params.b << std::endl;
    std::cout << "c: " << params.c << std::endl;
    std::cout << "d: " << params.d << std::endl;
    
    return 0;
}
)";

/// AST visitor that looks for the definition of main() and, if its source
/// text contains the marker comment "// Arithmetically modify each value",
/// records an insertion of "pre-modification" print statements on the line
/// following that comment.
///
/// The edit is only recorded in the Rewriter passed to the constructor; this
/// class never emits the rewritten buffer itself.
class ModificationVisitor : public RecursiveASTVisitor<ModificationVisitor> {
private:
    ASTContext *Context;
    Rewriter &TheRewriter;

public:
    /// @param Context AST context of the translation unit being visited.
    /// @param R       Rewriter that receives the text insertion.
    explicit ModificationVisitor(ASTContext *Context, Rewriter &R)
        : Context(Context), TheRewriter(R) {}

    /// Invoked by RecursiveASTVisitor for every function declaration.
    /// @return always true, so the traversal continues.
    bool VisitFunctionDecl(FunctionDecl *F) {
        // Only the *definition* of the program entry point is of interest;
        // body-less declarations have no comment to search for.
        if (!F->isMain() || !F->doesThisDeclarationHaveABody()) {
            return true;
        }

        SourceManager &SM = Context->getSourceManager();
        const SourceLocation Start = F->getBeginLoc();
        const SourceLocation End = F->getEndLoc();

        // Raw source text of the whole function, comments included.
        const std::string FuncStr = Lexer::getSourceText(
            CharSourceRange::getTokenRange(Start, End), SM, Context->getLangOpts()).str();

        const size_t CommentPos = FuncStr.find("// Arithmetically modify each value");
        if (CommentPos == std::string::npos) {
            return true;
        }
        std::cout << "Found target comment in main function!" << std::endl;

        // The insertion point is the first character of the line after the comment.
        const size_t LineEnd = FuncStr.find('\n', CommentPos);
        if (LineEnd == std::string::npos) {
            return true;
        }

        // FuncStr starts at Start, so offsets into it are offsets from Start.
        const SourceLocation InsertLocation =
            Start.getLocWithOffset(static_cast<int>(LineEnd + 1));

        const std::string PreModPrints =
            "    // Pre-modification values\n"
            "    std::cout << \"Pre-modification a: \" << params.a << std::endl;\n"
            "    std::cout << \"Pre-modification b: \" << params.b << std::endl;\n"
            "    std::cout << \"Pre-modification c: \" << params.c << std::endl;\n"
            "    std::cout << \"Pre-modification d: \" << params.d << std::endl;\n"
            "    \n";

        // Rewriter::InsertText returns true when the location could NOT be
        // rewritten, so only report success when it returns false.
        if (TheRewriter.InsertText(InsertLocation, PreModPrints)) {
            std::cerr << "Failed to insert pre-modification print statements!" << std::endl;
        } else {
            std::cout << "Inserted pre-modification print statements!" << std::endl;
        }
        return true;
    }
};

class ModificationConsumer : public ASTConsumer {
private:
    ModificationVisitor Visitor;

public:
    explicit ModificationConsumer(ASTContext *Context, Rewriter &R)
        : Visitor(Context, R) {}

    void HandleTranslationUnit(ASTContext &Context) override {
        Visitor.TraverseDecl(Context.getTranslationUnitDecl());
    }
};

/// Frontend action that owns the Rewriter, wires it to a ModificationConsumer,
/// and prints the rewritten main file once the source file has been processed.
class ModificationAction : public ASTFrontendAction {
private:
    Rewriter TheRewriter;

public:
    /// Binds the rewriter to the compiler's source manager and language
    /// options, then creates the consumer that performs the modification.
    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &Compiler,
                                                   StringRef InFile) override {
        TheRewriter.setSourceMgr(Compiler.getSourceManager(), Compiler.getLangOpts());
        return std::make_unique<ModificationConsumer>(&Compiler.getASTContext(), TheRewriter);
    }

    /// Prints the (possibly modified) main file between two banner lines.
    void EndSourceFileAction() override {
        SourceManager &SM = TheRewriter.getSourceMgr();
        // std::cout and llvm::outs() keep separate buffers. Flush at each
        // hand-over so the banners and the code appear in order even when
        // stdout is redirected to a file or pipe.
        std::cout << "\n=== Modified Code ===\n" << std::flush;
        TheRewriter.getEditBuffer(SM.getMainFileID()).write(llvm::outs());
        llvm::outs().flush();
        std::cout << "\n=== End Modified Code ===\n";
    }
};

/// Entry point: runs ModificationAction over the embedded SOURCE_CODE and
/// reports whether the tool run succeeded.
///
/// Command-line arguments are accepted but not used.
/// @return 0 on success, 1 if runToolOnCode reports failure.
int main(int argc, const char **argv) {
    std::cout << "Clang-based C++ Code Modifier\n";
    std::cout << "==============================\n\n";

    // The source is passed as an in-memory string; "input.cpp" is only the
    // file name clang associates with it.
    const bool Result =
        runToolOnCode(std::make_unique<ModificationAction>(), SOURCE_CODE, "input.cpp");

    if (!Result) {
        std::cout << "\nTool execution failed!\n";
        return 1;
    }

    std::cout << "\nTool executed successfully!\n";
    return 0;
}

How to compile

g++ modify_program.cpp -o modify_program -std=c++17 -I/usr/lib/llvm-19/include -L/usr/lib/llvm-19/lib -lclang -lclang-cpp -lLLVM

How to run

./modify_program

Output

Clang-based C++ Code Modifier
==============================

Found target comment in main function!
Inserted pre-modification print statements!

=== Modified Code ===

#include <iostream>

typedef struct {
    double a; /* That weird parameter */
    double b; /* Another weird parameter */
    double c;   
    double d; // This is documentation for d
} myParameters;

int main() {
    // Initialize struct to zero
    myParameters params = {0.0, 0.0, 0.0, 0.0};
    
    // Arithmetically modify each value
    // Pre-modification values
    std::cout << "Pre-modification a: " << params.a << std::endl;
    std::cout << "Pre-modification b: " << params.b << std::endl;
    std::cout << "Pre-modification c: " << params.c << std::endl;
    std::cout << "Pre-modification d: " << params.d << std::endl;
    
    params.a += 10.5;
    params.b *= 2.0;
    params.c = params.c + 7.3;
    params.d -= 1.2;
    
    // Print the values
    std::cout << "a: " << params.a << std::endl;
    std::cout << "b: " << params.b << std::endl;
    std::cout << "c: " << params.c << std::endl;
    std::cout << "d: " << params.d << std::endl;
    
    return 0;
}

=== End Modified Code ===

Tool executed successfully!