How to parse jsonlines using boost::json (minimal example)

The following example shows how to parse a jsonlines file using boost::json. Since a jsonlines file is just a sequence of lines, each containing one JSON object, we can simply call boost::json::parse on each line without any special handling.

This code can parse a 4.5GB jsonlines file with 4442720 lines in 42 seconds on my i7-6700 Desktop (printing the output was disabled for this benchmark).

#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>

#include <boost/json.hpp>

class JSONLineProcessor {
public:
    JSONLineProcessor(const std::string& filename) : file(filename) {
        if (!file.is_open()) {
            throw std::runtime_error("Error: Could not open file.");
        }
    }

    void process_lines() {
        std::string line;
        while (std::getline(file, line)) {
            process_line(line);
        }
    }

private:
    std::ifstream file;

    void process_line(const std::string& line) {
        try {
            // Parse the JSON line
            boost::json::value json_value = boost::json::parse(line);

            // Check if it's a JSON object
            if (json_value.is_object()) {
                process_json(json_value.as_object());
            } else {
                std::cerr << "Error: Parsed value is not a JSON object." << std::endl;
            }
        } catch (const boost::json::system_error& e) {
            // Handle parsing errors
            std::cerr << "Error parsing JSON line: " << e.what() << std::endl;
        }
    }

    void process_json(const boost::json::object& obj) {
        // For the sake of example, just print the JSON object
        std::cout << "Processing JSON object: " << boost::json::serialize(obj) << std::endl;
    }
};

/// Entry point: expects the path of a jsonlines file as the first argument.
///
/// Returns 0 on success and 1 if the argument is missing or an exception
/// escapes from opening or processing the file.
int main(int argc, char** argv) {
    if(argc <= 1) {
        // argv[0] may be null when argc == 0, so fall back to a fixed name
        // instead of streaming a null pointer.
        const char* program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "jsonlines_parser";
        std::cerr << "Usage: " << program << " <jsonlines file>" << std::endl;
        return 1;
    }

    try {
        JSONLineProcessor processor(argv[1]);
        processor.process_lines();
    } catch (const std::exception& e) {
        // Covers the "could not open file" error thrown by the constructor as
        // well as anything thrown while processing.
        std::cerr << e.what() << std::endl;
        return 1;
    }

    return 0;
}

Compile like this:

g++ -std=c++17 -march=native -O3 -o jsonlines_parser jsonlines_parser.cpp -lboost_json

-march=native and -O3 are optional, but they can speed up the parsing process. Note that -march=native will make the binary non-portable since it’s compiled to run on processors with the same architecture as the one you’re compiling on. See the -march documentation for more information.