How to parse .xml.gz using PugiXML and boost::iostreams
In our previous post Minimal PugiXML file reader example we provided a short example of how to read from an uncompressed XML file using PugiXML. In practice, many large XML files are distributed as gzip-compressed .xml.gz files.
Since you can use boost::iostreams to decompress gzipped data on the fly and pipe it directly into PugiXML, you don’t need to store the uncompressed data on your hard drive.
#include <iostream>
#include <fstream>
#include <pugixml.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
using namespace std;
using namespace pugi;
int main() {
// Open "raw" gzipped data stream
ifstream file("test.xml.gz", ios_base::in | ios_base::binary);
// Configure decompressor filter
boost::iostreams::filtering_streambuf<boost::iostreams::input> inbuf;
inbuf.push(boost::iostreams::gzip_decompressor());
inbuf.push(file);
//Convert streambuf to istream
istream instream(&inbuf);
// Parse from stream
xml_document doc;
xml_parse_result result = doc.load(instream);
// Print content of root element
cout << "Load result: " << result.description() << "\n"
<< doc.child("root-element").child_value() // "Test text"
<< endl;
}
# Build script for the PugiXML + boost::iostreams example executable.
cmake_minimum_required(VERSION 3.5)
project(pugixml-example CXX)
# REQUIRED makes configuration fail immediately if Boost.Iostreams is missing
find_package(Boost 1.36.0 REQUIRED COMPONENTS iostreams)
include_directories(${Boost_INCLUDE_DIRS})
add_executable(pugixml-example pugixml-example.cpp)
# Link against pugixml and the Boost libraries found above
target_link_libraries(pugixml-example pugixml ${Boost_LIBRARIES})
<?xml version="1.0" encoding="UTF-8"?>
<root-element>Test text</root-element>
Download all three files and then run
# Compress the sample document; the example program opens test.xml.gz
gzip test.xml
# Configure the build in the current directory
cmake .
# Build the pugixml-example executable
make
# Run the example
./pugixml-example
You should see output like this:
Load result: No error
Test text