How to compress whitespace in XML using Python

The following Python function uses lxml, a fast XML library, to strip unnecessary whitespace from an XML document. It does not change whitespace inside text nodes; only the whitespace used to lay out the XML structure itself (the indentation and line breaks between elements) is removed.

import io
from lxml import etree

def compress_xml_whitespace(input_bytes: io.BytesIO) -> io.BytesIO:
    """Return a copy of an XML document with its layout whitespace stripped.

    The document is parsed with lxml using ``remove_blank_text=True`` and
    then serialized again as UTF-8 without pretty printing.

    Args:
        input_bytes: Stream holding the XML document. It is rewound to the
            start before parsing, so its current position does not matter.

    Returns:
        A new ``io.BytesIO`` holding the serialized XML, positioned at the
        beginning.
    """
    # Always parse from the first byte, wherever the caller left the stream.
    input_bytes.seek(0)

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    document = etree.parse(input_bytes, parser=blank_stripping_parser)

    # pretty_print=False keeps the serializer from re-adding newlines/indentation.
    serialized = etree.tostring(document, encoding='utf-8', pretty_print=False)

    # A BytesIO built from initial bytes already starts at position 0.
    return io.BytesIO(serialized)

Demonstration

# Sample document: indented markup whose text nodes contain irregular spacing,
# embedded newlines, and one whitespace-only element.
test_xml = """<root>
    <parent>
        <child>
            <subchild>   This is some text with    irregular   spacing.   </subchild>
            <subchild>Another piece of text with
                newlines and     tabs.</subchild>
        </child>
        <child>
            <subchild>Text with
                multiple
                lines.</subchild>
            <subchild>   Leading and trailing spaces   </subchild>
        </child>
    </parent>
    <parent>
        <child>
            <subchild>Mixed    whitespace    types.</subchild>
            <subchild>   </subchild>
        </child>
    </parent>
</root>"""

# Wrap the UTF-8 encoded sample in an in-memory binary stream.
test_xml_bytesio = io.BytesIO(bytes(test_xml, 'utf-8'))

# Bare expression: an interactive session or notebook displays its value.
(
    compress_xml_whitespace(test_xml_bytesio)
    .read()
    .decode('utf-8')
)

Output:

'<root><parent><child><subchild>   This is some text with    irregular   spacing.   </subchild><subchild>Another piece of text with\n                newlines and     tabs.</subchild></child><child><subchild>Text with\n                multiple\n                lines.</subchild><subchild>   Leading and trailing spaces   </subchild></child></parent><parent><child><subchild>Mixed    whitespace    types.</subchild><subchild>   </subchild></child></parent></root>'