import pandas as pd
from lxml import etree
from io import BytesIO
# XML namespace prefixes used by the lookup paths below.
_TCX_NAMESPACES = {
    'ns': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2',
    'ns3': 'http://www.garmin.com/xmlschemas/ActivityExtension/v2',
}

# Output column -> path (relative to a Trackpoint element) of the element
# whose text holds that column's value. Insertion order is the column order.
_TRACKPOINT_FIELDS = {
    'Time': 'ns:Time',
    'LatitudeDegrees': 'ns:Position/ns:LatitudeDegrees',
    'LongitudeDegrees': 'ns:Position/ns:LongitudeDegrees',
    'AltitudeMeters': 'ns:AltitudeMeters',
    'DistanceMeters': 'ns:DistanceMeters',
    'HeartRateBpm': 'ns:HeartRateBpm/ns:Value',
    'Cadence': 'ns:Cadence',
    'Speed': 'ns:Extensions/ns3:TPX/ns3:Speed',
    'Power': 'ns:Extensions/ns3:TPX/ns3:Watts',
}

# Every extracted column except the timestamp is converted to a number.
_NUMERIC_COLUMNS = [name for name in _TRACKPOINT_FIELDS if name != 'Time']


def parse_tcx(tcx: bytes) -> pd.DataFrame:
    """Parse a TCX document into a DataFrame with one row per Trackpoint.

    Args:
        tcx: Raw bytes of the TCX (XML) document.

    Returns:
        A DataFrame indexed by ``Time`` (parsed with ``pd.to_datetime``) with
        the numeric columns ``LatitudeDegrees``, ``LongitudeDegrees``,
        ``AltitudeMeters``, ``DistanceMeters``, ``HeartRateBpm``, ``Cadence``,
        ``Speed`` and ``Power``. A value that is absent from a trackpoint, or
        that cannot be parsed as a number, is ``NaN``. A document without any
        trackpoints yields an empty DataFrame that still has these columns.

    Raises:
        Whatever ``lxml`` raises for input that is not well-formed XML, and
        whatever ``pd.to_datetime`` raises for unparseable timestamps; neither
        is caught here.
    """
    # The bytes may come from an untrusted upload: do not expand entities and
    # never fetch external resources while parsing.
    parser = etree.XMLParser(
        remove_blank_text=True,
        resolve_entities=False,
        no_network=True,
    )
    root = etree.parse(BytesIO(tcx), parser=parser).getroot()

    rows = []
    for trackpoint in root.xpath('//ns:Trackpoint', namespaces=_TCX_NAMESPACES):
        # findtext() gives None for a missing element and '' for an empty
        # one; both are stored as None so they count as missing values.
        rows.append({
            column: trackpoint.findtext(path, namespaces=_TCX_NAMESPACES) or None
            for column, path in _TRACKPOINT_FIELDS.items()
        })

    # Passing the column list explicitly keeps every column present even when
    # `rows` is empty; otherwise the conversions below fail with KeyError.
    df = pd.DataFrame(rows, columns=list(_TRACKPOINT_FIELDS))

    for column in _NUMERIC_COLUMNS:
        df[column] = pd.to_numeric(df[column], errors='coerce')
    df['Time'] = pd.to_datetime(df['Time'])

    return df.set_index('Time')