Hugo Python script to check for broken image links

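This post provides a Python script that checks for broken image links in the Markdown files of a Hugo static site. The script scans all Markdown files in the `content` directory, extracts the image links written in Markdown syntax (`![alt](path)`), and verifies that each referenced file exists in the `static` directory. Place the script in the root directory of your Hugo site, next to `content` and `static`.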

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import urllib.parse

# Directory containing this script; 'content' and 'static' are looked up
# right next to it.
_SCRIPT_DIR = os.path.dirname(__file__)

# Where the Markdown sources are searched and where referenced images
# are expected to exist on disk.
CONTENT_DIR = os.path.join(_SCRIPT_DIR, 'content')
STATIC_DIR = os.path.join(_SCRIPT_DIR, 'static')

# Markdown image syntax ![alt](target): group 1 captures everything between
# the parentheses (any run of characters up to the first ')').
IMAGE_REGEX = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')

def find_md_files(root):
    """Yield the path of every file ending in '.md' below *root*.

    The tree is traversed with os.walk, so files in nested subdirectories
    are included. Each yielded path is the containing directory joined
    with the file name.
    """
    for current_dir, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(current_dir, name)
            for name in names
            if name.endswith('.md')
        )

# Optional quoted title that may follow the destination inside the
# parentheses, e.g. ![alt](/images/a.png "My title").
_IMAGE_TITLE_REGEX = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')

# Targets that do not refer to a file of this site: anything with a URL
# scheme (https:, data:, ...) or a protocol-relative //host/... reference.
_REMOTE_TARGET_REGEX = re.compile(r'^(?:[A-Za-z][A-Za-z0-9+.\-]*:|//)')

def _local_image_path(raw_target):
    """Normalize a raw Markdown image target to a local file path.

    Strips surrounding whitespace, an optional quoted title, optional
    <...> wrapping, the query string and the fragment, then percent-decodes
    the remainder.

    Returns None when the target is remote (has a URL scheme or starts
    with '//'), because such a target cannot be checked on disk.
    """
    target = _IMAGE_TITLE_REGEX.sub('', raw_target.strip())
    # A destination may be written as <path>; the brackets are not part of it.
    if target.startswith('<') and target.endswith('>'):
        target = target[1:-1]
    if _REMOTE_TARGET_REGEX.match(target):
        return None
    # Remove query params/fragments if present
    without_suffix = target.split('?', 1)[0].split('#', 1)[0]
    return urllib.parse.unquote(without_suffix)

def check_images():
    """Report Markdown image links whose file is missing from STATIC_DIR.

    Every '.md' file below CONTENT_DIR is read as UTF-8 and scanned with
    IMAGE_REGEX. Each local image target is resolved against STATIC_DIR
    (a leading '/' is dropped so the join stays inside STATIC_DIR) and
    checked with os.path.isfile. Remote targets such as https:// URLs are
    skipped instead of being reported as missing.

    Prints either the list of '<markdown file>: <image target>' pairs that
    could not be found, or 'All images found.'. Returns None.
    """
    missing = []
    for md_file in find_md_files(CONTENT_DIR):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        for match in IMAGE_REGEX.finditer(content):
            img_path = match.group(1)
            local_path = _local_image_path(img_path)
            if local_path is None:
                # Remote image: nothing to verify on the local file system.
                continue
            static_img_path = os.path.join(STATIC_DIR, local_path.lstrip('/'))
            if not os.path.isfile(static_img_path):
                # Keep the target exactly as written so it is easy to grep for.
                missing.append((md_file, img_path))
    if missing:
        print('Missing images:')
        for md_file, img_path in missing:
            print(f'{md_file}: {img_path}')
    else:
        print('All images found.')

# Run the check only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    check_images()

Example output

python check-images.py
Missing images:
/home/user/TechOverflow/content/post/2023/6/esp32-max31855-thermocouple-lcd-minimal-example-using-platformio.md: /images/2023/06/MAX31855-LCD-example-1024x617.jpg
/home/user/TechOverflow/content/post/2023/6/st7735r-lvgl-live-update-chart-example-for-platformio.md: /images/2023/06/LVGL-Live-update-chart-1024x693.jpg
/home/user/TechOverflow/content/post/2023/2/how-to-pair-bluetooth-gamepad-game-controller-from-amazon.md: /images/2023/02/Gamepad-3-1024x591.jpg
/home/user/TechOverflow/content/post/2023/2/how-to-pair-bluetooth-gamepad-game-controller-from-amazon.md: /images/2023/02/Gamepad-Pairing-1024x614.jpg
/home/user/TechOverflow/content/post/2023/8/how-to-matplotlib-plt-savefig-to-a-io-bytesio-buffer.md: /images/2023/08/plot-1024x768.png
/home/user/TechOverflow/content/post/2023/1/how-to-access-inventree-backup-settings.md: /images/2023/01/Inventree-Backup-Task-1024x706.png
/home/user/TechOverflow/content/post/2023/5/adafruit-st7735r-tft-display-minimal-text-example-for-platformio.md: /images/2023/05/ST7735R-counter-example-2-872x1024.jpg
/home/user/TechOverflow/content/post/2022/8/how-to-save-image-from-jupyter-notebook-using-right-click.md: /images/2022/08/JupyterMenu-194x300.png
/home/user/TechOverflow/content/post/2022/1/pre-and-post-script-for-veeam.md: /images/2022/01/veeam-1-1024x620.png
/home/user/TechOverflow/content/post/2018/12/how-to-use-the-old-classic-editor-in-wordpress-5-0.md: /images/2018/12/Screenshot_20181216_004907-300x49.png
[...]