git_cleaner.py: git gc rekursiv auf allen Repositorys ausführen

Das Skript git_cleaner.py durchsucht ein angegebenes Verzeichnis (standardmäßig das aktuelle Verzeichnis) rekursiv nach Git-Repositorys und führt in jedem davon git gc aus. Es erfasst die Größe und die Dateianzahl des .git-Verzeichnisses vor und nach der Ausführung von git gc, sodass Sie sehen können, wie viel Speicherplatz zurückgewonnen und wie viele Dateien entfernt wurden. Am Ende gibt das Skript außerdem eine Zusammenfassung über alle verarbeiteten Repositorys aus.

git_cleaner.py
#!/usr/bin/env python3
import os
import subprocess
import argparse
import sys

def get_dir_stats(path):
    """Return ``(total_size_bytes, total_file_count)`` for the tree under *path*.

    Walks *path* with ``os.walk`` and sums the sizes of every non-symlink
    file entry. Symbolic links are skipped entirely: they are neither sized
    nor counted.

    Files that cannot be stat'ed (for example because they disappeared
    between being listed and being measured) are skipped instead of
    aborting the whole walk with an ``OSError``.

    Args:
        path: Directory whose contents should be measured.

    Returns:
        A tuple ``(total_size, file_count)``. ``(0, 0)`` is returned when the
        walk yields no files.
    """
    total_size = 0
    file_count = 0
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            file_path = os.path.join(dirpath, name)
            if os.path.islink(file_path):
                continue
            try:
                size = os.path.getsize(file_path)
            except OSError:
                # The entry vanished or is not stat-able; leave it out of
                # both totals so size and count stay consistent.
                continue
            total_size += size
            file_count += 1
    return total_size, file_count

def format_size(bytes_size):
    """Render a byte count as a string with two decimals and a unit.

    The value is divided by 1024 until its magnitude drops below 1024,
    stepping through B, KB, MB and GB. Anything still at or above 1024 GB
    is reported in TB. Negative values keep their sign.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = bytes_size
    index = 0
    while index < len(units):
        if abs(value) < 1024:
            return f"{value:.2f} {units[index]}"
        value = value / 1024
        index += 1
    # Exhausted the unit table: whatever remains is expressed in terabytes.
    return f"{value:.2f} TB"

def main():
    """Command-line entry point: run ``git gc`` in every repository found.

    Parses an optional ``target_dir`` argument (default ``"."``), walks that
    directory recursively and, for each directory containing a ``.git``
    sub-directory, measures ``.git`` before and after ``git gc`` and prints
    the difference. A summary over all repositories is printed at the end.

    Command-line options not recognised by this script are forwarded
    unchanged to ``git gc``.

    Exits with status 1 if ``target_dir`` is not a directory or if the
    ``git`` executable cannot be started at all. A ``git gc`` failure in a
    single repository is reported and the scan continues.
    """
    parser = argparse.ArgumentParser(
        description="Recursively run 'git gc', reporting space and files saved."
    )
    parser.add_argument(
        "target_dir",
        nargs="?",
        default=".",
        help="Directory to search (default: '.')"
    )

    # parse_known_args keeps unrecognised options so they can be handed to git gc.
    args, unknown_args = parser.parse_known_args()
    target_path = os.path.abspath(args.target_dir)

    if not os.path.isdir(target_path):
        print(f"Error: {target_path} is not a directory.", file=sys.stderr)
        sys.exit(1)

    total_saved_bytes = 0
    total_files_removed = 0
    repos_processed = 0

    print(f"--- Scanning: {target_path} ---")

    for root, dirs, _ in os.walk(target_path):
        if ".git" not in dirs:
            continue

        # Prune in place so os.walk does not descend into the .git folder.
        dirs.remove(".git")

        repos_processed += 1
        git_dir = os.path.join(root, ".git")

        # 1. Capture stats before
        size_before, files_before = get_dir_stats(git_dir)

        print(f"\n[{repos_processed}] Optimizing: {root}")

        # 2. Run git gc; -C makes git operate on this repository.
        cmd = ["git", "-C", root, "gc"] + unknown_args
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            # git's stderr is not guaranteed to be valid UTF-8; never let the
            # error report itself raise.
            message = (e.stderr or b"").decode(errors="replace").strip()
            print(f"    [!] Error: {message}")
            continue
        except OSError as e:
            # git is missing or not executable: every repository would fail
            # the same way, so stop instead of repeating the error.
            print(f"Error: could not run git: {e}", file=sys.stderr)
            sys.exit(1)

        # 3. Capture stats after
        size_after, files_after = get_dir_stats(git_dir)

        saved_size = size_before - size_after
        removed_files = files_before - files_after

        # A repository that grew must not reduce the overall totals.
        total_saved_bytes += max(0, saved_size)
        total_files_removed += max(0, removed_files)

        print(f"    Files: {files_before} -> {files_after} ({removed_files} removed)")
        print(f"    Size:  {format_size(size_before)} -> {format_size(size_after)} ({format_size(saved_size)} saved)")

    # Final Summary Table
    print("\n" + "="*45)
    print(f"{'FINAL SUMMARY':^45}")
    print("-" * 45)
    print(f" Repositories Processed : {repos_processed}")
    print(f" Total Space Reclaimed  : {format_size(total_saved_bytes)}")
    print(f" Total Files Removed    : {total_files_removed}")
    print("="*45)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Beispielausgabe

git_cleaner_output.txt
[...]

[433] Optimizing: /home/uli/dev/FlareDNS
    Files: 73 -> 33 (40 removed)
    Size:  70.14 KB -> 47.03 KB (23.10 KB saved)

=============================================
                FINAL SUMMARY
---------------------------------------------
 Repositories Processed : 433
 Total Space Reclaimed  : 238.14 MB
 Total Files Removed    : 21612
=============================================

Check out similar posts by category: Git, Python