A minimal Storj log analyzer

This analyzer reports how much data has been uploaded to the storage node (that is, ingress traffic: data the node receives from the network) and prints the total uploaded size, the number of uploads, and a histogram of piece sizes.
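
The script assumes the storage node's default log format, where each successful upload is logged roughly like this (a JSON payload follows the "uploaded" event name; all values below are illustrative):

2024-01-01T00:00:00.000Z    INFO    piecestore    uploaded    {"Piece ID": "...", "Satellite ID": "...", "Action": "PUT", "Size": 181504, "Remote Address": "..."}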

#!/usr/bin/env python3
import sys
import json

def human_readable_size(size_bytes):
    if size_bytes == 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    # Pick the unit from the number of 1024-steps in the size, capped at TB.
    i = min((int(size_bytes).bit_length() - 1) // 10, len(size_name) - 1)
    p = pow(1024, i)
    s = round(size_bytes / p, 3)
    return f"{s} {size_name[i]}"

def print_histogram(sizes):
    # Power-of-two size bins: a catch-all below 1 KB, doubling steps from
    # 1 KB to 256 MB, and an open-ended bin above 256 MB.
    edges = [2 ** e for e in range(10, 29)]  # 1 KB, 2 KB, ..., 256 MB
    bins = [(0, edges[0])] + list(zip(edges, edges[1:])) + [(edges[-1], float('inf'))]
    bin_counts = [0] * len(bins)

    # Count each size into its half-open [low, high) bin.
    for size in sizes:
        for i, (low, high) in enumerate(bins):
            if low <= size < high:
                bin_counts[i] += 1
                break

    # Scale the longest bar to at most max_chars characters.
    max_count = max(bin_counts)
    max_chars = 50

    print("File size histogram:")
    for i, (low, high) in enumerate(bins):
        bin_label = f"{human_readable_size(low)}..{human_readable_size(high)}"
        num_chars = int((bin_counts[i] / max_count) * max_chars) if max_count > 0 else 0
        count_str = f"{bin_counts[i]:8}"
        print(f"{bin_label:<20} | {count_str} | {'█' * num_chars}")

def filter_uploaded_lines(logfile):
    total_size = 0
    upload_count = 0
    sizes = []

    try:
        with open(logfile, 'r') as file:
            # Stream line by line; storage node logs can grow large.
            for line in file:
                if "uploaded" not in line:
                    continue
                upload_count += 1
                try:
                    # The JSON payload follows the "uploaded" event name.
                    json_part = line.split('uploaded')[-1].strip()
                    data = json.loads(json_part)
                    size = data.get("Size", 0)
                    total_size += size
                    sizes.append(size)
                except json.JSONDecodeError:
                    print(f"Failed to decode JSON from line: {line.rstrip()}")
                except Exception as e:
                    print(f"An error occurred while processing line: {e}")

        print(f"Total size of uploaded pieces: {human_readable_size(total_size)}")
        print(f"Total number of uploads: {upload_count}")
        print_histogram(sizes)

    except FileNotFoundError:
        print(f"The file {logfile} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python3 storj-stats.py <logfile>")
        sys.exit(1)
    logfile = sys.argv[1]
    filter_uploaded_lines(logfile)

Run it on the Storj log file:

python3 storj-stats.py storage/storj.log
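
If you do not have a node log at hand, a tiny synthetic one is enough for a smoke test. The helper below is not part of the analyzer; it writes a few fake "uploaded" lines in the format sketched above (the file name sample.log and all field values are made up):

#!/usr/bin/env python3
# make-sample-log.py: write synthetic "uploaded" lines for testing.
# The format mimics the storage node log sketched above; all values are fake.
import json
import random

with open("sample.log", "w") as f:
    for _ in range(100):
        payload = {"Piece ID": "FAKE", "Action": "PUT",
                   "Size": random.randint(1024, 8 * 1024 * 1024)}
        f.write('2024-01-01T00:00:00.000Z\tINFO\tpiecestore\tuploaded\t'
                + json.dumps(payload) + '\n')

Running python3 storj-stats.py sample.log afterwards should show the uploads spread across the kilobyte and low-megabyte bins.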