A minimal storj log analyzer
This analyzer shows how much data has been uploaded to the storage node (ingress traffic, i.e. data the node downloads from the internet) and prints a piece-size histogram together with the total uploaded size and the number of uploads.
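It relies on the storagenode log format in which each finished upload is logged on a line containing the word "uploaded" followed by a JSON object, roughly like the line below (the fields shown are illustrative and vary between versions; the script only uses the trailing JSON object and its Size field):

2024-05-01T12:34:56.789Z INFO piecestore uploaded {"Piece ID": "…", "Satellite ID": "…", "Action": "PUT", "Size": 181504}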
#!/usr/bin/env python3
import sys
import json
def human_readable_size(size_bytes):
    if size_bytes == 0:
        return "0B"
    if size_bytes == float('inf'):
        # Used for the open-ended last histogram bin.
        return "inf"
    size_name = ("B", "KB", "MB", "GB", "TB")
    # 1024 = 2**10, so the bit length of the size picks the unit exactly.
    i = min((int(size_bytes).bit_length() - 1) // 10, len(size_name) - 1)
    p = pow(1024, i)
    s = round(size_bytes / p, 3)
    return f"{s} {size_name[i]}"
def print_histogram(sizes):
    # Bucket boundaries double from 1 KB up to 256 MB, plus a catch-all
    # bucket below 1 KB and an open-ended one above 256 MB.
    bins = [
        (0, 1 * 1024),
        (1 * 1024, 2 * 1024),
        (2 * 1024, 4 * 1024),
        (4 * 1024, 8 * 1024),
        (8 * 1024, 16 * 1024),
        (16 * 1024, 32 * 1024),
        (32 * 1024, 64 * 1024),
        (64 * 1024, 128 * 1024),
        (128 * 1024, 256 * 1024),
        (256 * 1024, 512 * 1024),
        (512 * 1024, 1 * 1024 * 1024),
        (1 * 1024 * 1024, 2 * 1024 * 1024),
        (2 * 1024 * 1024, 4 * 1024 * 1024),
        (4 * 1024 * 1024, 8 * 1024 * 1024),
        (8 * 1024 * 1024, 16 * 1024 * 1024),
        (16 * 1024 * 1024, 32 * 1024 * 1024),
        (32 * 1024 * 1024, 64 * 1024 * 1024),
        (64 * 1024 * 1024, 128 * 1024 * 1024),
        (128 * 1024 * 1024, 256 * 1024 * 1024),
        (256 * 1024 * 1024, float('inf')),
    ]
    bin_counts = [0] * len(bins)
    for size in sizes:
        for i, (low, high) in enumerate(bins):
            if low <= size < high:
                bin_counts[i] += 1
                break
    max_count = max(bin_counts)
    max_chars = 50
    print("File size histogram:")
    for i, (low, high) in enumerate(bins):
        bin_label = f"{human_readable_size(low)}..{human_readable_size(high)}"
        num_chars = int((bin_counts[i] / max_count) * max_chars) if max_count > 0 else 0
        count_str = f"{bin_counts[i]:8}"
        print(f"{bin_label:<20} | {count_str} | {'█' * num_chars}")
def filter_uploaded_lines(logfile):
    total_size = 0
    upload_count = 0
    sizes = []
    try:
        with open(logfile, 'r') as file:
            # Stream the file line by line instead of reading it all into memory.
            for line in file:
                if "uploaded" in line:
                    upload_count += 1
                    try:
                        # The JSON payload follows the word "uploaded".
                        json_part = line.split('uploaded')[-1].strip()
                        data = json.loads(json_part)
                        size = data.get("Size", 0)
                        total_size += size
                        sizes.append(size)
                    except json.JSONDecodeError:
                        print(f"Failed to decode JSON from line: {line}")
                    except Exception as e:
                        print(f"An error occurred while processing line: {e}")
        print(f"Total size of uploaded pieces: {human_readable_size(total_size)}")
        print(f"Total number of uploads: {upload_count}")
        print_histogram(sizes)
    except FileNotFoundError:
        print(f"The file {logfile} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python3 storj-stats.py <logfile>")
    else:
        logfile = sys.argv[1]
        filter_uploaded_lines(logfile)
Run it on the storj logfile:
python3 storj-stats.py storage/storj.log
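To look at a single day only, the log can be pre-filtered by date before running the analyzer. This assumes the usual setup where each log line starts with an ISO 8601 timestamp; adjust the path and date format to your installation:

grep "$(date +%Y-%m-%d)" storage/storj.log > today.log
python3 storj-stats.py today.log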