Python script to find & count duplicate lines in a text file
#!/usr/bin/env python3
import sys
from collections import Counter
def main():
    """Print every line that occurs more than once in a file, with its count.

    The file name is taken from the single command-line argument. One
    ``"<count> <line>"`` row is written to stdout per duplicated line,
    ordered by ascending count; lines with equal counts keep the order in
    which they first appeared in the file (the sort is stable).

    Lines are compared exactly, apart from their line terminator. Leading
    and trailing whitespace is stripped only when a line is printed.

    Exits with status 1, after printing a message, if the number of
    arguments is wrong or the file does not exist.
    """
    if len(sys.argv) != 2:
        print("Usage: python count_lines.py <filename>")
        sys.exit(1)

    filename = sys.argv[1]
    try:
        with open(filename, 'r') as file:
            # Drop the terminator before counting: a final line without a
            # trailing newline must still match identical earlier lines.
            line_counts = Counter(line.rstrip("\n") for line in file)
    except FileNotFoundError:
        print(f"File '{filename}' not found.")
        sys.exit(1)

    # Keep only lines seen more than once, least frequent first.
    duplicates = sorted(
        (
            (count, line.strip())
            for line, count in line_counts.items()
            if count > 1
        ),
        key=lambda pair: pair[0],
    )

    for count, line in duplicates:
        print(f"{count} {line}")
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
If this post helped you, please consider buying me a coffee or donating via PayPal to support the research and publishing of new posts on TechOverflow.