Python script to find file and directory names containing non-standard characters
This script recursively prints the names of files and directories that contain characters outside the Unicode Basic Latin and Latin-1 Supplement blocks.
#!/usr/bin/env python3
import argparse
import os
import sys
def is_valid_char(c: str) -> bool:
    """Return True if ``c`` lies in the Basic Latin or Latin-1 Supplement block.

    Those two Unicode blocks together cover code points U+0000 through
    U+00FF, so the check is a single integer comparison on the code point
    rather than a lexicographic comparison of its hex representation.

    Args:
        c: A single character.

    Returns:
        True when the code point of ``c`` is at most U+00FF, otherwise False.

    Raises:
        TypeError: If ``c`` is not a single character (raised by ``ord``).
    """
    # ord() never returns a negative value, so only the upper bound matters.
    return ord(c) <= 0xFF
def contains_invalid_chars(name: str) -> bool:
    """Return True if ``name`` holds at least one character ``is_valid_char`` rejects.

    Args:
        name: The file or directory name (a single path component) to inspect.

    Returns:
        True when any character of ``name`` fails ``is_valid_char``; False
        otherwise, including for the empty string, which has no characters.
    """
    return not all(is_valid_char(ch) for ch in name)
def check_directory(path: str) -> None:
    """Walk ``path`` recursively and print every entry whose name is flagged.

    Each directory and file name found beneath ``path`` is tested with
    ``contains_invalid_chars``; matching entries are printed as their path
    joined onto the directory that contains them, one per line.  Only the
    entries below ``path`` are tested, not the name of ``path`` itself.

    Args:
        path: Directory at which the recursive walk starts.
    """
    for root, dirs, files in os.walk(path):
        for name in dirs + files:
            if not contains_invalid_chars(name):
                continue

            full_path = os.path.join(root, name)
            try:
                print(full_path)
            except UnicodeEncodeError:
                # Names containing undecodable bytes (surfaced by the OS layer
                # as lone surrogates) or characters that stdout's encoding
                # cannot represent must not abort the scan: emit them with
                # the offending characters backslash-escaped instead.
                encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
                escaped = full_path.encode(encoding, "backslashreplace")
                print(escaped.decode(encoding))
def main() -> None:
    """Parse the command line and scan the requested directory.

    Expects a single positional argument: the directory to check.  When that
    argument is not an existing directory, the usage text and an error
    message are written to stderr and the process exits with status 2, so
    that calling shells and scripts can detect the failure.

    Raises:
        SystemExit: On invalid command-line arguments or when the given path
            is not a directory.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Check for files and directories with invalid characters "
            "in their names."
        ),
    )
    parser.add_argument("path", type=str, help="The path to the directory to check.")
    args = parser.parse_args()

    if not os.path.isdir(args.path):
        # parser.error() reports on stderr and raises SystemExit(2); printing
        # to stdout and returning would leave the exit status at 0.
        parser.error(f"The path {args.path} is not a valid directory.")

    check_directory(args.path)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()