11import os
22import hashlib
3+ import json # Import for generating reports
34
45def get_file_hash (filepath ):
56 """Return the MD5 hash of a file."""
@@ -9,13 +10,16 @@ def get_file_hash(filepath):
910 hasher .update (buf )
1011 return hasher .hexdigest ()
1112
12- def find_duplicates (directory , min_size = 0 ):
13- """Find duplicate files in a directory."""
13+ def find_duplicates (directory , min_size = 0 , file_extensions = None ):
14+ """Find duplicate files in a directory, with optional file type filtering ."""
1415 hashes = {}
1516 duplicates = {}
1617
1718 for dirpath , dirnames , filenames in os .walk (directory ):
1819 for filename in filenames :
20+ if file_extensions and not filename .lower ().endswith (tuple (file_extensions )):
21+ continue # Skip files that don't match the extensions
22+
1923 filepath = os .path .join (dirpath , filename )
2024 if os .path .getsize (filepath ) >= min_size :
2125 file_hash = get_file_hash (filepath )
@@ -29,11 +33,20 @@ def find_duplicates(directory, min_size=0):
2933
3034 return {k : v for k , v in duplicates .items () if len (v ) > 1 }
3135
def generate_report(duplicates, report_path):
    """Write the duplicate-file mapping to ``report_path`` as JSON.

    Args:
        duplicates: JSON-serialisable mapping to dump; ``main`` passes the
            dictionary returned by ``find_duplicates``.
        report_path: Destination file; it is created or overwritten.

    Raises:
        OSError: If ``report_path`` cannot be opened for writing.
        TypeError: If ``duplicates`` holds values ``json`` cannot serialise.
    """
    # Pin the encoding so the report does not depend on the platform locale.
    with open(report_path, "w", encoding="utf-8") as report_file:
        json.dump(duplicates, report_file, indent=4)
    print(f"Report generated: {report_path}")
41+
3242def main ():
3343 directory = input ("Enter the directory to scan for duplicates: " )
3444 min_size = int (input ("Enter the minimum file size to consider (in bytes, default is 0): " ) or "0" )
3545
36- duplicates = find_duplicates (directory , min_size )
46+ file_type_input = input ("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: " )
47+ file_extensions = [ext .strip ().lower () for ext in file_type_input .split ("," )] if file_type_input else None
48+
49+ duplicates = find_duplicates (directory , min_size , file_extensions )
3750
3851 if not duplicates :
3952 print ("No duplicates found." )
@@ -45,7 +58,7 @@ def main():
4558 print (path )
4659 print ("------" )
4760
48- action = input ("\n Choose an action: (D)elete, (M)ove, (N)o action: " ).lower ()
61+ action = input ("\n Choose an action: (D)elete, (M)ove, (R)eport, ( N)o action: " ).lower ()
4962
5063 if action == "d" :
5164 for _ , paths in duplicates .items ():
@@ -64,6 +77,10 @@ def main():
6477 os .rename (path , target_path )
6578 print (f"Moved { path } to { target_path } " )
6679
80+ elif action == "r" :
81+ report_path = input ("Enter the path to save the report (e.g., duplicates_report.json): " )
82+ generate_report (duplicates , report_path )
83+
6784 else :
6885 print ("No action taken." )
6986
0 commit comments