forked from kishanrajput23/Java-Projects-Collections
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathduplicatefileremover.py
40 lines (34 loc) · 1.12 KB
/
duplicatefileremover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import hashlib
import os
def hashFile(filename, blocksize=65536):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large files can be
    hashed without loading them into memory all at once.

    Args:
        filename: Path of the file to hash.
        blocksize: Number of bytes to read per chunk. Defaults to 65536,
            the value that was previously hard-coded.

    Returns:
        The MD5 digest of the file's bytes as a hexadecimal string.

    Raises:
        ValueError: If ``blocksize`` is zero or negative.
        OSError: If the file cannot be opened or read.
    """
    if blocksize <= 0:
        # read(0) returns b'' straight away (the digest would be that of an
        # empty file) and a negative size reads the whole file in one go,
        # defeating the purpose of chunking.
        raise ValueError("blocksize must be a positive integer")
    hasher = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter(callable, sentinel) keeps calling read() until it returns
        # b'', which is how a binary file signals end-of-file.
        for chunk in iter(lambda: stream.read(blocksize), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
if __name__ == "__main__":
    # Removes duplicate regular files from the current working directory:
    # the first file seen with a given content hash is kept, every later
    # file with the same hash is deleted, and a report is printed.

    # Maps each content hash to the first file name seen with that hash.
    hashMap = {}
    # Names of the files that were actually removed, in removal order.
    deletedFiles = []
    # os.listdir() returns names in arbitrary order, so sort them to make
    # the choice of which copy survives deterministic. Symlinks are skipped
    # because os.path.isfile() follows them: a link hashes the same as its
    # target, and deleting the target would leave a dangling link.
    filelist = sorted(
        f for f in os.listdir()
        if os.path.isfile(f) and not os.path.islink(f)
    )
    for f in filelist:
        try:
            key = hashFile(f)
            if key in hashMap:
                # Same hash as an earlier file: treat as a duplicate.
                # Record the name only after the removal has succeeded so
                # the report never lists a file that is still on disk.
                os.remove(f)
                deletedFiles.append(f)
            else:
                hashMap[key] = f
        except OSError as err:
            # An unreadable or undeletable file should not abort the run
            # and lose the report of what has already been deleted.
            print('Skipping {}: {}'.format(f, err))
    if deletedFiles:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')