-
Notifications
You must be signed in to change notification settings - Fork 12.9k
Expand file tree
/
Copy pathduplicate_file_finder.py
More file actions
49 lines (36 loc) · 1.16 KB
/
duplicate_file_finder.py
File metadata and controls
49 lines (36 loc) · 1.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import hashlib
def hash_file(filepath, chunk_size: int = 8192) -> str:
    """Return the SHA-256 hex digest of a file's contents.

    The file is opened in binary mode and read in fixed-size chunks, so
    the whole file never has to be held in memory at once.

    Args:
        filepath: Path of the file to hash; anything ``open()`` accepts.
        chunk_size: Number of bytes to request per read. Must be positive.
            The value does not affect the resulting digest.

    Returns:
        The SHA-256 digest of the file's bytes as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns b"" immediately, which would end the loop at once
        # and silently yield the digest of empty input.
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    digest = hashlib.sha256()
    with open(filepath, "rb") as f:
        # An empty bytes object signals end of file and stops the loop.
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
def find_duplicates(directory):
    """Find files under ``directory`` that have identical contents.

    The tree is walked with ``os.walk`` and every regular file is hashed
    with ``hash_file``. The first file seen with a given digest is treated
    as the original; every later file with the same digest is reported as
    a duplicate of it.

    Files and directories that cannot be read are reported on stdout and
    skipped, so one bad entry does not abort the scan.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        A list of ``(duplicate_path, original_path)`` tuples in the order
        the duplicates were encountered; empty if there are none.
    """

    def report_walk_error(err):
        # os.walk ignores directory-listing errors unless given a callback;
        # surface them the same way unreadable files are surfaced.
        print(f"Error reading {err.filename}: {err}")

    first_seen = {}  # digest -> path of the first file with that digest
    duplicates = []
    for root, _dirs, files in os.walk(directory, onerror=report_walk_error):
        for name in files:
            path = os.path.join(root, name)

            # FIFOs, sockets and device nodes can block or stream forever
            # when opened for reading, so skip them. Paths that do not
            # exist (e.g. broken symlinks) fall through and are reported
            # by the error handler below.
            if os.path.exists(path) and not os.path.isfile(path):
                continue

            # Only the I/O call is guarded, and only for I/O failures, so
            # genuine programming errors are not hidden.
            try:
                file_hash = hash_file(path)
            except OSError as e:
                print(f"Error reading {path}: {e}")
                continue

            if file_hash in first_seen:
                duplicates.append((path, first_seen[file_hash]))
            else:
                first_seen[file_hash] = path
    return duplicates
# Script entry point: prompt for a directory, scan it, and print each
# duplicate next to the first file found with the same contents.
if __name__ == "__main__":
    directory = input("Enter directory to scan: ")
    if not os.path.isdir(directory):
        print("Invalid directory")
        # The bare `exit()` helper is added by the `site` module and is not
        # guaranteed to exist (e.g. `python -S`, frozen builds); it also
        # exited with status 0. SystemExit is always available, and a
        # nonzero status tells the calling shell the scan did not run.
        raise SystemExit(1)

    duplicates = find_duplicates(directory)
    if not duplicates:
        print("No duplicate files found.")
    else:
        print("\nDuplicate files:")
        for dup, original in duplicates:
            print(f"{dup} == {original}")