Commit c9ac81c0 authored by ploup

Upload project

parent cf18b1dc
import hashlib
import pathlib
import os
import stat
import sys
class DirectoriesComparison:
    """Compare the contents of two directories, excluding a list of files.

    The comparison runs entirely in the constructor. Afterwards the result is
    available through four lists of paths, each relative to the compared
    directories:

    - ``identicalFiles``: entries present on both sides and considered
      identical by ``compareFiles``.
    - ``differentFiles``: entries present on both sides that differ.
    - ``filesMissingInFirstDirectory``: entries found only in the second
      directory.
    - ``filesMissingInSecondDirectory``: entries found only in the first
      directory.

    ``self.files`` holds the sorted union of every relative path examined.
    """

    def __init__(self, firstDirectory, secondDirectory, excludedFiles, text):
        """Run the comparison and print its progress on standard output.

        Args:
            firstDirectory: First directory; must offer the ``pathlib.Path``
                methods used here (``rglob``, ``joinpath``).
            secondDirectory: Second directory, same requirements.
            excludedFiles: Collection of relative paths to ignore; may be
                empty or ``None``.
            text: Label written in front of the progress percentage.
        """
        # Buffer size used when hashing files (256 KiB)
        self.bufferSize = 262144
        # Set the first and second directories
        self.firstDirectory = firstDirectory
        self.secondDirectory = secondDirectory
        # Create the progress (in percents) and print it
        progress = 0
        self.printProgress(text, progress)
        # List the files in both directories, remove duplicates, and sort
        firstFiles = self.listFiles(self.firstDirectory, excludedFiles)
        secondFiles = self.listFiles(self.secondDirectory, excludedFiles)
        self.files = sorted(set(firstFiles) | set(secondFiles))
        # Create the lists of files used to store the result of the comparison
        self.identicalFiles = []
        self.differentFiles = []
        self.filesMissingInFirstDirectory = []
        self.filesMissingInSecondDirectory = []
        # The loop body never runs when there are no files, so dividing by
        # this total below is safe
        totalFiles = len(self.files)
        for counter, file in enumerate(self.files, start=1):
            # Compute the file path in both directories
            firstPath = self.firstDirectory.joinpath(file)
            secondPath = self.secondDirectory.joinpath(file)
            # exists() follows symbolic links, so is_symlink() is checked
            # first to keep broken links from being reported as missing
            if not firstPath.is_symlink() and not firstPath.exists():
                self.filesMissingInFirstDirectory.append(file)
            elif not secondPath.is_symlink() and not secondPath.exists():
                self.filesMissingInSecondDirectory.append(file)
            elif self.compareFiles(firstPath, secondPath):
                self.identicalFiles.append(file)
            else:
                self.differentFiles.append(file)
            # Compute the new progress
            newProgress = counter / totalFiles * 100
            # Only print when the formatted value actually changes
            if self.formatProgress(newProgress) != self.formatProgress(progress):
                progress = newProgress
                self.printProgressWithFilesCounter(text, progress, counter, totalFiles)

    def listFiles(self, directory, excludedFiles):
        """List the files in a directory, excluding a list of files.

        Returns every entry found recursively under ``directory`` as a path
        relative to it, minus the entries equal to a member of
        ``excludedFiles``.
        """
        # List the entries of the directory, relative to the directory
        files = [file.relative_to(directory) for file in directory.rglob("*")]
        # If there are files to exclude
        if excludedFiles:
            # Build the set once so each membership test is a hash lookup
            excluded = set(excludedFiles)
            files = [file for file in files if file not in excluded]
        return files

    def compareFiles(self, firstPath, secondPath):
        """Compare two files, returns True if the files are considered identical.

        Two entries are identical when their signatures (see
        ``computeSignature``) match and, for regular files, when their
        SHA256 hashes match as well.
        """
        # Compute the stat of both files, using lstat to not follow symbolic links
        firstStat = os.lstat(firstPath)
        secondStat = os.lstat(secondPath)
        # Different signatures: the files are different
        if self.computeSignature(firstStat) != self.computeSignature(secondStat):
            return False
        # Entries that aren't regular files are compared by signature only.
        # NOTE(review): their content (e.g. a symbolic link target) is not
        # compared here.
        if not stat.S_ISREG(firstStat.st_mode):
            return True
        # The files are identical if their hashes are identical
        return self.computeHash(firstPath) == self.computeHash(secondPath)

    def computeSignature(self, statResult):
        """Compute a file "signature" from a stat_result object as returned by os.stat().

        The signature is a tuple of the mode, uid and gid, followed by the
        size for everything that is not a directory.
        """
        # The signature is made of the mode, uid and gid
        signature = (statResult.st_mode, statResult.st_uid, statResult.st_gid)
        # If it isn't the signature of a directory
        if not stat.S_ISDIR(statResult.st_mode):
            # Add the size to the signature
            signature += (statResult.st_size,)
        return signature

    def computeHash(self, path):
        """Compute the SHA256 hash of a file.

        The file is read in chunks of ``self.bufferSize`` bytes; the raw
        digest is returned as ``bytes``.
        """
        hashAlgorithm = hashlib.sha256()
        with open(path, "rb") as file:
            # read() returns b"" at end of file, which stops the iteration
            for data in iter(lambda: file.read(self.bufferSize), b""):
                hashAlgorithm.update(data)
        return hashAlgorithm.digest()

    def printProgress(self, text, progress):
        """Print a progress percentage after a given text."""
        sys.stdout.write("\r" + text + self.formatProgress(progress))
        # No newline is written, so flush to make the progress show up
        sys.stdout.flush()

    def printProgressWithFilesCounter(self, text, progress, currentFileNumber, totalFilesNumber):
        """Print a progress percentage after a given text, with a files counter."""
        sys.stdout.write("\r" + text + self.formatProgress(progress) + " (" + str(currentFileNumber) + "/" + str(totalFilesNumber) + ")")
        # No newline is written, so flush to make the progress show up
        sys.stdout.flush()

    def formatProgress(self, progress):
        """Format a progress value to a string representation."""
        return "{:.2f}%".format(progress)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment