Moved diff functionality into its own service

This commit is contained in:
Jonathan Treffler 2023-09-02 02:48:27 +02:00 committed by root
parent c9d52b4c77
commit a0b6a25508
4 changed files with 359 additions and 219 deletions

View file

@ -1,208 +0,0 @@
<?php
namespace OCA\GroupfolderFilesystemSnapshots\Manager;
/**
 * Computes a recursive, file-level diff between two directory trees.
 *
 * Every entry of the returned list is an array with a "type" key
 * ("RENAME", "CREATION", "DELETION" or "EDIT") plus "before" and
 * "afterwards" sub-arrays describing the file on each side.
 */
class DiffManager {
    public function __construct() {
    }

    /**
     * Splits directory entries into plain files and sub-directories.
     *
     * @param string $parentDir directory the entries live in
     * @param array  $items     entry names relative to $parentDir
     * @return array two-element list: [files, folders]
     */
    private function separateFilesFromFolders($parentDir, $items) {
        $files = [];
        $folders = [];

        foreach ($items as $item) {
            if (is_dir($parentDir . DIRECTORY_SEPARATOR . $item)) {
                $folders[] = $item;
            } else {
                $files[] = $item;
            }
        }

        return [$files, $folders];
    }

    /**
     * Lists the direct children of $dir (without "." and ".."), split into files and folders.
     *
     * @param string $dir
     * @return array two-element list: [files, folders]
     */
    private function getFilesAndFolders($dir) {
        $entries = array_diff(scandir($dir), ['..', '.']);

        return $this->separateFilesFromFolders($dir, $entries);
    }

    /**
     * Maps every file name to its size, preserving the keys of $files.
     *
     * @param string $prefix directory containing the files
     * @param array  $files  file names relative to $prefix
     * @return array sizes as returned by filesize(), indexed like $files
     */
    private function getFilesizesOfFiles($prefix, array $files) {
        $result = [];

        foreach ($files as $index => $file) {
            $result[$index] = filesize($prefix . DIRECTORY_SEPARATOR . $file);
        }

        return $result;
    }

    /**
     * Byte-wise comparison of two files that are already known to have the same size.
     *
     * Chunks are compared with the strict operator: a loose comparison treats
     * numeric-looking chunks such as "1.0" and "001" as equal and would hide a real edit.
     *
     * @param string $file1
     * @param string $file2
     * @return bool true only when both files could be read and every chunk is identical
     */
    private function haveIdenticalContent($file1, $file2) {
        $handle1 = fopen($file1, 'rb');
        $handle2 = fopen($file2, 'rb');

        if ($handle1 === false || $handle2 === false) {
            if ($handle1 !== false) {
                fclose($handle1);
            }
            if ($handle2 !== false) {
                fclose($handle2);
            }
            // Equality cannot be proven, so the caller reports the file as changed.
            return false;
        }

        try {
            while (!feof($handle1)) {
                if (fread($handle1, 8192) !== fread($handle2, 8192)) {
                    return false;
                }
            }
            return true;
        } finally {
            fclose($handle1);
            fclose($handle2);
        }
    }

    /**
     * Recursively compares $dir1 (the "before" state) with $dir2 (the "afterwards" state).
     *
     * A side that does not exist or is not a directory is treated as empty.
     *
     * @param string $dir1   directory holding the earlier state
     * @param string $dir2   directory holding the later state
     * @param string $prefix path prepended (followed by a directory separator) to every reported file name
     * @return array list of diff entries, own files first, then those of all sub-folders
     */
    public function diffDirectories($dir1, $dir2, $prefix = "") {
        $diff = [];

        $scan1files = [];
        $scan1folders = [];
        if (is_dir($dir1)) {
            [$scan1files, $scan1folders] = $this->getFilesAndFolders($dir1);
        }

        $scan2files = [];
        $scan2folders = [];
        if (is_dir($dir2)) {
            [$scan2files, $scan2folders] = $this->getFilesAndFolders($dir2);
        }

        $fileCreations = array_diff($scan2files, $scan1files);
        $fileCreationsFilesizes = $this->getFilesizesOfFiles($dir2, $fileCreations);

        $fileDeletions = array_diff($scan1files, $scan2files);
        $fileDeletionsFilesizes = $this->getFilesizesOfFiles($dir1, $fileDeletions);

        $filePossibleEdits = array_intersect($scan1files, $scan2files);

        $allSubfolders = array_unique(array_merge($scan1folders, $scan2folders));

        // Search for creation/deletion pairs that are actually renames:
        // same size first (cheap), then same SHA-1 digest.
        foreach ($fileCreations as $creationIndex => $creation) {
            $creationSize = $fileCreationsFilesizes[$creationIndex];
            $renameContenders = array_keys($fileDeletionsFilesizes, $creationSize, true);

            if (count($renameContenders) === 0) {
                continue;
            }

            $creationSHA = sha1_file($dir2 . DIRECTORY_SEPARATOR . $creation);
            if ($creationSHA === false) {
                // Unreadable file: never pair it with another unreadable file.
                continue;
            }

            foreach ($renameContenders as $contender) {
                $deletion = $fileDeletions[$contender];
                $deletionSHA = sha1_file($dir1 . DIRECTORY_SEPARATOR . $deletion);

                if ($deletionSHA === $creationSHA) {
                    $diff[] = [
                        "type" => "RENAME",
                        "before" => [
                            "exists" => true,
                            "path" => $prefix . DIRECTORY_SEPARATOR . $deletion,
                            "size" => $creationSize,
                        ],
                        "afterwards" => [
                            "exists" => true,
                            "path" => $prefix . DIRECTORY_SEPARATOR . $creation,
                            "size" => $creationSize,
                        ],
                    ];

                    unset($fileCreations[$creationIndex]);
                    unset($fileDeletions[$contender]);
                    // Drop the size entry as well, otherwise a later creation of the same
                    // size would pick this already-consumed deletion as a contender again.
                    unset($fileDeletionsFilesizes[$contender]);
                    break;
                }
            }
        }

        foreach ($fileCreations as $index => $creation) {
            $diff[] = [
                "type" => "CREATION",
                "before" => [
                    "exists" => false,
                ],
                "afterwards" => [
                    "exists" => true,
                    "path" => $prefix . DIRECTORY_SEPARATOR . $creation,
                    "size" => $fileCreationsFilesizes[$index],
                ],
            ];
        }

        foreach ($fileDeletions as $index => $deletion) {
            $diff[] = [
                "type" => "DELETION",
                "before" => [
                    "exists" => true,
                    "path" => $prefix . DIRECTORY_SEPARATOR . $deletion,
                    "size" => $fileDeletionsFilesizes[$index],
                ],
                "afterwards" => [
                    "exists" => false,
                ],
            ];
        }

        foreach ($filePossibleEdits as $possibleEdit) {
            $file1 = $dir1 . DIRECTORY_SEPARATOR . $possibleEdit;
            $file2 = $dir2 . DIRECTORY_SEPARATOR . $possibleEdit;

            $file1Size = filesize($file1);
            $file2Size = filesize($file2);

            if (filemtime($file1) === filemtime($file2)) {
                // Same mtime: treated as unchanged.
                continue;
            }

            // The mtime differs, but the file may only have been touched: when the
            // sizes match, fall back to a byte-wise comparison.
            if ($file1Size === $file2Size && $this->haveIdenticalContent($file1, $file2)) {
                continue;
            }

            $diff[] = [
                "type" => "EDIT",
                "before" => [
                    "path" => $prefix . DIRECTORY_SEPARATOR . $possibleEdit,
                    "size" => $file1Size,
                ],
                "afterwards" => [
                    "path" => $prefix . DIRECTORY_SEPARATOR . $possibleEdit,
                    "size" => $file2Size,
                ],
            ];
        }

        foreach ($allSubfolders as $folder) {
            array_push($diff, ...$this->diffDirectories(
                $dir1 . DIRECTORY_SEPARATOR . $folder,
                $dir2 . DIRECTORY_SEPARATOR . $folder,
                $prefix . DIRECTORY_SEPARATOR . $folder
            ));
        }

        return $diff;
    }
}

View file

@ -3,18 +3,15 @@
namespace OCA\GroupfolderFilesystemSnapshots\Manager;
use OCA\GroupfolderFilesystemSnapshots\Manager\PathManager;
use OCA\GroupfolderFilesystemSnapshots\Manager\DiffManager;
use OCA\GroupfolderFilesystemSnapshots\Entity\Snapshot;
class SnapshotManager {
private PathManager $pathManager;
private DiffManager $diffManager;
public function __construct(PathManager $pathManager, DiffManager $diffManager){
public function __construct(PathManager $pathManager){
$this->pathManager = $pathManager;
$this->diffManager = $diffManager;
}
private function validSnapshotId(string $snapshotId) {
@ -46,11 +43,4 @@ class SnapshotManager {
yield new Snapshot($fileinfo->getFilename());
}
}
function getDiff(int $groupfolderId, string $snapshotId) {
$groupfolderPath = $this->pathManager->getGroupFolderDirectory($groupfolderId);
$snapshotPath = $this->pathManager->getGroupFolderSnapshotDirectory($groupfolderId, $snapshotId);
return $this->diffManager->diffDirectories($snapshotPath, $groupfolderPath);
}
}

235
lib/RecursiveDiff.php Normal file
View file

@ -0,0 +1,235 @@
<?php
namespace OCA\GroupfolderFilesystemSnapshots;
use OCA\GroupfolderFilesystemSnapshots\Helpers\FileHelper;
/**
 * One node of a recursive directory comparison between $dir1 ("before") and $dir2 ("current").
 *
 * Usage is two-phase: scan() builds the tree of sub-jobs and returns the number of
 * files that will be processed, diff() then reports every difference through the
 * result callback while pushing progress counts through the progress callback.
 */
class RecursiveDiff {
    public string $dir1;
    public string $dir2;

    private $prefix;

    private $newResultCallback;
    private $progressCallback;

    private $scan1files = [];
    private $scan1folders = [];

    private $scan2files = [];
    private $scan2folders = [];

    private $subJobs = [];

    // Last progress value reported by each sub-job, keyed like the sub-folder list in scan().
    private $subJobProgress = [];
    // Number of files of this directory (not of sub-folders) handled so far.
    private $progress = 0;

    /**
     * @param string   $dir1              directory holding the earlier state
     * @param string   $dir2              directory holding the current state
     * @param string   $prefix            path prepended (followed by a directory separator) to reported file names
     * @param callable $newResultCallback invoked once per difference with the named arguments
     *                                    type, beforeFileExists, beforePath, beforeSize,
     *                                    currentFileExists, currentPath, currentSize
     * @param callable $progressCallback  invoked with the number of files handled so far in this subtree
     */
    public function __construct($dir1, $dir2, $prefix, $newResultCallback, $progressCallback) {
        // $prefix used to declare a default value, but an optional parameter placed before
        // required ones is deprecated and treated as required anyway, so the default is dropped.
        $this->dir1 = $dir1;
        $this->dir2 = $dir2;
        $this->prefix = $prefix;
        $this->newResultCallback = $newResultCallback;
        $this->progressCallback = $progressCallback;
    }

    /**
     * Lists both directories, creates one sub-job per sub-folder and scans it recursively.
     *
     * A side that does not exist or is not a directory contributes no files or folders.
     *
     * @return int number of files found on both sides in this directory and all sub-folders
     */
    public function scan() {
        $scanNumFiles = 0;

        if (is_dir($this->dir1)) {
            [$this->scan1files, $this->scan1folders] = FileHelper::getFilesAndFolders($this->dir1);
        }

        if (is_dir($this->dir2)) {
            [$this->scan2files, $this->scan2folders] = FileHelper::getFilesAndFolders($this->dir2);
        }

        $scanNumFiles += count($this->scan1files);
        $scanNumFiles += count($this->scan2files);

        $allSubfolders = array_unique(array_merge($this->scan1folders, $this->scan2folders));

        foreach ($allSubfolders as $key => $folder) {
            $subdir1 = $this->dir1 . DIRECTORY_SEPARATOR . $folder;
            $subdir2 = $this->dir2 . DIRECTORY_SEPARATOR . $folder;
            $subprefix = $this->prefix . DIRECTORY_SEPARATOR . $folder;

            // Each sub-job reports its own running total; remember it per folder
            // and forward the aggregated value upwards.
            $newJob = new RecursiveDiff($subdir1, $subdir2, $subprefix, $this->newResultCallback, function ($numDoneFiles) use ($key) {
                $this->subJobProgress[$key] = $numDoneFiles;
                $this->updateProgress();
            });

            $this->subJobs[] = $newJob;

            $scanNumFiles += $newJob->scan();
        }

        return $scanNumFiles;
    }

    /**
     * Reports the files handled so far (own files plus all sub-jobs) to the progress callback.
     */
    private function updateProgress() {
        ($this->progressCallback)(array_sum($this->subJobProgress) + $this->progress);
    }

    /**
     * Adds $numFiles to this directory's own counter and reports the new total.
     */
    private function advanceProgress(int $numFiles) {
        $this->progress += $numFiles;
        $this->updateProgress();
    }

    /**
     * Byte-wise comparison of two files that are already known to have the same size.
     *
     * Chunks are compared with the strict operator: a loose comparison treats
     * numeric-looking chunks such as "1.0" and "001" as equal and would hide a real edit.
     *
     * @return bool true only when both files could be read and every chunk is identical
     */
    private function haveIdenticalContent($file1, $file2) {
        $handle1 = fopen($file1, 'rb');
        $handle2 = fopen($file2, 'rb');

        if ($handle1 === false || $handle2 === false) {
            if ($handle1 !== false) {
                fclose($handle1);
            }
            if ($handle2 !== false) {
                fclose($handle2);
            }
            // Equality cannot be proven, so the caller reports the file as changed.
            return false;
        }

        try {
            while (!feof($handle1)) {
                if (fread($handle1, 8192) !== fread($handle2, 8192)) {
                    return false;
                }
            }
            return true;
        } finally {
            fclose($handle1);
            fclose($handle2);
        }
    }

    /**
     * Runs the diff of all sub-jobs, then of this directory's own files.
     *
     * Differences are delivered through the result callback; scan() must have been
     * called before, otherwise there are no file lists or sub-jobs to work on.
     *
     * @return array the merged return values of the sub-jobs (no entries are added here)
     */
    public function diff() {
        $diff = [];

        foreach ($this->subJobs as $job) {
            $result = $job->diff();
            array_push($diff, ...$result);
        }

        $fileCreations = array_diff($this->scan2files, $this->scan1files);
        $fileCreationsFilesizes = FileHelper::getFilesizesOfFiles($this->dir2, $fileCreations);

        $fileDeletions = array_diff($this->scan1files, $this->scan2files);
        $fileDeletionsFilesizes = FileHelper::getFilesizesOfFiles($this->dir1, $fileDeletions);

        $filePossibleEdits = array_intersect($this->scan1files, $this->scan2files);

        // Search for creation/deletion pairs that are actually renames:
        // same size first (cheap), then same SHA-1 digest.
        foreach ($fileCreations as $creationIndex => $creation) {
            $creationSize = $fileCreationsFilesizes[$creationIndex];
            $renameContenders = array_keys($fileDeletionsFilesizes, $creationSize, true);

            if (count($renameContenders) === 0) {
                continue;
            }

            $creationSHA = sha1_file($this->dir2 . DIRECTORY_SEPARATOR . $creation);
            if ($creationSHA === false) {
                // Unreadable file: never pair it with another unreadable file.
                continue;
            }

            foreach ($renameContenders as $contender) {
                $deletion = $fileDeletions[$contender];
                $deletionSHA = sha1_file($this->dir1 . DIRECTORY_SEPARATOR . $deletion);

                if ($deletionSHA === $creationSHA) {
                    ($this->newResultCallback)(
                        type: "RENAME",
                        beforeFileExists: true,
                        beforePath: $this->prefix . DIRECTORY_SEPARATOR . $deletion,
                        beforeSize: $creationSize,
                        currentFileExists: true,
                        currentPath: $this->prefix . DIRECTORY_SEPARATOR . $creation,
                        currentSize: $creationSize,
                    );

                    unset($fileCreations[$creationIndex]);
                    unset($fileDeletions[$contender]);
                    // Drop the size entry as well, otherwise a later creation of the same
                    // size would pick this already-consumed deletion as a contender again.
                    unset($fileDeletionsFilesizes[$contender]);

                    // A rename accounts for one file on each side.
                    $this->advanceProgress(2);
                    break;
                }
            }
        }

        foreach ($fileCreations as $index => $creation) {
            ($this->newResultCallback)(
                type: "CREATION",
                beforeFileExists: false,
                beforePath: null,
                beforeSize: null,
                currentFileExists: true,
                currentPath: $this->prefix . DIRECTORY_SEPARATOR . $creation,
                currentSize: $fileCreationsFilesizes[$index],
            );

            $this->advanceProgress(1);
        }

        foreach ($fileDeletions as $index => $deletion) {
            ($this->newResultCallback)(
                type: "DELETION",
                beforeFileExists: true,
                beforePath: $this->prefix . DIRECTORY_SEPARATOR . $deletion,
                beforeSize: $fileDeletionsFilesizes[$index],
                currentFileExists: false,
                currentPath: null,
                currentSize: null,
            );

            $this->advanceProgress(1);
        }

        foreach ($filePossibleEdits as $possibleEdit) {
            $file1 = $this->dir1 . DIRECTORY_SEPARATOR . $possibleEdit;
            $file2 = $this->dir2 . DIRECTORY_SEPARATOR . $possibleEdit;

            $file1Size = filesize($file1);
            $file2Size = filesize($file2);

            // The file exists on both sides, so it was counted twice by scan().
            $this->advanceProgress(2);

            if (filemtime($file1) === filemtime($file2)) {
                // Same mtime: treated as unchanged.
                continue;
            }

            // The mtime differs, but the file may only have been touched: when the
            // sizes match, fall back to a byte-wise comparison.
            if ($file1Size === $file2Size && $this->haveIdenticalContent($file1, $file2)) {
                continue;
            }

            ($this->newResultCallback)(
                type: "EDIT",
                beforeFileExists: true,
                beforePath: $this->prefix . DIRECTORY_SEPARATOR . $possibleEdit,
                beforeSize: $file1Size,
                currentFileExists: true,
                currentPath: $this->prefix . DIRECTORY_SEPARATOR . $possibleEdit,
                currentSize: $file2Size,
            );
        }

        return $diff;
    }
}

View file

@ -0,0 +1,123 @@
<?php
namespace OCA\GroupfolderFilesystemSnapshots\Service;
use Exception;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\AppFramework\Db\MultipleObjectsReturnedException;
use OCA\GroupfolderFilesystemSnapshots\Db\DiffTask;
use OCA\GroupfolderFilesystemSnapshots\Db\DiffTaskMapper;
use OCA\GroupfolderFilesystemSnapshots\Db\DiffTaskResult;
use OCA\GroupfolderFilesystemSnapshots\Db\DiffTaskResultMapper;
use OCA\GroupfolderFilesystemSnapshots\Manager\PathManager;
use OCA\GroupfolderFilesystemSnapshots\RecursiveDiff;
/**
 * Service layer around diff tasks: lookup, deletion, and creation of a task
 * whose results are produced by a RecursiveDiff run and stored per result row.
 */
class DiffTaskService {
    private DiffTaskMapper $mapper;

    private DiffTaskResultMapper $diffTaskResultMapper;

    private PathManager $pathManager;

    public function __construct(DiffTaskMapper $mapper, DiffTaskResultMapper $diffTaskResultMapper, PathManager $pathManager){
        $this->pathManager = $pathManager;
        $this->diffTaskResultMapper = $diffTaskResultMapper;
        $this->mapper = $mapper;
    }

    /**
     * Returns whatever the task mapper's findAll() yields for the given user.
     *
     * @return DiffTask[]
     */
    public function findAll(string $userId): array {
        $tasks = $this->mapper->findAll($userId);

        return $tasks;
    }

    /**
     * Converts mapper lookup failures into a NotFoundException and rethrows everything else.
     *
     * NOTE(review): NotFoundException is not imported in the visible code, so it resolves
     * relative to this file's namespace - confirm that class exists there.
     *
     * @return never
     */
    private function handleException ($e) {
        $isLookupFailure = ($e instanceof DoesNotExistException)
            || ($e instanceof MultipleObjectsReturnedException);

        if (!$isLookupFailure) {
            throw $e;
        }

        throw new NotFoundException($e->getMessage());
    }

    /**
     * Looks up a single task through the mapper using the id and user id.
     *
     * Any exception raised by the lookup is routed through handleException().
     */
    public function find(int $id, string $userId): DiffTask {
        try {
            $found = $this->mapper->find($id, $userId);
            return $found;
        } catch(Exception $e) {
            $this->handleException($e);
        }
    }

    /**
     * Creates and stores a new diff task, then runs the diff between the snapshot
     * directory and the groupfolder directory synchronously.
     *
     * Every difference is inserted as a DiffTaskResult row. $progressCallback receives an
     * array with "overallFiles", "doneFiles", "progress" and "progressPercent" while the
     * diff runs, and a final array that additionally carries the task under "result".
     */
    function create(int $groupfolderId, string $snapshotId, string $userId, $progressCallback): DiffTask {
        $snapshotPath = $this->pathManager->getGroupFolderSnapshotDirectory($groupfolderId, $snapshotId);
        $groupfolderPath = $this->pathManager->getGroupFolderDirectory($groupfolderId);

        $pendingTask = new DiffTask();
        $pendingTask->setGroupfolderId($groupfolderId);
        $pendingTask->setSnapshotId($snapshotId);
        $pendingTask->setTimestamp(time());
        $pendingTask->setUserId($userId);

        $task = $this->mapper->insert($pendingTask);

        // Filled in after the scan; the progress closure reads it by reference.
        $numFiles = 0;

        // Parameter names are part of the contract: the callback is invoked with named arguments.
        $storeResult = function(string $type, bool $beforeFileExists, ?string $beforePath, ?int $beforeSize, bool $currentFileExists, ?string $currentPath, ?int $currentSize) use ($task) {
            $row = new DiffTaskResult();
            $row->setTaskId($task->getId());
            $row->setType($type);

            $row->setBeforeFileExists($beforeFileExists);
            $row->setBeforePath($beforePath);
            $row->setBeforeSize($beforeSize);

            $row->setCurrentFileExists($currentFileExists);
            $row->setCurrentPath($currentPath);
            $row->setCurrentSize($currentSize);

            $this->diffTaskResultMapper->insert($row);
        };

        $reportProgress = function($numDoneFiles) use ($progressCallback, &$numFiles) {
            // Nothing to report before the scan finished or once everything is done;
            // the completed state is sent separately below.
            if(($numFiles == 0) || ($numFiles == $numDoneFiles)) {
                return;
            }

            $ratio = number_format(($numDoneFiles / $numFiles),2);

            ($progressCallback)([
                "overallFiles" => $numFiles,
                "doneFiles" => $numDoneFiles,
                "progress" => $ratio,
                "progressPercent" => ($ratio * 100) . "%",
            ]);
        };

        $diffTask = new RecursiveDiff($snapshotPath, $groupfolderPath, "", $storeResult, $reportProgress);

        $numFiles = $diffTask->scan();

        $diffTask->diff();

        $finalState = [
            "overallFiles" => $numFiles,
            "doneFiles" => $numFiles,
            "progress" => 1.0,
            "progressPercent" => "100.00%",
            // TODO: include task results in object
            "result" => $task,
        ];
        ($progressCallback)($finalState);

        return $task;
    }

    /**
     * Finds the task via the mapper, deletes it and returns the deleted entity.
     *
     * Any exception raised on the way is routed through handleException().
     */
    public function delete(int $id, string $userId): DiffTask {
        try {
            $doomed = $this->mapper->find($id, $userId);
            $this->mapper->delete($doomed);
            return $doomed;
        } catch(Exception $e) {
            $this->handleException($e);
        }
    }
}