more options
This commit is contained in:
parent
aae71672f5
commit
09d285d94d
|
@ -35,6 +35,8 @@ private:
|
||||||
bool hardlink = true;
|
bool hardlink = true;
|
||||||
bool ignoredotfiles = false;
|
bool ignoredotfiles = false;
|
||||||
bool dryrun = true;
|
bool dryrun = true;
|
||||||
|
bool deleteDuplicates = true;
|
||||||
|
bool ignoreInodeID = true;
|
||||||
uint64_t deduplicatedBytes = 0; // amount of bytes deduplicated
|
uint64_t deduplicatedBytes = 0; // amount of bytes deduplicated
|
||||||
uint64_t deduplicatedFiles = 0;
|
uint64_t deduplicatedFiles = 0;
|
||||||
};
|
};
|
||||||
|
|
|
@ -16,10 +16,14 @@ public:
|
||||||
|
|
||||||
using map_t = std::multimap<uint64_t, std::shared_ptr<File>>;
|
using map_t = std::multimap<uint64_t, std::shared_ptr<File>>;
|
||||||
map_t index(const SearchFolder& sf);
|
map_t index(const SearchFolder& sf);
|
||||||
|
|
||||||
|
void setIgnoreDotFiles(bool b = true);
|
||||||
|
void setIgnoreInodeIDs(bool b = true);
|
||||||
private:
|
private:
|
||||||
void handleFolderContent(dirent* dircont, const SearchFolder& sf, std::list<SearchFolder>& newfolders, map_t& newfiles);
|
void handleFolderContent(dirent* dircont, const SearchFolder& sf, std::list<SearchFolder>& newfolders, map_t& newfiles);
|
||||||
void handleNewFile(ino_t inode, const std::string& path, map_t& newfiles);
|
void handleNewFile(ino_t inode, const std::string& path, map_t& newfiles);
|
||||||
|
|
||||||
std::set<uint64_t> knownInodes; // file inodes, that are known and should not be indexed again
|
std::set<uint64_t> knownInodes; // file inodes, that are known and should not be indexed again
|
||||||
bool ignoredotfiles = false;
|
bool ignoredotfiles = false;
|
||||||
|
bool ignoreInodeID = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -38,6 +38,8 @@ Files& getFiles(uint64_t fs, std::map<uint64_t, Files>& m) {
|
||||||
|
|
||||||
void Dedup::start() {
|
void Dedup::start() {
|
||||||
FileIndexer::map_t foundfiles;
|
FileIndexer::map_t foundfiles;
|
||||||
|
indexer.setIgnoreDotFiles(ignoredotfiles);
|
||||||
|
indexer.setIgnoreInodeIDs(ignoreInodeID);
|
||||||
for(const SearchFolder& sf : folders) {
|
for(const SearchFolder& sf : folders) {
|
||||||
foundfiles.merge(indexer.index(sf));
|
foundfiles.merge(indexer.index(sf));
|
||||||
}
|
}
|
||||||
|
@ -192,7 +194,9 @@ bool Dedup::relinkFile(const std::string& linkbase, const std::string& replacedf
|
||||||
|
|
||||||
if(dryrun || !hardlink) {
|
if(dryrun || !hardlink) {
|
||||||
Log::note << "delete " << replacedfile;
|
Log::note << "delete " << replacedfile;
|
||||||
Log::note << "link " << linkbase << " -> " << replacedfile;
|
if(!deleteDuplicates) {
|
||||||
|
Log::note << "link " << linkbase << " -> " << replacedfile;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
int res = ::unlink(replacedfile.c_str());
|
int res = ::unlink(replacedfile.c_str());
|
||||||
if(res != 0) {
|
if(res != 0) {
|
||||||
|
@ -200,11 +204,13 @@ bool Dedup::relinkFile(const std::string& linkbase, const std::string& replacedf
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = ::link(linkbase.c_str(), replacedfile.c_str());
|
if(!deleteDuplicates) {
|
||||||
if(res != 0) {
|
res = ::link(linkbase.c_str(), replacedfile.c_str());
|
||||||
Log::error << "link(" << linkbase << ", " << replacedfile << ") failed: " << strerror(errno) << " (" << errno << ')';
|
if(res != 0) {
|
||||||
// TODO try to symlink?
|
Log::error << "link(" << linkbase << ", " << replacedfile << ") failed: " << strerror(errno) << " (" << errno << ')';
|
||||||
return false;
|
// TODO try to symlink?
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -35,6 +35,13 @@ std::multimap<uint64_t, std::shared_ptr<File>> FileIndexer::index(const SearchFo
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FileIndexer::setIgnoreDotFiles(bool b) {
|
||||||
|
ignoredotfiles = b;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileIndexer::setIgnoreInodeIDs(bool b) {
|
||||||
|
ignoreInodeID = b;
|
||||||
|
}
|
||||||
|
|
||||||
void FileIndexer::handleFolderContent(dirent* dircont, const SearchFolder& sf, std::list<SearchFolder>& newfolders, map_t& newfiles) {
|
void FileIndexer::handleFolderContent(dirent* dircont, const SearchFolder& sf, std::list<SearchFolder>& newfolders, map_t& newfiles) {
|
||||||
std::string name(dircont->d_name);
|
std::string name(dircont->d_name);
|
||||||
|
@ -62,10 +69,12 @@ void FileIndexer::handleFolderContent(dirent* dircont, const SearchFolder& sf, s
|
||||||
|
|
||||||
void FileIndexer::handleNewFile(ino_t inode, const std::string& path, map_t& newfiles) {
|
void FileIndexer::handleNewFile(ino_t inode, const std::string& path, map_t& newfiles) {
|
||||||
// check for already scanned inodes
|
// check for already scanned inodes
|
||||||
auto it = knownInodes.find(inode);
|
if(!ignoreInodeID) {
|
||||||
if(it != knownInodes.end()) {
|
auto it = knownInodes.find(inode);
|
||||||
Log::note << "found already detected file: " << inode << ' ' << path;
|
if(it != knownInodes.end()) {
|
||||||
return;
|
Log::note << "found already detected file: " << inode << ' ' << path;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
|
@ -78,8 +87,9 @@ void FileIndexer::handleNewFile(ino_t inode, const std::string& path, map_t& new
|
||||||
loff_t fileSize = statbuf.st_size;
|
loff_t fileSize = statbuf.st_size;
|
||||||
nlink_t linkCount = statbuf.st_nlink;
|
nlink_t linkCount = statbuf.st_nlink;
|
||||||
|
|
||||||
|
if(!ignoreInodeID) {
|
||||||
knownInodes.insert(inode);
|
knownInodes.insert(inode);
|
||||||
|
}
|
||||||
|
|
||||||
newfiles.insert({fileSize, std::make_shared<File>(fileSize, inode, linkCount, path)});
|
newfiles.insert({fileSize, std::make_shared<File>(fileSize, inode, linkCount, path)});
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue