dedup/src/dedup.cpp

110 lines
2.4 KiB
C++

#include "dedup.h"
#include <cstring>
#include <numeric>
#include <sys/stat.h>
#include <Log.h>
// Constructs a Dedup instance; no explicit construction work is performed here.
Dedup::Dedup() = default;
// Destroys the instance; no explicit cleanup is performed here.
Dedup::~Dedup() = default;
// Registers a directory that start() will index.
//   path    - directory path, stored as given (no validation happens here).
//   recurse - stored with the folder; handleFolderContent() only queues
//             subdirectories of folders that have this flag set.
void Dedup::addSearchFolder(const std::string& path, bool recurse) {
// Append to the work list that start() walks by index.
folders.emplace_back(path, recurse);
}
void Dedup::start() {
for(uint32_t i = 0; i < folders.size(); ++i) {
SearchFolder sf = folders.at(i);
indexFolder(sf);
}
uint64_t accFileSize = std::accumulate(inodes.begin(), inodes.end(), 0ul, [](uint64_t val, auto r){ return val + r.second->filesize.fs; });
Log::info << "indexing done " << inodes.size() << " unique files found with a total file size of " << FileSize(accFileSize);
}
// Reads all entries of the directory described by `sf` and passes each one to
// handleFolderContent(). Failures to open or to read the directory are logged;
// nothing is reported back to the caller.
void Dedup::indexFolder(const SearchFolder& sf) {
    DIR* dirptr = ::opendir(sf.path.c_str());
    if(!dirptr) {
        Log::error << "Could not open directory: " << sf.path;
        return;
    }

    // readdir() returns NULL both at end-of-directory and on error; the two
    // cases can only be told apart through errno. errno therefore has to be
    // cleared right before *every* call: handleFolderContent() performs
    // syscalls of its own (e.g. stat) that may leave errno set, which would
    // otherwise be misreported as a directory read failure.
    int readError = 0;
    for(;;) {
        errno = 0;
        dirent* dircont = ::readdir(dirptr);
        if(!dircont) {
            // Capture immediately, before logging or closedir() can change it.
            readError = errno;
            break;
        }
        handleFolderContent(dircont, sf);
    }

    if(readError != 0) {
        Log::error << "failed to read directory: " << sf.path;
    }
    ::closedir(dirptr);
}
void Dedup::handleFolderContent(dirent* dircont, const SearchFolder& sf) {
std::string name(dircont->d_name);
std::string filepath = sf.path + "/" + name;
// handle dotfiles
if(name == "." || name == ".." || (ignoredotfiles && name.at(0) == '.')) {
return;
}
// handle subfolders
if(dircont->d_type == DT_DIR) {
if(sf.recurse) {
folders.emplace_back(filepath, true);
Log::note << "found new subdir to index: " << filepath;
return;
}
}
// regular file
if(dircont->d_type == DT_REG) {
handleNewFile(dircont->d_ino, filepath);
}
}
void Dedup::handleNewFile(ino_t inode, const std::string& path) {
// check for already scanned inodes
auto it = inodes.find(inode);
if(it != inodes.end()) {
Log::note << "found already detected file: " << inode << ' ' << path;
return;
}
struct stat statbuf;
int res = stat(path.c_str(), &statbuf);
if(res == -1) {
Log::error << "stat failed: " << path << " error: " << strerror(errno) << " (" << errno << ')';
return;
}
loff_t fileSize = statbuf.st_size;
nlink_t linkCount = statbuf.st_nlink;
auto file = std::make_shared<File>(fileSize, inode, linkCount, path);
inodes.insert({inode, file});
Files& files = getFiles(fileSize);
files.addNewFile(file);
}
// Returns the Files bucket stored in `filesizes` under the key `fs`, creating
// an empty bucket first when none exists yet.
Files& Dedup::getFiles(uint64_t fs) {
    auto known = filesizes.find(fs);
    if(known != filesizes.end()) {
        // Bucket for this size already exists.
        return known->second;
    }

    // First file of this size: insert an empty bucket and hand it out.
    auto inserted = filesizes.insert({fs, {}});
    return inserted.first->second;
}