|
|
|
@ -50,7 +50,7 @@ void Dedup::start() {
|
|
|
|
|
removeUninterestingFiles(foundfiles);
|
|
|
|
|
|
|
|
|
|
// hashing
|
|
|
|
|
uint64_t bytesToHash = std::accumulate(foundfiles.begin(), foundfiles.end(), 0ul, [](uint64_t& c, const std::pair<uint64_t, std::shared_ptr<File>>& it) { return c + it.second->filesize; });
|
|
|
|
|
uint64_t bytesToHash = std::accumulate(foundfiles.begin(), foundfiles.end(), 0ul, [](uint64_t& c, auto it) { return c + it.second->filesize; });
|
|
|
|
|
|
|
|
|
|
Log::info << foundfiles.size() << " files and " << FileSize(bytesToHash) << " are going to be hashed";
|
|
|
|
|
|
|
|
|
@ -92,34 +92,38 @@ std::map<uint64_t, Files> Dedup::hash(std::multimap<uint64_t, std::shared_ptr<Fi
|
|
|
|
|
std::mutex globalData;
|
|
|
|
|
|
|
|
|
|
std::vector<HasherThread*> threads;
|
|
|
|
|
std::vector<std::multimap<uint64_t, std::shared_ptr<File>>::iterator> threadit;
|
|
|
|
|
std::vector<std::multimap<uint64_t, std::shared_ptr<File>>::iterator> threadends;
|
|
|
|
|
std::vector<std::multimap<uint64_t, std::shared_ptr<File>>::iterator> threadits;
|
|
|
|
|
threads.reserve(HASHTHREADCOUNT);
|
|
|
|
|
threadit.reserve(HASHTHREADCOUNT + 1);
|
|
|
|
|
threadends.resize(HASHTHREADCOUNT);
|
|
|
|
|
threadits.resize(HASHTHREADCOUNT);
|
|
|
|
|
|
|
|
|
|
// create threads
|
|
|
|
|
Log::info << "spawning " << (int) HASHTHREADCOUNT << " hashing threads";
|
|
|
|
|
|
|
|
|
|
threadit.push_back(foundfiles.begin());
|
|
|
|
|
for(uint_fast8_t i = 0; i < HASHTHREADCOUNT; ++i) {
|
|
|
|
|
threadits.at(0) = foundfiles.begin();
|
|
|
|
|
for(uint_fast8_t i = 1; i < HASHTHREADCOUNT; ++i) {
|
|
|
|
|
// make a copy!
|
|
|
|
|
std::multimap<uint64_t, std::shared_ptr<File>>::iterator it = threadit.at(i);
|
|
|
|
|
std::multimap<uint64_t, std::shared_ptr<File>>::iterator it = threadits.at(i-1);
|
|
|
|
|
std::advance(it, stepsize);
|
|
|
|
|
threadit.push_back(it);
|
|
|
|
|
|
|
|
|
|
threadits.at(i) = it;
|
|
|
|
|
threadends.at(i-1) = it;
|
|
|
|
|
}
|
|
|
|
|
// make sure the last one is really the last
|
|
|
|
|
threadit.at(HASHTHREADCOUNT) = foundfiles.end();
|
|
|
|
|
threadends.at(HASHTHREADCOUNT-1) = foundfiles.end();
|
|
|
|
|
|
|
|
|
|
for(uint_fast8_t i = 0; i < HASHTHREADCOUNT; ++i) {
|
|
|
|
|
std::multimap<uint64_t, std::shared_ptr<File>>::iterator* itptr = &(threadit.at(i));
|
|
|
|
|
std::multimap<uint64_t, std::shared_ptr<File>>::iterator* endptr = &(threadit.at(i+1));
|
|
|
|
|
std::multimap<uint64_t, std::shared_ptr<File>>::iterator* itptr = &(threadits.at(i));
|
|
|
|
|
const std::multimap<uint64_t, std::shared_ptr<File>>::iterator* endptr = &(threadends.at(i));
|
|
|
|
|
|
|
|
|
|
threads.push_back(new HasherThread(
|
|
|
|
|
globalData,
|
|
|
|
|
[itptr, endptr]() -> std::shared_ptr<File> {
|
|
|
|
|
[itptr, endptr, i]() -> std::shared_ptr<File> {
|
|
|
|
|
if(*itptr != *endptr) {
|
|
|
|
|
auto copy = *itptr;
|
|
|
|
|
std::shared_ptr<File> value = (*itptr)->second;
|
|
|
|
|
++(*itptr);
|
|
|
|
|
return copy->second;
|
|
|
|
|
return value;
|
|
|
|
|
}
|
|
|
|
|
return nullptr;
|
|
|
|
|
},
|
|
|
|
|