This commit is contained in:
mrbesen 2022-10-21 19:39:55 +02:00
commit d2bb0d1ccc
Signed by: MrBesen
GPG Key ID: 596B2350DCD67504
18 changed files with 556 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
*.d
*.o
*.img
build/
dedup
test
tests/data/
.vscode/settings.json

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "thirdparty/Log"]
path = thirdparty/Log
url = https://git.okaestne.de/okaestne/Log.git

18
.vscode/c_cpp_properties.json vendored Normal file
View File

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/src/",
"${workspaceFolder}/thirdparty/Log/",
"${workspaceFolder}/inc/**"
],
"defines": [],
"compilerPath": "/usr/bin/g++",
"cStandard": "c11",
"cppStandard": "c++17",
"intelliSenseMode": "gcc-x64"
}
],
"version": 4
}

50
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,50 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Debuggen (gdb)",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/dedup",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Automatische Strukturierung und Einrückung für \"gdb\" aktivieren",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "make all",
"miDebuggerPath": "/usr/bin/gdb"
},
{
"name": "test Debuggen (gdb)",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/test",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Automatische Strukturierung und Einrückung für \"gdb\" aktivieren",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "make test",
"miDebuggerPath": "/usr/bin/gdb"
}
]
}

31
.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,31 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "make all",
"type": "shell",
"command": "make -j all",
"group": {
"kind": "build",
"isDefault": true
},
"problemMatcher": [
"$gcc"
]
},
{
"label": "make clean",
"type": "shell",
"command": "make clean",
"problemMatcher": []
},
{
"label": "make test",
"type": "shell",
"command": "make -j test",
"problemMatcher": ["$gcc"]
}
]
}

90
Makefile Normal file
View File

@ -0,0 +1,90 @@
# Author Yannis Gerlach
# Hochschule Osnabrück
# 13.11.2020

# Do not run `make clean all` with -j! -> race condition in the makefile
# use `make clean; make all -j` instead

NAME = dedup
NAMETEST = test
CFLAGS = -std=c++17 -O2 -g -pipe -Wall -Wextra -Wno-unused-parameter -Wpedantic -rdynamic #-march=native -Wall
CXX = g++
SRCF = src/
BUILDDIR = build/
TESTF = tests/
DEPF = $(BUILDDIR)deps/
INCF = ./inc/
INCFS = $(shell find $(INCF) -type d)
LOGF = ./thirdparty/Log/
LOGO = $(LOGF)Log.o
export LOG_USEMUTEX = 0
INCLUDES = -I$(LOGF) $(addprefix -I, $(INCFS))
LDFLAGS = -lssl -lcrypto

SRCFILES = $(shell find $(SRCF) -name "*.cpp")
OBJFILES = $(patsubst $(SRCF)%, $(BUILDDIR)%, $(patsubst %.cpp, %.o, $(SRCFILES))) $(LOGO)
DEPFILES = $(wildcard $(DEPF)*.d)
SOURCEDIRS = $(shell find $(SRCF) -type d -printf "%p/\n")
BUILDDIRS = $(patsubst $(SRCF)%, $(BUILDDIR)%, $(SOURCEDIRS))

BUILDDIRTEST = $(BUILDDIR)tests/
TESTSRCFILES = $(wildcard $(TESTF)*.cpp)
TESTOBJFILES = $(patsubst $(TESTF)%, $(BUILDDIRTEST)%, $(patsubst %.cpp, %.o, $(TESTSRCFILES)))
# the test binary brings its own main(), so the program's main.o is left out
OBJFILESTEST = $(filter-out $(BUILDDIR)main.o, $(OBJFILES)) $(TESTOBJFILES)
BUILDDIRS += $(BUILDDIRTEST)
INCLUDES += $(addprefix -I, $(SOURCEDIRS))

all: $(NAME) runtest

# Directories are order-only prerequisites (listed after `|`): they only have
# to exist, their timestamps must not trigger a relink.
$(NAME): $(OBJFILES) | $(BUILDDIRS) $(DEPF)
	@echo "Linking $@"
	@$(CXX) $(CFLAGS) -o $@ $(filter %.o, $^) $(LDFLAGS)

# normal cpp files
# The object rules depend (order-only) on the output directories themselves, so
# that with -j no compile job can start before its directories were created.
$(BUILDDIR)%.o: $(SRCF)%.cpp | $(BUILDDIRS) $(DEPF)
	@echo "Compiling: $@"
	@$(CXX) $(CFLAGS) $(INCLUDES) $< -MM -MT $@ > $(DEPF)$(subst /,_,$*).d
	@$(CXX) -c -o $@ $(CFLAGS) $(INCLUDES) $<

# test cpp files
$(BUILDDIRTEST)%.o: $(TESTF)%.cpp | $(BUILDDIRS) $(DEPF)
	@echo "Compiling: $@"
	@$(CXX) $(CFLAGS) $(INCLUDES) $< -MM -MT $@ > $(DEPF)test_$(subst /,_,$*).d
	@$(CXX) -c -o $@ $(CFLAGS) $(INCLUDES) $<

$(NAME)_strip: $(NAME)
	@echo "Strip $<"
	@strip -o $@ $<

# any target ending in a slash is a directory that gets created on demand
%/:
	mkdir -p $@

clean-depends:
	$(RM) -r $(DEPF)

# the Log library is built by its own makefile
$(LOGO):
	$(MAKE) -C $(LOGF) all

clean:
	$(RM) -r $(NAME) $(BUILDDIR) $(NAMETEST) $(NAME)_strip
	$(MAKE) -C $(LOGF) $@

$(NAMETEST): $(OBJFILESTEST) | $(BUILDDIRS) $(DEPF)
	@echo "Linking tests"
	@$(CXX) -o $@ $(filter %.o, $^) $(filter %.a, $^) $(CFLAGS) $(LDFLAGS)

runtest: $(BUILDDIR)testrun

# the stamp file makes the tests run again only after the test binary changed
$(BUILDDIR)testrun: $(NAMETEST)
	@echo "Running tests"
	bash -c '. test.env 2> /dev/null; ./$<'
	@touch $(BUILDDIR)testrun

.PHONY: clean all clean-depends runtest

include $(DEPFILES)

27
inc/dedup.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <cstdint>
#include <map>
#include <memory>
#include "file.h"
#include "files.h"
#include "searchfolder.h"
// Holds the configured search folders and two lookup tables for File objects.
// Only the declaration is part of this commit; the member functions are not
// implemented yet.
class Dedup {
public:
Dedup();
~Dedup();
// Takes a folder path and a recurse flag (defaults to true).
// NOTE(review): presumably stores them as a SearchFolder in `folders` - confirm once implemented.
void addSearchFolder(const std::string& path, bool recurse = true);
// NOTE(review): presumably runs the search over `folders` - implementation not visible here.
void start();
private:
// NOTE(review): presumably keyed by file size (Files is described as "a list of files with the same size") - confirm.
std::map<uint64_t, Files> filesizes;
// NOTE(review): presumably keyed by inode id (cf. File::inodeid) - confirm.
std::map<uint64_t, std::shared_ptr<File>> inodes;
std::vector<SearchFolder> folders;
// NOTE(review): looks like a switch for replacing duplicates with hard links - not used anywhere yet, confirm.
bool hardlink = true;
};

13
inc/file.h Normal file
View File

@ -0,0 +1,13 @@
#pragma once
#include <cstdint>
#include <string>
// Metadata record for a single file: size, inode id, link count and path.
// The class has no public interface yet.
class File {
public:
private:
    // The numeric members carry default initializers so that a
    // default-constructed File never holds indeterminate values.
    uint64_t filesize = 0;
    uint64_t inodeid = 0;
    uint64_t linkcount = 0;
    std::string path;
};

13
inc/files.h Normal file
View File

@ -0,0 +1,13 @@
#pragma once
#include <vector>
#include "mergefile.h"
// A list of files with the same size, stored as a vector of MergeFile entries.
// The class has no public interface yet.
class Files {
public:
private:
std::vector<MergeFile> files;
};

26
inc/hash.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include <string>
#include <ostream>
// SHA256 digest of a file's content.
//
// A default-constructed Hash is empty (converts to false). Hash::create fills
// it with a heap-allocated digest buffer that the Hash owns and releases in
// its destructor. Because of that ownership the class is move-only: an
// implicit copy would make two objects free the same buffer.
class Hash {
public:
    Hash();
    virtual ~Hash();

    // Copying is disabled, see the class comment.
    Hash(const Hash&) = delete;
    Hash& operator=(const Hash&) = delete;

    // Moving transfers the digest buffer and leaves `other` empty.
    Hash(Hash&& other) noexcept : data(other.data) {
        other.data = nullptr;
    }

    // Move assignment swaps the buffers; the previous buffer of *this is then
    // released by the destructor of `other`. Self-assignment is a no-op.
    Hash& operator=(Hash&& other) noexcept {
        const unsigned char* previous = data;
        data = other.data;
        other.data = previous;
        return *this;
    }

    // Computes the digest of the file at path `file` and stores it in `h`.
    // Returns false on failure or if `h` already holds a digest.
    static bool create(Hash& h, const std::string& file);
    // Same as above for an already opened file descriptor. The descriptor is
    // not closed.
    static bool create(Hash& h, int fd);

    // True if a digest is stored.
    operator bool() const;
    // Lowercase hexadecimal representation of the digest.
    operator std::string() const;

    bool operator==(const Hash& rhs) const;
    bool operator!=(const Hash& rhs) const;

private:
    // Owned digest buffer, nullptr while the hash is empty.
    const unsigned char* data = nullptr;

    friend std::ostream& operator<<(std::ostream& str, const Hash& rhs);
};
std::ostream& operator<<(std::ostream& str, const Hash& rhs);

16
inc/mergefile.h Normal file
View File

@ -0,0 +1,16 @@
#pragma once
#include <memory>
#include <vector>
#include "file.h"
#include "hash.h"
// A list of identical files that are to be merged, together with one Hash.
// NOTE(review): presumably `hash` is the digest shared by all entries of `files` - confirm once it is populated.
class MergeFile {
public:
private:
std::vector<std::shared_ptr<File>> files;
Hash hash;
};

8
inc/searchfolder.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include <string>
// A folder path paired with a recurse flag.
// NOTE(review): `recurse` has no default initializer, so a default-initialized
// SearchFolder leaves it indeterminate - callers have to set it explicitly.
struct SearchFolder {
std::string path;
bool recurse;
};

114
src/hash.cpp Normal file
View File

@ -0,0 +1,114 @@
#include "hash.h"
#include <cstring>
#include <fcntl.h>
#include <iomanip>
#include <sstream>
#include <sys/mman.h>
#include <unistd.h>
#include <openssl/sha.h>
#include <Log.h>
const static size_t HASHSIZE = 32; // the size of the SHA256 Hashsum in bytes
// Creates an empty hash; `data` keeps its in-class initializer (nullptr).
Hash::Hash() = default;
// Releases the digest buffer, if one is stored.
Hash::~Hash() {
    // The buffer is allocated with new[] in Hash::create, so it has to be
    // released with delete[]. Deleting a null pointer is a no-op.
    delete[] data;
    data = nullptr;
}
// Opens `file` read-only, hashes its content into `h` and closes it again.
// Returns false if the file can not be opened, if hashing fails or if `h`
// already holds a digest.
bool Hash::create(Hash& h, const std::string& file) {
    const int fd = ::open(file.c_str(), O_RDONLY);
    if(fd < 0) {
        // nothing was opened, so there is nothing to close
        return false;
    }

    const bool result = Hash::create(h, fd);
    ::close(fd);
    return result;
}
// Hashes the whole content of the file behind `fd` with SHA256 and stores the
// digest in `h`.
//
// Returns false if `fd` is invalid, if `h` already holds a digest, or if any
// step (size lookup, mapping, hashing) fails. `h` is only modified on success.
// The descriptor stays open; its file offset is reset to the beginning.
bool Hash::create(Hash& h, int fd) {
    if(fd < 0 || h.data) {
        return false;
    }

    SHA256_CTX hashctx;
    if(SHA256_Init(&hashctx) != 1) {
        Log::error << "Can not create SHA256 CTX";
        return false;
    }

    // determine filesize
    const loff_t fs = lseek64(fd, 0, SEEK_END);
    if(fs < 0 || lseek64(fd, 0, SEEK_SET) < 0) {
        Log::error << "Can not determine file size";
        return false;
    }

    // mmap rejects a length of 0, so an empty file is hashed without a
    // mapping: its digest is the SHA256 of zero bytes.
    if(fs > 0) {
        const size_t length = static_cast<size_t>(fs);
        void* mapping = ::mmap(nullptr, length, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
        if(mapping == MAP_FAILED) {
            return false;
        }

        ::madvise(mapping, length, MADV_DONTDUMP);
        ::madvise(mapping, length, MADV_DONTFORK);
        ::madvise(mapping, length, MADV_SEQUENTIAL);
        ::madvise(mapping, length, MADV_WILLNEED);

        // unmap before looking at the result, so the mapping is released on
        // the error path as well
        const bool updated = SHA256_Update(&hashctx, mapping, length) == 1;
        ::munmap(mapping, length);
        if(!updated) {
            Log::error << "Can not update hash";
            return false;
        }
    }

    unsigned char* md = new unsigned char[HASHSIZE];
    if(SHA256_Final(md, &hashctx) != 1) {
        Log::error << "Can not finalize hash";
        delete[] md; // not handed over to h, so it has to be freed here
        return false;
    }

    h.data = md;
    return true;
}
// A hash counts as set as soon as it holds a digest buffer.
Hash::operator bool() const {
    return data != nullptr;
}
// Renders the hash through the stream operator and returns the resulting text.
Hash::operator std::string() const {
    std::stringstream rendered;
    rendered << *this;
    return rendered.str();
}
// Two hashes are equal if both are empty or both hold the same digest bytes.
// An empty hash never equals a set one.
bool Hash::operator==(const Hash& rhs) const {
    // same buffer, or both empty
    if(data == rhs.data) return true;

    // exactly one side is empty: must not reach memcmp with a null pointer
    if(data == nullptr || rhs.data == nullptr) return false;

    return std::memcmp(data, rhs.data, HASHSIZE) == 0;
}
// Negation of operator==.
bool Hash::operator!=(const Hash& rhs) const {
    const bool same = (*this == rhs);
    return !same;
}
// Writes the digest as lowercase hexadecimal, two digits per byte.
// An empty hash writes nothing. The stream's format flags and fill character
// are restored afterwards, so later output of the caller is not affected.
std::ostream& operator<<(std::ostream& str, const Hash& rhs) {
    if(rhs.data == nullptr) {
        return str;
    }

    const std::ios_base::fmtflags oldflags = str.flags();
    const char oldfill = str.fill();

    str << std::hex << std::setfill('0');
    for(size_t i = 0; i < HASHSIZE; ++i) {
        // widen to int, otherwise the byte would be printed as a character
        str << std::setw(2) << static_cast<int>(rhs.data[i]);
    }

    str.flags(oldflags);
    str.fill(oldfill);
    return str;
}

16
src/main.cpp Normal file
View File

@ -0,0 +1,16 @@
#include "Log.h"
// Program entry point. For now it only sets up logging, writes a greeting and
// shuts the logger down again; the command line arguments are unused.
int main(int argc, const char** argv) {
Log::init();
// trace level for both the console and the logfile "log.txt"
Log::setConsoleLogLevel(Log::Level::trace);
Log::addLogfile("log.txt", Log::Level::trace);
// colored output is only switched on when building for unix
#if __unix__
Log::setColoredOutput(true);
#endif
Log::info << "Hello, World!";
Log::stop();
return 0;
}

23
tests/hashtest.cpp Normal file
View File

@ -0,0 +1,23 @@
#include "test.h"
#include "hash.h"
// Exercises Hash against the fixture TESTDATA "random.img": creation,
// refusal to overwrite, string conversion and self-comparison.
TEST(hash) {
Hash h;
// a fresh hash is empty
CMPASSERT(h, false);
CMPASSERT(Hash::create(h, TESTDATA "random.img"), true);
CMPASSERT(h, true);
// creating into a hash that already holds a digest must fail and keep the digest
CMPASSERT(Hash::create(h, TESTDATA "random.img"), false);
CMPASSERT(h, true);
// expected hex digest of the fixture file
CMPASSERT(static_cast<std::string>(h), "de05bb13b33f1cc593348d733b84820c77f8f6be89cf417d21d8b2af81d3ebd8");
CMPASSERT(h, true);
CMPASSERT(h, h);
} TESTEND

54
tests/main.cpp Normal file
View File

@ -0,0 +1,54 @@
#include <stdio.h>
#include "test.h"
#include <map>
#include <string>
#include <chrono>
#define RED "\033[1;91m"
#define GREEN "\033[1;92m"
#define YELLOW "\033[1;93m"
#define AQUA "\033[1;36m"
#define RESET "\033[;1m"
int main(int argc, char** argv) {
auto start = std::chrono::high_resolution_clock::now();
testdef* startit = &__start_testlist, *endit = &__stop_testlist;
int failcount = 0;
int skipcount = 0;
int testcount = endit-startit;
int testnumber = 0;
// go through back -> front (tests are inserted in reverse order)
for(testdef* it = startit + testcount-1; it >= startit; --it) {
printf("\033[1mRunning test: %d/%d " AQUA "%s " RESET, ++testnumber, testcount, it->name);
// run test
int result = TESTFAILED;
try {
result = (it->testf)();
} catch(std::exception& e) {
std::cout << "catched exception: \"" << e.what() << "\" " << std::flush;
} catch(...) {}
if(result == TESTGOOD) {
printf(GREEN "succeeded" RESET "!\n");
} else if(result == TESTSKIPPED) {
printf(YELLOW "skipped" RESET "!\n");
skipcount++;
} else {
printf(RED "failed" RESET "\n");
failcount++;
}
}
const char* color = (failcount > 0 ? RED : GREEN); // red or green
printf("%s%d" RESET "/%d failed (" YELLOW "%d " RESET "skipped)\n", color, failcount, testcount, skipcount);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> t = end - start;
printf("Testing took: %fms\n", (t.count() * 1000));
return failcount > 0;
}

43
tests/test.h Normal file
View File

@ -0,0 +1,43 @@
#pragma once

// Result codes a test function hands back to the runner.
#define TESTFAILED 0
#define TESTGOOD 1
#define TESTSKIPPED (-1)

#include <iostream>

// Folder containing the test fixtures, relative to the working directory.
#define TESTDATA "./tests/data/"

// very helpful: https://mgalgs.io/2013/05/10/hacking-your-ELF-for-fun-and-profit.html

// Name and signature of the function generated for test NAME.
#define TESTNAME(NAME) test_##NAME
#define TESTFUNC(NAME) int TESTNAME(NAME)()

// Puts a testdef entry for test NAME into the "testlist" section, from where
// the runner collects all tests. __used__ keeps the otherwise unreferenced
// entry from being discarded.
#define REGISTERTEST(NAME) static const testdef testreg_ ## NAME \
    __attribute__((__section__("testlist"))) \
    __attribute__((__used__)) = { \
        TESTNAME(NAME), \
        #NAME, \
    }

// Opens a test: declares the function, registers it and starts its body.
// The body has to be closed with TESTEND.
#define TEST(NAME) static TESTFUNC(NAME); \
    REGISTERTEST(NAME); \
    TESTFUNC(NAME) {

// Closes a test body; a test that reaches its end has succeeded.
// (No line continuation here, it would swallow the following #define.)
#define TESTEND return TESTGOOD; }

// Fails the test with message ERR unless the condition BED holds.
#define ASSERT(BED, ERR) if(!(BED)) { std::cout << __FILE__ << ":" << __LINE__ << " " << ERR << ' ' << std::flush; return TESTFAILED; }

// Fails the test unless IS == SHOULD; prints both values followed by ERR.
#define CMPASSERTE(IS, SHOULD, ERR) if( !((IS) == (SHOULD))) { std::cout << __FILE__ << ":" << __LINE__ << " is: \"" << (IS) << "\" should: \"" << (SHOULD) << "\" " << ERR << ' ' << std::flush; return TESTFAILED; }
#define CMPASSERT(IS, SHOULD) CMPASSERTE(IS, SHOULD, "")

// Leaves the test early and reports it as skipped.
#define SKIPTEST return TESTSKIPPED
// Signature of a test body: no arguments, returns one of the TEST* result codes.
using test_t = int (*)();

// One registered test, as placed into the "testlist" section by REGISTERTEST.
struct testdef {
    test_t testf;      // function that runs the test
    const char* name;  // test name as written in TEST(...)
};

// linker generates this <3
// (first entry of the "testlist" section and the position right behind its last entry)
extern struct testdef __start_testlist;
extern struct testdef __stop_testlist;

1
thirdparty/Log vendored Submodule

@ -0,0 +1 @@
Subproject commit 027f901dbe1002f40658ccc231997f94bb472bd1