initial
This commit is contained in:
commit
d2bb0d1ccc
|
@ -0,0 +1,10 @@
|
|||
*.d
|
||||
*.o
|
||||
*.img
|
||||
|
||||
build/
|
||||
dedup
|
||||
test
|
||||
tests/data/
|
||||
|
||||
.vscode/settings.json
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "thirdparty/Log"]
|
||||
path = thirdparty/Log
|
||||
url = https://git.okaestne.de/okaestne/Log.git
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/src/",
|
||||
"${workspaceFolder}/thirdparty/Log/",
|
||||
"${workspaceFolder}/inc/**"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/g++",
|
||||
"cStandard": "c11",
|
||||
"cppStandard": "c++17",
|
||||
"intelliSenseMode": "gcc-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Debuggen (gdb)",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/dedup",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Automatische Strukturierung und Einrückung für \"gdb\" aktivieren",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
],
|
||||
"preLaunchTask": "make all",
|
||||
"miDebuggerPath": "/usr/bin/gdb"
|
||||
},
|
||||
{
|
||||
"name": "test Debuggen (gdb)",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/test",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Automatische Strukturierung und Einrückung für \"gdb\" aktivieren",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
],
|
||||
"preLaunchTask": "make test",
|
||||
"miDebuggerPath": "/usr/bin/gdb"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
// See https://go.microsoft.com/fwlink/?LinkId=733558
|
||||
// for the documentation about the tasks.json format
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "make all",
|
||||
"type": "shell",
|
||||
"command": "make -j all",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"problemMatcher": [
|
||||
"$gcc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"label": "make clean",
|
||||
"type": "shell",
|
||||
"command": "make clean",
|
||||
"problemMatcher": []
|
||||
},
|
||||
{
|
||||
"label": "make test",
|
||||
"type": "shell",
|
||||
"command": "make -j test",
|
||||
"problemMatcher": ["$gcc"],
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
# Author Yannis Gerlach
|
||||
# Hochschule Osnabrück
|
||||
# 13.11.2020
|
||||
|
||||
# `make clean all` nicht mit -j verwenden! -> race condition im make file
|
||||
# stattdessen: `make clean; make all -j` verwenden
|
||||
|
||||
NAME = dedup
|
||||
NAMETEST = test
|
||||
CFLAGS = -std=c++17 -O2 -g -pipe -Wall -Wextra -Wno-unused-parameter -Wpedantic -rdynamic #-march=native -Wall
|
||||
CXX = g++
|
||||
SRCF = src/
|
||||
BUILDDIR = build/
|
||||
TESTF = tests/
|
||||
DEPF = $(BUILDDIR)deps/
|
||||
INCF = ./inc/
|
||||
INCFS = $(shell find $(INCF) -type d)
|
||||
|
||||
LOGF = ./thirdparty/Log/
|
||||
LOGO = $(LOGF)Log.o
|
||||
export LOG_USEMUTEX = 0
|
||||
|
||||
INCLUDES = -I$(LOGF) $(addprefix -I, $(INCFS))
|
||||
LDFLAGS = -lssl -lcrypto
|
||||
|
||||
SRCFILES = $(shell find $(SRCF) -name "*.cpp")
|
||||
OBJFILES = $(patsubst $(SRCF)%, $(BUILDDIR)%, $(patsubst %.cpp, %.o, $(SRCFILES))) $(LOGO)
|
||||
DEPFILES = $(wildcard $(DEPF)*.d)
|
||||
|
||||
SOURCEDIRS = $(shell find $(SRCF) -type d -printf "%p/\n")
|
||||
BUILDDIRS = $(patsubst $(SRCF)%, $(BUILDDIR)%, $(SOURCEDIRS))
|
||||
|
||||
BUILDDIRTEST = $(BUILDDIR)tests/
|
||||
TESTSRCFILES = $(wildcard $(TESTF)*.cpp)
|
||||
TESTOBJFILES = $(patsubst $(TESTF)%, $(BUILDDIRTEST)%, $(patsubst %.cpp, %.o, $(TESTSRCFILES)))
|
||||
OBJFILESTEST = $(filter-out $(BUILDDIR)main.o, $(OBJFILES)) $(TESTOBJFILES)
|
||||
|
||||
BUILDDIRS += $(BUILDDIRTEST)
|
||||
|
||||
INCLUDES += $(addprefix -I, $(SOURCEDIRS))
|
||||
|
||||
all: $(NAME) runtest
|
||||
|
||||
$(NAME): $(BUILDDIRS) $(DEPF) $(OBJFILES)
|
||||
@echo "Linking $@"
|
||||
@$(CXX) $(CFLAGS) -o $@ $(filter %.o, $^) $(LDFLAGS)
|
||||
|
||||
# normal cpp files
# The output directories are order-only prerequisites (after the `|`): they
# must exist before the compiler writes the .d/.o files - also when make runs
# with -j - but a changed directory timestamp must never force a recompile.
$(BUILDDIR)%.o: $(SRCF)%.cpp | $(BUILDDIRS) $(DEPF)
	@echo "Compiling: $@"
	@$(CXX) $(CFLAGS) $(INCLUDES) $< -MM -MT $@ > $(DEPF)$(subst /,_,$*).d
	@$(CXX) -c -o $@ $(CFLAGS) $(INCLUDES) $<
|
||||
|
||||
# test cpp files
# The test build directory and the dependency directory are order-only
# prerequisites (after the `|`): they are created before compiling - also
# under -j - without their timestamps triggering rebuilds.
$(BUILDDIRTEST)%.o: $(TESTF)%.cpp | $(BUILDDIRTEST) $(DEPF)
	@echo "Compiling: $@"
	@$(CXX) $(CFLAGS) $(INCLUDES) $< -MM -MT $@ > $(DEPF)test_$(subst /,_,$*).d
	@$(CXX) -c -o $@ $(CFLAGS) $(INCLUDES) $<
|
||||
|
||||
$(NAME)_strip: $(NAME)
|
||||
@echo "Strip $<"
|
||||
@strip -o $@ $<
|
||||
|
||||
%/:
|
||||
mkdir -p $@
|
||||
|
||||
clean-depends:
|
||||
$(RM) -r $(DEPF)
|
||||
|
||||
$(LOGO):
|
||||
$(MAKE) -C $(LOGF) all
|
||||
|
||||
clean:
|
||||
$(RM) -r $(NAME) $(BUILDDIR) $(NAMETEST) $(NAME)_strip
|
||||
$(MAKE) -C $(LOGF) $@
|
||||
|
||||
$(NAMETEST): $(BUILDDIRS) $(DEPF) $(OBJFILESTEST)
|
||||
@echo "Linking tests"
|
||||
@$(CXX) -o $@ $(filter %.o, $^) $(filter %.a, $^) $(CFLAGS) $(LDFLAGS)
|
||||
|
||||
runtest: $(BUILDDIR)testrun
|
||||
|
||||
$(BUILDDIR)testrun: $(NAMETEST)
|
||||
@echo "Running tests"
|
||||
bash -c '. test.env 2> /dev/null; ./$<'
|
||||
@touch $(BUILDDIR)testrun
|
||||
|
||||
.PHONY: clean all clean-depends runtest
|
||||
|
||||
include $(DEPFILES)
|
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
#include "file.h"
|
||||
#include "files.h"
|
||||
#include "searchfolder.h"
|
||||
|
||||
class Dedup {
|
||||
public:
|
||||
Dedup();
|
||||
~Dedup();
|
||||
|
||||
void addSearchFolder(const std::string& path, bool recurse = true);
|
||||
|
||||
void start();
|
||||
|
||||
private:
|
||||
std::map<uint64_t, Files> filesizes;
|
||||
std::map<uint64_t, std::shared_ptr<File>> inodes;
|
||||
|
||||
std::vector<SearchFolder> folders;
|
||||
|
||||
bool hardlink = true;
|
||||
};
|
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
// Metadata record for a single file. The class has no public interface yet;
// all fields are private.
class File {
private:
    // The numeric fields are zero-initialized so that a default-constructed
    // File never carries indeterminate values.
    uint64_t filesize = 0;   // presumably the size in bytes
    uint64_t inodeid = 0;    // presumably the inode number
    uint64_t linkcount = 0;  // presumably the number of hard links
    std::string path;        // path of the file
};
|
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "mergefile.h"
|
||||
|
||||
// a list of files with the same size
|
||||
class Files {
|
||||
public:
|
||||
|
||||
private:
|
||||
std::vector<MergeFile> files;
|
||||
};
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <ostream>
|
||||
|
||||
// Holds the digest of a file. A default-constructed Hash is empty; a digest
// is attached with one of the static create() functions.
//
// The object owns its digest buffer, so it is move-only: copying would leave
// two objects releasing the same buffer in their destructors.
class Hash {
public:
    Hash();
    virtual ~Hash();

    // Copying is disabled because the digest buffer has a single owner.
    Hash(const Hash&) = delete;
    Hash& operator=(const Hash&) = delete;

    // Moving transfers the buffer and leaves `other` empty.
    Hash(Hash&& other) noexcept : data(other.data) {
        other.data = nullptr;
    }

    Hash& operator=(Hash&& other) noexcept {
        if(this != &other) {
            delete[] data;  // buffer comes from new[]; a null pointer is fine
            data = other.data;
            other.data = nullptr;
        }
        return *this;
    }

    // Hashes the file at path `file` into `h`. Returns true on success.
    static bool create(Hash& h, const std::string& file);
    // Hashes the content behind the open descriptor `fd` into `h`.
    // Returns true on success.
    static bool create(Hash& h, int fd);

    // True when a digest is present. Kept implicit on purpose: existing code
    // compares Hash objects directly against bool values.
    operator bool() const;
    // Textual form of the digest, produced through operator<<.
    operator std::string() const;

    bool operator==(const Hash& rhs) const;
    bool operator!=(const Hash& rhs) const;

private:
    // Owned digest buffer, nullptr while the hash is empty.
    const unsigned char* data = nullptr;

    friend std::ostream& operator<<(std::ostream& str, const Hash& rhs);
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& str, const Hash& rhs);
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>  // std::shared_ptr - was only available through other headers
#include <vector>

#include "file.h"
#include "hash.h"

// A list of identical files that are to be merged, together with the hash
// they have in common.
class MergeFile {
private:
    std::vector<std::shared_ptr<File>> files;
    Hash hash;
};
|
|
@ -0,0 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
// One folder registered for searching. Remains an aggregate, so
// SearchFolder{"dir", true} keeps working.
struct SearchFolder {
    // Path of the folder.
    std::string path;
    // Recursion flag for this folder. Defaults to false - the same value that
    // value-initialization (SearchFolder{}) produced before - so a plain
    // `SearchFolder f;` no longer holds an indeterminate bool.
    bool recurse = false;
};
|
|
@ -0,0 +1,114 @@
|
|||
#include "hash.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <fcntl.h>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include <Log.h>
|
||||
|
||||
// Number of bytes in a SHA256 digest.
static constexpr size_t HASHSIZE = 32;
|
||||
|
||||
// Creates an empty hash; `data` starts as nullptr through its in-class
// initializer.
Hash::Hash() = default;
|
||||
|
||||
// Releases the digest buffer, if one is attached.
Hash::~Hash() {
    // Hash::create() allocates the buffer with new[], so it has to be freed
    // with delete[]. Deleting a null pointer is a no-op, no check required.
    delete[] data;
    data = nullptr;
}
|
||||
|
||||
|
||||
// Opens `file` read-only and hashes its content into `h`.
//
// Returns true when the digest was stored in `h`; false when the file could
// not be opened or the descriptor-based overload failed.
bool Hash::create(Hash& h, const std::string& file) {
    const int fd = ::open(file.c_str(), O_RDONLY);
    if(fd < 0) {
        // nothing was opened, so there is nothing to hash or to close
        return false;
    }

    const bool result = Hash::create(h, fd);

    ::close(fd);

    return result;
}
|
||||
|
||||
// Computes the SHA256 digest of the whole file behind `fd` and stores it
// in `h`.
//
// The file is mapped into memory and fed to SHA256 in one piece. An empty
// file is hashed without mapping anything.
//
// Returns false when `fd` is invalid, `h` already holds a digest, the file
// size cannot be determined, the mapping fails or any SHA256 step fails.
// `h` is only modified on success.
bool Hash::create(Hash& h, int fd) {
    if(fd < 0 || h.data) {
        return false;
    }

    SHA256_CTX hashctx;
    if(SHA256_Init(&hashctx) != 1) {
        Log::error << "Can not create SHA256 CTX";
        return false;
    }

    // determine the file size; lseek64 reports failure with a negative value
    const loff_t fs = lseek64(fd, 0, SEEK_END);
    if(fs < 0 || lseek64(fd, 0, SEEK_SET) < 0) {
        return false;
    }

    // mmap() rejects a length of zero, so only non-empty files are mapped
    if(fs > 0) {
        const size_t length = static_cast<size_t>(fs);

        void* mapping = ::mmap(nullptr, length, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
        if(mapping == MAP_FAILED) {
            return false;
        }

        ::madvise(mapping, length, MADV_DONTDUMP);
        ::madvise(mapping, length, MADV_DONTFORK);
        ::madvise(mapping, length, MADV_SEQUENTIAL);
        ::madvise(mapping, length, MADV_WILLNEED);

        // unmap before looking at the result so no exit path leaks the mapping
        const bool updated = SHA256_Update(&hashctx, mapping, length) == 1;
        ::munmap(mapping, length);

        if(!updated) {
            Log::error << "Can not update hash";
            return false;
        }
    }

    // finalize into a stack buffer first, so a failure cannot leak heap memory
    unsigned char digest[HASHSIZE];
    if(SHA256_Final(digest, &hashctx) != 1) {
        Log::error << "Can not finalize hash";
        return false;
    }

    unsigned char* md = new unsigned char[HASHSIZE];
    std::memcpy(md, digest, HASHSIZE);
    h.data = md;

    return true;
}
|
||||
|
||||
|
||||
// True when a digest buffer is attached.
Hash::operator bool() const {
    return data != nullptr;
}
|
||||
|
||||
// Renders the hash through the stream operator and returns the text.
Hash::operator std::string() const {
    std::ostringstream rendered;
    rendered << *this;
    return rendered.str();
}
|
||||
|
||||
|
||||
// Two hashes are equal when both are empty, when they share one buffer, or
// when their HASHSIZE digest bytes match. An empty hash never equals a
// filled one.
bool Hash::operator==(const Hash& rhs) const {
    // same buffer, or both sides empty (nullptr == nullptr)
    if(data == rhs.data) return true;

    // exactly one side is empty: memcmp must not be handed a null pointer
    if(data == nullptr || rhs.data == nullptr) return false;

    return std::memcmp(data, rhs.data, HASHSIZE) == 0;
}
|
||||
|
||||
// Negation of operator==.
bool Hash::operator!=(const Hash& rhs) const {
    const bool equal = (*this == rhs);
    return !equal;
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream& str, const Hash& rhs) {
|
||||
str << std::hex << std::setfill('0');
|
||||
|
||||
for(size_t i = 0; i < HASHSIZE; ++i) {
|
||||
str << std::setw(2) << (int) rhs.data[i];
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
|
||||
#include "Log.h"
|
||||
|
||||
// Program entry point: sets up the Log library, emits a greeting and shuts
// the logger down again. The command line is not evaluated yet.
int main(int argc, const char** argv) {
    // logger setup: trace level for both the console and the file "log.txt"
    Log::init();
    Log::setConsoleLogLevel(Log::Level::trace);
    Log::addLogfile("log.txt", Log::Level::trace);
#if __unix__
    // colored output is only switched on when __unix__ is set
    Log::setColoredOutput(true);
#endif

    Log::info << "Hello, World!";

    Log::stop();
    return 0;
}
|
|
@ -0,0 +1,23 @@
|
|||
#include "test.h"
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
// Checks the Hash life cycle: empty after construction, filled after
// create(), not overwritable, expected hex text, equal to itself.
TEST(hash) {
    Hash h;

    // a fresh hash converts to false
    CMPASSERT(h, false);

    // hashing the test image succeeds and fills the hash
    CMPASSERT(Hash::create(h, TESTDATA "random.img"), true);
    CMPASSERT(h, true);

    // creating into an already filled hash has to fail and keep the content
    CMPASSERT(Hash::create(h, TESTDATA "random.img"), false);
    CMPASSERT(h, true);

    // expected digest of the test image as text
    CMPASSERT(static_cast<std::string>(h), "de05bb13b33f1cc593348d733b84820c77f8f6be89cf417d21d8b2af81d3ebd8");
    CMPASSERT(h, true);

    // a hash equals itself
    CMPASSERT(h, h);

} TESTEND
|
|
@ -0,0 +1,54 @@
|
|||
#include <stdio.h>
|
||||
#include "test.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
|
||||
#define RED "\033[1;91m"
|
||||
#define GREEN "\033[1;92m"
|
||||
#define YELLOW "\033[1;93m"
|
||||
#define AQUA "\033[1;36m"
|
||||
#define RESET "\033[;1m"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
testdef* startit = &__start_testlist, *endit = &__stop_testlist;
|
||||
int failcount = 0;
|
||||
int skipcount = 0;
|
||||
int testcount = endit-startit;
|
||||
int testnumber = 0;
|
||||
|
||||
// go through back -> front (tests are inserted in reverse order)
|
||||
for(testdef* it = startit + testcount-1; it >= startit; --it) {
|
||||
printf("\033[1mRunning test: %d/%d " AQUA "%s " RESET, ++testnumber, testcount, it->name);
|
||||
|
||||
// run test
|
||||
int result = TESTFAILED;
|
||||
try {
|
||||
result = (it->testf)();
|
||||
} catch(std::exception& e) {
|
||||
std::cout << "catched exception: \"" << e.what() << "\" " << std::flush;
|
||||
} catch(...) {}
|
||||
|
||||
if(result == TESTGOOD) {
|
||||
printf(GREEN "succeeded" RESET "!\n");
|
||||
} else if(result == TESTSKIPPED) {
|
||||
printf(YELLOW "skipped" RESET "!\n");
|
||||
skipcount++;
|
||||
} else {
|
||||
printf(RED "failed" RESET "\n");
|
||||
failcount++;
|
||||
}
|
||||
}
|
||||
|
||||
const char* color = (failcount > 0 ? RED : GREEN); // red or green
|
||||
printf("%s%d" RESET "/%d failed (" YELLOW "%d " RESET "skipped)\n", color, failcount, testcount, skipcount);
|
||||
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> t = end - start;
|
||||
printf("Testing took: %fms\n", (t.count() * 1000));
|
||||
|
||||
return failcount > 0;
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once

#include <iostream>

// Result codes returned by a test function.
#define TESTFAILED 0
#define TESTGOOD 1
// parenthesized so the macro stays one value inside larger expressions
#define TESTSKIPPED (-1)

// Directory prefix for test fixtures; concatenate with a file name literal.
#define TESTDATA "./tests/data/"

// very helpful: https://mgalgs.io/2013/05/10/hacking-your-ELF-for-fun-and-profit.html

// Name and signature of the function generated for test NAME.
#define TESTNAME(NAME) test_##NAME
#define TESTFUNC(NAME) int TESTNAME(NAME)()

// Places a testdef entry for NAME into the "testlist" section, where the
// runner finds it between __start_testlist and __stop_testlist.
#define REGISTERTEST(NAME) static const testdef __test_ ## NAME \
    __attribute((__section__("testlist"))) \
    __attribute((__used__)) = { \
        TESTNAME(NAME), \
        #NAME, \
    }

// Opens a test body: declares the function, registers it, starts the
// definition. Has to be closed with TESTEND.
#define TEST(NAME) static TESTFUNC(NAME); \
    REGISTERTEST(NAME); \
    TESTFUNC(NAME) {

// Closes a test body; a test that reaches its end has succeeded.
#define TESTEND return TESTGOOD; }

// Fails the current test with message ERR when BED ("condition") is false.
#define ASSERT(BED, ERR) if(!(BED)) { std::cout << __FILE__ << ":" << __LINE__ << " " << ERR << ' ' << std::flush; return TESTFAILED; }
// Fails the current test when IS != SHOULD; prints both values followed by
// ERR. IS and SHOULD are evaluated a second time on the failure path.
#define CMPASSERTE(IS, SHOULD, ERR) if( !((IS) == (SHOULD))) { std::cout << __FILE__ << ":" << __LINE__ << " is: \"" << (IS) << "\" should: \"" << (SHOULD) << "\" " << ERR << std::flush; return TESTFAILED; }
#define CMPASSERT(IS, SHOULD) CMPASSERTE(IS, SHOULD, "")

// Leaves the current test and reports it as skipped.
#define SKIPTEST return TESTSKIPPED

// Signature of a test function.
typedef int (*test_t)();

// One registered test: the function to call and its printable name.
struct testdef {
    test_t testf;
    const char* name;
};

// linker generates this <3
extern struct testdef __start_testlist;
extern struct testdef __stop_testlist;
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 027f901dbe1002f40658ccc231997f94bb472bd1
|
Loading…
Reference in New Issue