Compare commits

...

2 Commits

Author SHA1 Message Date
mrbesen 94f4903786
deduplicating messages 2021-04-07 17:52:32 +02:00
mrbesen f167d51840
nested searches 2021-04-07 17:00:19 +02:00
3 changed files with 125 additions and 10 deletions

View File

@ -31,21 +31,28 @@ int main(int argc, const char** argv) {
}
time_t start = time(nullptr);
Search search;
Search rootsearch;
for(int i = 1; i < argc; ++i) {
Log::info << "Load File: " << argv[i];
search.addFile(argv[i]);
rootsearch.addFile(argv[i]);
}
rootsearch.finalize();
time_t end = time(nullptr);
Log::info << search.getChatCount() << " Chats with " << search.getMessageCount() << " Messages loaded in " << end-start << "s";
Log::info << rootsearch.getChatCount() << " Chats with " << rootsearch.getMessageCount() << " Messages loaded in " << end-start << "s";
signal(SIGINT, sig_handler);
Search* search = nullptr;
while(run) {
Log::info << "Enter Search String: ";
if(!search)
search = &rootsearch;
Log::info << search->getMessageCount() << " Messages. Enter searchterm: ";
std::string searchterm;
std::getline(std::cin, searchterm);
Log::trace << "searchterm: \"" << searchterm << "\"";
if(!run) break;
@ -56,10 +63,12 @@ int main(int argc, const char** argv) {
if(!run) break;
std::list<const Message*> results = search.search(searchterm, parsedflags);
Log::info << results.size() << " results";
time_t start = time(nullptr);
std::list<const Message*> results = search->search(searchterm, parsedflags);
time_t end = time(nullptr);
Log::info << results.size() << " results found in " << end-start << "s";
if(results.size()) {
Log::info << "Print results?";
Log::info << "Print results ?";
char c;
std::cin >> c;
@ -68,12 +77,40 @@ int main(int argc, const char** argv) {
if(c == 'y' || c == 'Y') {
//print results
for(const Message* m : results) {
Log::info << search.getShortChatname(m->chatid) << ": (" << m->messageid << ") " << m->text;
Log::info << search->getShortChatname(m->chatid) << ": (" << m->messageid << ") " << m->text;
}
}
Log::info << "New Subfilter (y)? Clear Search (c)? Change current Filter (anykey)?";
std::cin >> c;
if(!run) break;
if(c == 'y' || c == 'Y') {
//filter
Search* subsearch = new Search(*search, &results);
//delete old subsearch (dont delete root)
if(search != &rootsearch)
delete search;
//set new search
search = subsearch;
} else if(c == 'c' || c == 'C') {
//move to root search
if(search != &rootsearch) {
delete search;
search = &rootsearch;
}
}
}
std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
}
if(search != &rootsearch)
delete search;
Log::stop();
return 0;
}

View File

@ -8,6 +8,21 @@
#include <nlohmann/json.hpp>
using json = nlohmann::json;
// Equality is based on identity only: two messages are equal when both the
// chat id and the message id match. The message text is not compared.
bool Message::operator==(const Message& m) const {
    const bool sameChat = (chatid == m.chatid);
    const bool sameMessage = (messageid == m.messageid);
    return sameChat && sameMessage;
}
// Inequality is the exact negation of operator==: the messages differ when
// either the chat id or the message id differs.
bool Message::operator!=(const Message& m) const {
    return !(*this == m);
}
// Strict ordering by chat id first, then by message id within the same chat.
// The message text does not take part in the ordering.
bool Message::operator<(const Message& m) const {
    if (chatid != m.chatid) {
        return chatid < m.chatid;
    }
    // same chat: the message id decides
    return messageid < m.messageid;
}
Searchflags operator|=(Searchflags& lhs, const Searchflags sf) {
lhs = (Searchflags) ((uint32_t) lhs | (uint32_t) sf);
return lhs;
@ -35,6 +50,19 @@ static bool matchesRegex(const std::string& msg, const std::regex& reg) {
}
// Creates an empty search; all members are default-constructed.
Search::Search() = default;
// Builds a new search from an existing one.
//
// orig: search whose chat names are copied; its messages are copied too when
//       no list is given.
// list: optional list of message pointers. When non-null, exactly the pointed-to
//       messages are copied (by value) and orig's messages are ignored.
//
// The messages are stored as copies, so the new object does not keep any of the
// pointers from list.
Search::Search(const Search& orig, std::list<const Message*>* list)
    : chatnames(orig.chatnames) {
    if (!list) {
        //no filter list given: take over all messages
        msgs = orig.msgs;
        return;
    }

    //the element count is known up front, so allocate once instead of growing repeatedly
    msgs.reserve(list->size());
    for (const Message* m : *list) {
        msgs.push_back(*m);
    }
}
// Nothing to release manually; the members clean up after themselves.
Search::~Search() = default;
Searchflags Search::fromString(const std::string& str) {
@ -69,6 +97,8 @@ void Search::addFile(const std::string& file) {
if(j.contains("messages")) {
chatnames.insert({j["id"], j["name"].get<std::string>()});
loadMessages(j["messages"], j["id"]);
return;
}
//multi chat export
@ -89,12 +119,24 @@ void Search::addFile(const std::string& file) {
Log::note << "Loaded Chat: " << name << " (" << id << ")";
}
return;
}
Log::warn << "File " << file << " could not be parsed! Export as json!";
} catch (nlohmann::detail::parse_error& e) {
Log::error << "Could not load File: " << e.what();
}
}
// Appends every message collected in the intermediate `deduplicate` set to
// `msgs` and then empties the set.
//
// Messages are appended in the set's iteration order, i.e. the order defined by
// Message::operator<. Calling this again after more files were added only
// deduplicates among the newly added messages, not against those already in
// `msgs`.
void Search::finalize() {
    //range insert copies each message exactly once (no per-element temporary)
    msgs.insert(msgs.end(), deduplicate.begin(), deduplicate.end());
    deduplicate.clear();
}
std::list<const Message*> Search::search(std::string text, Searchflags flags) const {
std::list<const Message*> out;
@ -155,11 +197,35 @@ void Search::runsearch(T st, bool (*checker)(const std::string& msg, T text), st
}
}
static void readText(const json& t, std::string& out) {
if(t.is_null()) return;
if(t.is_string())
out = t;
if(t.is_array()) {
std::ostringstream buff;
for(const json& entr : t) {
if(entr.is_string())
buff << (const std::string&) entr;
else if(entr.contains("text"))
buff << (const std::string&) entr["text"];
}
out = buff.str();
}
}
void Search::loadMessages(const json& j, uint64_t chatid) {
uint32_t failed = 0;
for(const json& m : j) {
try {
msgs.push_back({m["text"], chatid, m["id"]});
if(m.contains("text")) {
std::string text;
readText(m["text"], text);
deduplicate.insert({text, chatid, m["id"]});
} else {
Log::warn << "text less message: " << m;
}
} catch(const nlohmann::detail::exception& e) {
Log::warn << "Parse error: " << e.id << " " << e.what();
} catch(...) {
failed ++;
}

View File

@ -2,6 +2,8 @@
#include <string>
#include <list>
#include <vector>
#include <set>
#include <map>
#include <cstdint>
@ -12,12 +14,18 @@ struct Message {
std::string text;
uint64_t chatid;
uint64_t messageid;
bool operator==(const Message& m) const;
bool operator!=(const Message& m) const;
bool operator<(const Message& m) const;
};
// Options that modify how a search is performed.
// Each flag has its own bit value; the operator|= declared below combines flags.
enum class Searchflags {
NONE = 0, // no flag set; default for the flags parameter of Search::search
IGNORECASE = 1, // presumably: match without regard to letter case -- confirm in Search::search
REGEX = 2, // presumably: interpret the search term as a regular expression -- confirm in Search::search
//ideas: filter by sender, only messages with media, only messages without media, media file names, filter by date
};
Searchflags operator|=(Searchflags& lhs, const Searchflags sf);
@ -27,11 +35,14 @@ bool operator&(Searchflags& lhs, const Searchflags sf);
class Search {
public:
Search();
//copy chatnames from orig, and message list either from orig, or - if set - from list
Search(const Search& orig, std::list<const Message*>* list = nullptr);
~Search();
static Searchflags fromString(const std::string&);
void addFile(const std::string& file);
void finalize(); //stop adding files and finalize deduplication, could be called twice, but then a deduplication is not guaranteed
std::list<const Message*> search(std::string text, Searchflags flags = Searchflags::NONE) const;
const std::string& getChatname(uint64_t id) const;
@ -47,6 +58,7 @@ private:
void loadMessages(const json& j, uint64_t chatid);
std::list<Message> msgs;
std::vector<Message> msgs;
std::set<Message> deduplicate; //intermediate store, for reading files and deduplicate them
std::map<uint64_t, std::string> chatnames;
};