Compare commits

...

2 Commits

Author SHA1 Message Date
mrbesen 94f4903786
deduplicating messages 2021-04-07 17:52:32 +02:00
mrbesen f167d51840
nested searches 2021-04-07 17:00:19 +02:00
3 changed files with 125 additions and 10 deletions

View File

@ -31,21 +31,28 @@ int main(int argc, const char** argv) {
} }
time_t start = time(nullptr); time_t start = time(nullptr);
Search search; Search rootsearch;
for(int i = 1; i < argc; ++i) { for(int i = 1; i < argc; ++i) {
Log::info << "Load File: " << argv[i]; Log::info << "Load File: " << argv[i];
search.addFile(argv[i]); rootsearch.addFile(argv[i]);
} }
rootsearch.finalize();
time_t end = time(nullptr); time_t end = time(nullptr);
Log::info << search.getChatCount() << " Chats with " << search.getMessageCount() << " Messages loaded in " << end-start << "s"; Log::info << rootsearch.getChatCount() << " Chats with " << rootsearch.getMessageCount() << " Messages loaded in " << end-start << "s";
signal(SIGINT, sig_handler); signal(SIGINT, sig_handler);
Search* search = nullptr;
while(run) { while(run) {
Log::info << "Enter Search String: "; if(!search)
search = &rootsearch;
Log::info << search->getMessageCount() << " Messages. Enter searchterm: ";
std::string searchterm; std::string searchterm;
std::getline(std::cin, searchterm); std::getline(std::cin, searchterm);
Log::trace << "searchterm: \"" << searchterm << "\"";
if(!run) break; if(!run) break;
@ -56,10 +63,12 @@ int main(int argc, const char** argv) {
if(!run) break; if(!run) break;
std::list<const Message*> results = search.search(searchterm, parsedflags); time_t start = time(nullptr);
Log::info << results.size() << " results"; std::list<const Message*> results = search->search(searchterm, parsedflags);
time_t end = time(nullptr);
Log::info << results.size() << " results found in " << end-start << "s";
if(results.size()) { if(results.size()) {
Log::info << "Print results?"; Log::info << "Print results ?";
char c; char c;
std::cin >> c; std::cin >> c;
@ -68,12 +77,40 @@ int main(int argc, const char** argv) {
if(c == 'y' || c == 'Y') { if(c == 'y' || c == 'Y') {
//print results //print results
for(const Message* m : results) { for(const Message* m : results) {
Log::info << search.getShortChatname(m->chatid) << ": (" << m->messageid << ") " << m->text; Log::info << search->getShortChatname(m->chatid) << ": (" << m->messageid << ") " << m->text;
}
}
Log::info << "New Subfilter (y)? Clear Search (c)? Change current Filter (anykey)?";
std::cin >> c;
if(!run) break;
if(c == 'y' || c == 'Y') {
//filter
Search* subsearch = new Search(*search, &results);
//delete old subsearch (dont delete root)
if(search != &rootsearch)
delete search;
//set new search
search = subsearch;
} else if(c == 'c' || c == 'C') {
//move to root search
if(search != &rootsearch) {
delete search;
search = &rootsearch;
} }
} }
} }
std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
} }
if(search != &rootsearch)
delete search;
Log::stop(); Log::stop();
return 0; return 0;
} }

View File

@ -8,6 +8,21 @@
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using json = nlohmann::json; using json = nlohmann::json;
// Equality is decided by identity only: two messages are equal when both the
// chat id and the message id match. The message text is not compared.
bool Message::operator==(const Message& m) const {
	const bool sameChat = (chatid == m.chatid);
	const bool sameMessage = (messageid == m.messageid);
	return sameChat && sameMessage;
}
// Inequality: true as soon as either the chat id or the message id differs.
// The message text is not compared.
bool Message::operator!=(const Message& m) const {
	const bool sameIdentity = (chatid == m.chatid) && (messageid == m.messageid);
	return !sameIdentity;
}
// Strict ordering of messages: sorted by chat id first, and by message id
// within the same chat. The message text does not take part in the ordering.
bool Message::operator<(const Message& m) const {
	if (chatid != m.chatid) {
		// different chats: the chat id alone decides
		return chatid < m.chatid;
	}
	// same chat: fall back to the message id
	return messageid < m.messageid;
}
Searchflags operator|=(Searchflags& lhs, const Searchflags sf) { Searchflags operator|=(Searchflags& lhs, const Searchflags sf) {
lhs = (Searchflags) ((uint32_t) lhs | (uint32_t) sf); lhs = (Searchflags) ((uint32_t) lhs | (uint32_t) sf);
return lhs; return lhs;
@ -35,6 +50,19 @@ static bool matchesRegex(const std::string& msg, const std::regex& reg) {
} }
Search::Search() {} Search::Search() {}
// Builds a new Search from an existing one.
//
// orig: source of the chat names, and of the messages when no list is given.
// list: optional; when non-null, the messages it points to are copied into
//       this Search instead of orig's messages. The pointed-to messages only
//       have to stay alive for the duration of this call.
//
// Only msgs and chatnames are taken over; the intermediate deduplicate set of
// orig is not copied.
Search::Search(const Search& orig, std::list<const Message*>* list) : chatnames(orig.chatnames) {
	if(!list) {
		//no filter list given: take over all messages of the original
		msgs = orig.msgs;
		return;
	}

	//allocate once instead of growing the vector step by step
	msgs.reserve(list->size());
	for(const Message* m : *list) {
		msgs.push_back(*m);
	}
}
Search::~Search() {} Search::~Search() {}
Searchflags Search::fromString(const std::string& str) { Searchflags Search::fromString(const std::string& str) {
@ -69,6 +97,8 @@ void Search::addFile(const std::string& file) {
if(j.contains("messages")) { if(j.contains("messages")) {
chatnames.insert({j["id"], j["name"].get<std::string>()}); chatnames.insert({j["id"], j["name"].get<std::string>()});
loadMessages(j["messages"], j["id"]); loadMessages(j["messages"], j["id"]);
return;
} }
//multi chat export //multi chat export
@ -89,12 +119,24 @@ void Search::addFile(const std::string& file) {
Log::note << "Loaded Chat: " << name << " (" << id << ")"; Log::note << "Loaded Chat: " << name << " (" << id << ")";
} }
return;
} }
Log::warn << "File " << file << " could not be parsed! Export as json!";
} catch (nlohmann::detail::parse_error& e) { } catch (nlohmann::detail::parse_error& e) {
Log::error << "Could not load File: " << e.what(); Log::error << "Could not load File: " << e.what();
} }
} }
// Moves the loading phase into the searchable state: every message collected
// in the intermediate deduplicate set is appended to msgs (in the set's
// iteration order) and the set is emptied afterwards.
void Search::finalize() {
	msgs.reserve(msgs.size() + deduplicate.size());
	//single range insert: each message is copied exactly once
	//(the former by-value loop variable copied every message twice)
	msgs.insert(msgs.end(), deduplicate.begin(), deduplicate.end());
	deduplicate.clear();
}
std::list<const Message*> Search::search(std::string text, Searchflags flags) const { std::list<const Message*> Search::search(std::string text, Searchflags flags) const {
std::list<const Message*> out; std::list<const Message*> out;
@ -155,11 +197,35 @@ void Search::runsearch(T st, bool (*checker)(const std::string& msg, T text), st
} }
} }
static void readText(const json& t, std::string& out) {
if(t.is_null()) return;
if(t.is_string())
out = t;
if(t.is_array()) {
std::ostringstream buff;
for(const json& entr : t) {
if(entr.is_string())
buff << (const std::string&) entr;
else if(entr.contains("text"))
buff << (const std::string&) entr["text"];
}
out = buff.str();
}
}
void Search::loadMessages(const json& j, uint64_t chatid) { void Search::loadMessages(const json& j, uint64_t chatid) {
uint32_t failed = 0; uint32_t failed = 0;
for(const json& m : j) { for(const json& m : j) {
try { try {
msgs.push_back({m["text"], chatid, m["id"]}); if(m.contains("text")) {
std::string text;
readText(m["text"], text);
deduplicate.insert({text, chatid, m["id"]});
} else {
Log::warn << "text less message: " << m;
}
} catch(const nlohmann::detail::exception& e) {
Log::warn << "Parse error: " << e.id << " " << e.what();
} catch(...) { } catch(...) {
failed ++; failed ++;
} }

View File

@ -2,6 +2,8 @@
#include <string> #include <string>
#include <list> #include <list>
#include <vector>
#include <set>
#include <map> #include <map>
#include <cstdint> #include <cstdint>
@ -12,12 +14,18 @@ struct Message {
std::string text; std::string text;
uint64_t chatid; uint64_t chatid;
uint64_t messageid; uint64_t messageid;
bool operator==(const Message& m) const;
bool operator!=(const Message& m) const;
bool operator<(const Message& m) const;
}; };
enum class Searchflags { enum class Searchflags {
NONE = 0, NONE = 0,
IGNORECASE = 1, IGNORECASE = 1,
REGEX = 2, REGEX = 2,
//ideas: filter by sender, only messages with media, only messages without media, media file names, filter by date
}; };
Searchflags operator|=(Searchflags& lhs, const Searchflags sf); Searchflags operator|=(Searchflags& lhs, const Searchflags sf);
@ -27,11 +35,14 @@ bool operator&(Searchflags& lhs, const Searchflags sf);
class Search { class Search {
public: public:
Search(); Search();
//copy chatnames from orig, and message list either from orig, or - if set - from list
Search(const Search& orig, std::list<const Message*>* list = nullptr);
~Search(); ~Search();
static Searchflags fromString(const std::string&); static Searchflags fromString(const std::string&);
void addFile(const std::string& file); void addFile(const std::string& file);
void finalize(); //stop adding files and finalize deduplication, could be called twice, but then a deduplication is not guaranteed
std::list<const Message*> search(std::string text, Searchflags flags = Searchflags::NONE) const; std::list<const Message*> search(std::string text, Searchflags flags = Searchflags::NONE) const;
const std::string& getChatname(uint64_t id) const; const std::string& getChatname(uint64_t id) const;
@ -47,6 +58,7 @@ private:
void loadMessages(const json& j, uint64_t chatid); void loadMessages(const json& j, uint64_t chatid);
std::list<Message> msgs; std::vector<Message> msgs;
std::set<Message> deduplicate; //intermediate store, for reading files and deduplicate them
std::map<uint64_t, std::string> chatnames; std::map<uint64_t, std::string> chatnames;
}; };