TelegramSearch/src/search.cpp

261 lines
6.1 KiB
C++

#include "search.h"
#include <fstream>
#include <Log.h>
#include <mrbesen.h>
#include <regex>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
// Two messages are equal when both their chat id and their message id agree.
// No other field takes part in the comparison.
bool Message::operator==(const Message& m) const {
    const bool sameChat = (chatid == m.chatid);
    const bool sameMessage = (messageid == m.messageid);
    return sameChat && sameMessage;
}
// Two messages differ as soon as either the chat id or the message id differs.
bool Message::operator!=(const Message& m) const {
    if(chatid != m.chatid) {
        return true;
    }
    return messageid != m.messageid;
}
// Lexicographic ordering: first by chat id, then by message id.
bool Message::operator<(const Message& m) const {
    if(chatid != m.chatid) {
        // the chat id decides whenever it differs
        return chatid < m.chatid;
    }
    // same chat: fall back to the message id
    return messageid < m.messageid;
}
// Returns true when a file name is stored for this message.
bool Message::hasFile() const {
    const bool noFile = filename.empty();
    return !noFile;
}
// Sets every bit of sf in lhs and returns the updated value (by value).
Searchflags operator|=(Searchflags& lhs, const Searchflags sf) {
    const uint32_t merged = static_cast<uint32_t>(lhs) | static_cast<uint32_t>(sf);
    lhs = static_cast<Searchflags>(merged);
    return lhs;
}
// Tests whether lhs and sf have at least one bit in common.
bool operator&(Searchflags& lhs, const Searchflags sf) {
    const uint32_t common = static_cast<uint32_t>(lhs) & static_cast<uint32_t>(sf);
    return common != 0;
}
// Case-sensitive substring test: true when text occurs anywhere in msg.
static bool matches(const std::string& msg, const std::string& text) {
    const std::string::size_type position = msg.find(text);
    if(position == std::string::npos) {
        return false;
    }
    return true;
}
// Substring test against a lowered copy of msg.
// text is used as given; search() lowers it before handing it in.
static bool matchesIC(const std::string& msg, const std::string& text) {
    std::string loweredMsg;
    mrbesen::util::toLower(msg, loweredMsg);
    const std::string::size_type position = loweredMsg.find(text);
    return position != std::string::npos;
}
// std::regex_search is a template, so it cannot be passed as the plain
// function pointer runsearch() expects; this wrapper gives it a fixed signature.
// Returns true when reg matches anywhere inside msg.
static bool matchesRegex(const std::string& msg, const std::regex& reg) {
    const bool found = std::regex_search(msg.begin(), msg.end(), reg);
    return found;
}
// Creates an empty search index; all members are default-initialized.
Search::Search() = default;
// Builds a new Search from an existing one.
//  orig: source of the chat names (always copied) and, when list is null, of the messages
//  list: optional subset of messages; when non-null only the pointed-to
//        messages are copied instead of all of orig's messages
Search::Search(const Search& orig, std::list<const Message*>* list) : chatnames(orig.chatnames) {
    if(list == nullptr) {
        // no subset given: take over every message
        msgs = orig.msgs;
        return;
    }

    for(std::list<const Message*>::const_iterator it = list->begin(); it != list->end(); ++it) {
        const Message* selected = *it;
        msgs.push_back(*selected);
    }
}
// Nothing to release manually; the members clean up after themselves.
Search::~Search() = default;
// Translates a flag string into a Searchflags bitmask.
//  'r' / 'R' -> Searchflags::REGEX
//  'i' / 'I' -> Searchflags::IGNORECASE
// Any other character is ignored. An empty string yields Searchflags::NONE.
Searchflags Search::fromString(const std::string& str) {
    Searchflags parsed = Searchflags::NONE;
    for(std::string::const_iterator pos = str.begin(); pos != str.end(); ++pos) {
        const char letter = *pos;
        if(letter == 'r' || letter == 'R') {
            parsed |= Searchflags::REGEX;
        } else if(letter == 'i' || letter == 'I') {
            parsed |= Searchflags::IGNORECASE;
        }
    }
    return parsed;
}
void Search::addFile(const std::string& file) {
//laden den datei
try {
std::ifstream fstream(file);
json j;
fstream >> j;
//single chat export
if(j.contains("messages")) {
chatnames.insert({j["id"], j["name"].get<std::string>()});
loadMessages(j["messages"], j["id"]);
return;
}
//multi chat export
if(j.contains("chats")) {
const json& chatlist = j["chats"]["list"]; //asume that list exists
if(chatlist.is_null()) {
Log::error << "File does not contain a chatlist";
return;
}
for(const json& chat : chatlist) {
int64_t id = chat["id"];
std::string name = "";
if(chat.contains("name") && !chat["name"].is_null())
name = chat.value("name", "");
chatnames.insert({id, name});
loadMessages(chat["messages"], id);
Log::note << "Loaded Chat: " << name << " (" << id << ")";
}
return;
}
//get contact list
if(j.contains("frequent_contacts")) {
const json& contacts = j["frequent_contacts"]["list"];//asume that list exists
for(const json& contact : contacts) {
int64_t id = contact["id"];
std::string name = contact["name"];
chatnames.insert({id, name});
}
}
Log::warn << "File " << file << " could not be parsed! Export as json!";
} catch (nlohmann::detail::parse_error& e) {
Log::error << "Could not load File: " << e.what();
}
}
// Moves the collected messages from the deduplicate container into msgs
// and empties deduplicate afterwards.
// NOTE(review): msgs may reallocate here, so pointers handed out earlier by
// search() presumably become invalid; confirm against callers.
void Search::finalize() {
    msgs.reserve(msgs.size() + deduplicate.size());
    // iterate by reference: a by-value loop variable would copy every message twice
    for(const auto& message : deduplicate) {
        msgs.push_back(message);
    }
    deduplicate.clear();
}
// Searches all loaded messages for text and returns pointers to the hits.
//  text:  search term, or a regular expression when Searchflags::REGEX is set
//  flags: REGEX selects regex matching, IGNORECASE makes the match case-insensitive
// The returned pointers refer to elements of msgs.
std::list<const Message*> Search::search(std::string text, Searchflags flags) const {
    std::list<const Message*> hits;
    const bool caseInsensitive = flags & Searchflags::IGNORECASE;

    if(flags & Searchflags::REGEX) {
        searchRegex(text, caseInsensitive, hits);
    } else if(caseInsensitive) {
        // matchesIC lowers each message, so the needle has to be lowered once here
        mrbesen::util::toLower(text);
        runsearch<const std::string&>(text, matchesIC, hits);
    } else {
        runsearch<const std::string&>(text, matches, hits);
    }
    return hits;
}
// Looks up the name registered for a chat id.
// Returns a reference to the stored name, or to a static "<unknownchat>"
// placeholder when the id is not registered or its name is empty.
const std::string& Search::getChatname(int64_t id) const {
    static const std::string UNKOWNCHAT = "<unknownchat>";
    const auto entry = chatnames.find(id);
    const bool hasName = (entry != chatnames.end()) && !entry->second.empty();
    return hasName ? entry->second : UNKOWNCHAT;
}
// Returns the chat name shortened to at most 14 bytes.
// The cut never lands inside a UTF-8 multi-byte character: if byte 14 is a
// continuation byte, the cut moves back to the start of that character, so
// the result can be a few bytes shorter than 14. Names that are not valid
// UTF-8 are cut at exactly 14 bytes.
std::string Search::getShortChatname(int64_t id) const {
    static const std::string::size_type MAXLEN = 14;
    const std::string& chatname = getChatname(id);
    if(chatname.size() <= MAXLEN) {
        return chatname;
    }

    // UTF-8 continuation bytes look like 10xxxxxx
    const auto isContinuation = [](char c) {
        return (static_cast<unsigned char>(c) & 0xC0) == 0x80;
    };

    std::string::size_type cut = MAXLEN;
    // a character has at most 3 continuation bytes, so never step back further than that
    while(cut > MAXLEN - 3 && isContinuation(chatname[cut])) {
        --cut;
    }
    if(isContinuation(chatname[cut])) {
        // no lead byte within reach: not valid UTF-8, use the plain byte cut
        cut = MAXLEN;
    }
    return chatname.substr(0, cut);
}
// Number of entries in chatnames, converted to uint32_t.
uint32_t Search::getChatCount() const {
    const uint32_t count = static_cast<uint32_t>(chatnames.size());
    return count;
}
// Number of messages currently stored in msgs.
// Messages still waiting in deduplicate are not counted.
uint64_t Search::getMessageCount() const {
    const uint64_t count = static_cast<uint64_t>(msgs.size());
    return count;
}
// Runs a regular-expression search over all messages and appends hits to out.
//  text:       the pattern; std::regex throws std::regex_error if it is invalid,
//              which is not caught here
//  ignoreCase: when true the pattern is compiled with std::regex::icase
void Search::searchRegex(const std::string& text, bool ignoreCase, std::list<const Message*>& out) const {
    std::regex::flag_type options{};
    if(ignoreCase) {
        options = std::regex::icase;
    }
    const std::regex compiled(text, options);
    runsearch<const std::regex&>(compiled, matchesRegex, out);
}
template<typename T>
void Search::runsearch(T st, bool (*checker)(const std::string& msg, T text), std::list<const Message*>& out) const {
for(const Message& m : msgs) {
if(checker(m.text, st)) {
out.push_back(&m);
}
}
}
static void readText(const json& t, std::string& out) {
if(t.is_null()) return;
if(t.is_string())
out = t;
if(t.is_array()) {
std::ostringstream buff;
for(const json& entr : t) {
if(entr.is_string())
buff << (const std::string&) entr;
else if(entr.contains("text"))
buff << (const std::string&) entr["text"];
}
out = buff.str();
}
}
// Reads every message of one chat and stores it in deduplicate.
//  j:      the JSON collection of messages of that chat
//  chatid: id of the chat the messages belong to
// Entries without a "text" member are logged and skipped. An entry that
// cannot be read (wrong type, missing "id", ...) is skipped and counted; the
// number of such entries is logged once at the end.
void Search::loadMessages(const json& j, int64_t chatid) {
    uint32_t failed = 0;
    for(const json& m : j) {
        try {
            if(!m.contains("text")) {
                Log::warn << "text less message: " << m;
                continue;
            }

            std::string text;
            readText(m["text"], text);

            // an attached file takes precedence over a photo
            std::string file = "";
            if(m.contains("file"))
                file = m["file"];
            else if(m.contains("photo"))
                file = m["photo"];

            // the sender is only taken over when "from_id" is an unsigned number, otherwise it stays 0
            int64_t sender = 0;
            if(m.contains("from_id") && m["from_id"].is_number_unsigned())
                sender = m["from_id"];

            // at() throws on a missing "id"; operator[] on a const json would be undefined there
            deduplicate.insert({chatid, m.at("id"), sender, text, file, m.contains("reply_to_message_id")});
        } catch(const nlohmann::detail::exception& e) {
            Log::warn << "Parse error: " << e.id << " " << e.what();
            // JSON errors are failed messages too, so they belong in the summary
            ++failed;
        } catch(...) {
            ++failed;
        }
    }

    if(failed != 0) {
        Log::warn << failed << " Messages failed to load";
    }
}