deduplicating messages
This commit is contained in:
parent
f167d51840
commit
94f4903786
|
@ -36,6 +36,7 @@ int main(int argc, const char** argv) {
|
|||
Log::info << "Load File: " << argv[i];
|
||||
rootsearch.addFile(argv[i]);
|
||||
}
|
||||
rootsearch.finalize();
|
||||
|
||||
time_t end = time(nullptr);
|
||||
Log::info << rootsearch.getChatCount() << " Chats with " << rootsearch.getMessageCount() << " Messages loaded in " << end-start << "s";
|
||||
|
|
|
@ -8,6 +8,21 @@
|
|||
#include <nlohmann/json.hpp>
|
||||
using json = nlohmann::json;
|
||||
|
||||
// Two messages are considered equal when both their chat id and their
// message id match; the message text does not take part in the comparison.
bool Message::operator==(const Message& m) const {
    if (chatid != m.chatid) {
        return false;
    }
    return messageid == m.messageid;
}
|
||||
|
||||
// Two messages differ when the chat id or the message id differs;
// the message text does not take part in the comparison.
bool Message::operator!=(const Message& m) const {
    return !((chatid == m.chatid) && (messageid == m.messageid));
}
|
||||
|
||||
// Orders messages lexicographically by (chatid, messageid).
// The text is ignored, so messages sharing both ids compare as equivalent.
bool Message::operator<(const Message& m) const {
    if (chatid != m.chatid) {
        // Different chats: the chat id alone decides the order.
        return chatid < m.chatid;
    }
    // Same chat: fall back to the message id.
    return messageid < m.messageid;
}
|
||||
|
||||
Searchflags operator|=(Searchflags& lhs, const Searchflags sf) {
|
||||
lhs = (Searchflags) ((uint32_t) lhs | (uint32_t) sf);
|
||||
return lhs;
|
||||
|
@ -114,6 +129,14 @@ void Search::addFile(const std::string& file) {
|
|||
}
|
||||
}
|
||||
|
||||
// Ends the loading phase: appends every message collected in the
// `deduplicate` set to `msgs` (in the set's iteration order) and then
// empties the set.
void Search::finalize() {
    // Grow once up front so the append below does not reallocate.
    msgs.reserve(msgs.size() + deduplicate.size());
    // Range insert copies each Message exactly once. The previous
    // `for (const auto it : ...)` loop copied every element (including its
    // text) into the loop variable and then again into the container.
    msgs.insert(msgs.end(), deduplicate.begin(), deduplicate.end());
    deduplicate.clear();
}
|
||||
|
||||
std::list<const Message*> Search::search(std::string text, Searchflags flags) const {
|
||||
std::list<const Message*> out;
|
||||
|
||||
|
@ -197,7 +220,7 @@ void Search::loadMessages(const json& j, uint64_t chatid) {
|
|||
if(m.contains("text")) {
|
||||
std::string text;
|
||||
readText(m["text"], text);
|
||||
msgs.push_back({text, chatid, m["id"]});
|
||||
deduplicate.insert({text, chatid, m["id"]});
|
||||
} else {
|
||||
Log::warn << "text less message: " << m;
|
||||
}
|
||||
|
|
10
src/search.h
10
src/search.h
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <cstdint>
|
||||
|
||||
|
@ -12,6 +14,10 @@ struct Message {
|
|||
std::string text;
|
||||
uint64_t chatid;
|
||||
uint64_t messageid;
|
||||
|
||||
bool operator==(const Message& m) const;
|
||||
bool operator!=(const Message& m) const;
|
||||
bool operator<(const Message& m) const;
|
||||
};
|
||||
|
||||
enum class Searchflags {
|
||||
|
@ -36,6 +42,7 @@ public:
|
|||
static Searchflags fromString(const std::string&);
|
||||
|
||||
void addFile(const std::string& file);
|
||||
// Stops the loading phase: transfers the messages collected in the
// deduplication set into msgs and clears the set.
// It may be called more than once, but deduplication is then not guaranteed
// across calls, because messages already transferred are no longer in the set.
void finalize();
|
||||
|
||||
std::list<const Message*> search(std::string text, Searchflags flags = Searchflags::NONE) const;
|
||||
const std::string& getChatname(uint64_t id) const;
|
||||
|
@ -51,6 +58,7 @@ private:
|
|||
|
||||
void loadMessages(const json& j, uint64_t chatid);
|
||||
|
||||
std::list<Message> msgs;
|
||||
std::vector<Message> msgs;
|
||||
std::set<Message> deduplicate; //intermediate store, for reading files and deduplicate them
|
||||
std::map<uint64_t, std::string> chatnames;
|
||||
};
|
Loading…
Reference in New Issue