From 5a062e0cbce215142a91de4e49af5a38fa0b1862 Mon Sep 17 00:00:00 2001 From: mrbesen Date: Wed, 1 Jul 2020 11:38:05 +0200 Subject: [PATCH] neue statistiken --- src/main.py | 223 +++++++++++++++++++++++++++++++++++++++++++++------- src/out.py | 13 +-- 2 files changed, 201 insertions(+), 35 deletions(-) mode change 100755 => 100644 src/main.py diff --git a/src/main.py b/src/main.py old mode 100755 new mode 100644 index 5b8a39c..6bf8bc4 --- a/src/main.py +++ b/src/main.py @@ -7,6 +7,8 @@ import out import math, datetime import colorama from colorama import Fore, Style +from collections import Counter +import re class Stat: @@ -23,7 +25,7 @@ class Stat: all = self.getAll(count) if len(all) == 0: return '' - if len(all) == 1: + if isinstance(all, list) and len(all) == 1: return next(iter(all)) for name, val in all.items(): if out: @@ -31,7 +33,7 @@ class Stat: out = out + name + ": " + str(val) return out - def getAll(self, count): + def getAll(self, msgcount): pass def parse(self, val): @@ -40,13 +42,13 @@ class Stat: class Countable(Stat): acc = 0 - mult = 0 + #mult = 0 min = 1 << 31 max = 0 def __init__(self, name): self.acc = 0 - self.mult = 0 + #self.mult = 0 self.min = 1 << 31 self.max = 0 super().__init__(name) @@ -63,12 +65,12 @@ class Countable(Stat): def getAvg(self, count): return self.parse(int(self.acc / count)) - def getGAvg(self): - return self.parse(math.sqrt(self.mult)) + # def getGAvg(self): + # return self.parse(math.sqrt(self.mult)) - def getAll(self, count): - return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(count), - 'gavg': self.getGAvg()} + def getAll(self, msgcount): + return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(msgcount)} + # ,'gavg': self.getGAvg()} def addMsg(self, msgnum, msg, chat): count = self.count(msgnum, msg, chat) @@ -77,7 +79,7 @@ class Countable(Stat): if count < self.min: self.min = count self.acc = self.acc + count - self.mult = self.mult + (count * count) + # self.mult = self.mult + (count * count) def count(self, msgnum, msg, chat): pass @@ -129,6 +131,9 @@ class DiscreteCount(Stat): def getAll(self, count): return self.list + def inc(self, key): + self.list[key] = self.list.get(key, 0) + 1 + class UserCount(DiscreteCount): @@ -136,11 +141,9 @@ class UserCount(DiscreteCount): super().__init__(name) def addMsg(self, msgnum, msg, chat): - global me - fromid = msg.from_id - self.list[fromid] = self.list.get(fromid, 0) + 1 + self.inc(msg.from_id) - def getAll(self, count): + def getAll(self, msgcount): # replace ids with names newdict = {} for id, count in self.list.items(): @@ -149,12 +152,147 @@ class UserCount(DiscreteCount): return newdict +# prozentuale discrete metric +class UserProp(UserCount): + def __init__(self, name: str = "%/User"): + super().__init__(name) + self.counter = UserCount("") + + def addMsg(self, msgnum, msg, chat): + self.counter.addMsg(msgnum, msg, chat) + + def getAll(self, msgcount): + # replace ids with names + newdict = {} + for id, count in self.list.items(): + name = getUsernamebyID(id) + if count: + newdict[name] = count/self.counter.list.get(id) + else: + newdict[name] = 0 + + return newdict + + +# user welche selten nachrichten hinter einander schreiben +# 1 = der user schreibt nie 2 Nachrichten hintereinander, 0 = der User schreibt immer alle Nachrichten hintereinander, ohne das einer dazwischen quatscht +class Interactivity(UserProp): + def __init__(self, name : str = "User Interactivity"): + super().__init__(name) + self.lastmsg = None + + # nachrichten werden von jüngster zu ältester durch gegangen + def addMsg(self, msgnum, msg, chat): + super().addMsg(msgnum, msg, chat) + fromid = msg.from_id + if self.lastmsg: + if self.lastmsg != fromid: + self.inc(fromid) + self.lastmsg = fromid + + +class UserEdits(UserCount): + def __init__(self): + super().__init__("Edits/User") + + def addMsg(self, msgnum, msg, chat): + if msg.edit_date: + self.inc(msg.from_id) + + +class UserAwnsers(UserCount): + def __init__(self): + super().__init__("AwnsersFromUser") + + def addMsg(self, msgnum, msg, chat): + if msg.is_reply: + self.inc(msg.from_id) + + +class MediaCounter(UserCount): + def __init__(self): + super().__init__("Media/User") + + def addMsg(self, msgnum, msg, chat): + if msg.media: + self.inc(msg.from_id) + + +class MaxCounter(DiscreteCount): + count = 0 + + def __init__(self, name, c: int = 5): + super().__init__(name) + self.count = c + + def getAll(self, msgcount): + out = Counter(self.list) + return dict(out.most_common(self.count)) + + +class WordCounter(MaxCounter): + def __init__(self, name: str = "WordCounts", c: int = 5): + super().__init__(name, c) + + def addMsg(self, msgnum, msg, chat): + txt = msg.message + if txt is not None: + txt = re.findall(r'\w+', txt.lower()) + for i in txt: + i = i.strip().lower() + if i: + self.inc(i) + + +class MentionCounter(MaxCounter): + def __init__(self, name: str = "MentionCounter", c: int = 5): + super().__init__(name, c) + + def addMsg(self, msgnum, msg, chat): + txt = msg.message + if txt is not None: + txt = re.findall(r'\s@\w{5,}', txt.lower()) + for i in txt: + i = i.strip().lower() + if i: + self.inc(i) + + +class MaxCharCounter(MaxCounter): + Blacklist = "" + + def __init__(self, name: str = "MaxCharCounter", c: int = 5): + super().__init__(name, c) + + def addMsg(self, msgnum, msg, chat): + txt = msg.message + if txt is not None: + txt = txt.replace(" ", "").lower() + for i in txt: + if self.checkChar(i): + if i not in self.Blacklist: + self.inc(i) + + def checkChar(self, char): + return True + + +class EmojiCounter(MaxCharCounter): + Blacklist = "äÄöÖüÜßẞ̒ ͎'^°~`…·–÷×" + + def __init__(self, name: str = "EmojiCounter", c: int = 5): + super().__init__(name, c) + + def checkChar(self, char): + return ord(char) > 0x7F + + + def printDialog(interid, d): # debug only color = '' if interid & 1: color = Fore.LIGHTBLACK_EX - print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, - d.pinned) + Fore.RESET) + print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, d.pinned) + Fore.RESET) def getDialogType(dialog): @@ -164,16 +302,21 @@ def getDialogType(dialog): ent = dialog.entity if isinstance(ent, User): if ent.bot: - return '🤖' + return 'BOT ' return '👤' def getUsernamebyID(userid): - global dialogs - for d in dialogs: - if d.id == userid: - return d.name - return "Unknown" # refresh dialog cache? + global idlookup + try: + return idlookup[userid] + except KeyError: + global dialogs + for d in dialogs: + if d.id == userid: + return d.name + + return "Unknown" def dialogByTgID(tgid): @@ -199,10 +342,17 @@ def analyseChat(dialog, output): # chat = client.get_messages(selectedDialog) chat = client.iter_messages(dialog, limit=None) - stats = { + statList = { CharCount(), Dist(), - UserCount() + UserCount(), # oldest msg?, media types, most linked site + UserEdits(), + WordCounter(), + MentionCounter(), + EmojiCounter(), + UserAwnsers(), + MediaCounter(), + Interactivity() } """print('before' + Fore.MAGENTA) outputs['stdout'].print(stats, 10, dialog) @@ -210,16 +360,16 @@ def analyseChat(dialog, output): # run messure msgnum = 0 - for msg in chat: + for msg in chat: # reinfolge ist sehr wichtig! jüngste -< älteste if msg != None: msgnum = msgnum + 1 - for stat in stats: + for stat in statList: stat.addMsg(msgnum, msg, dialog) print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET) # write to output - output.print(stats, msgnum, dialog) + output.print(statList, msgnum, dialog) def parseInput(uinput): @@ -253,6 +403,19 @@ def parseInput(uinput): return selected +#returns a dict with a id -> name maping +def getUsers(dialog): + global client + parts = client.get_participants(dialog) + outdict = {} + for p in parts: + name = p.first_name + if(p.last_name != None): + name += " " + p.last_name + outdict[p.id] = name + return outdict + + # ================================== # MAIN program if __name__ == "__main__": @@ -318,12 +481,14 @@ if __name__ == "__main__": pass # some error, retry start = timer() + idlookup = {} for d in selected: try: + newusers = getUsers(d) + idlookup.update(newusers) analyseChat(d, out) except Exception as e: - print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', e) - print(e) + print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', str(e)) out.close() took = datetime.timedelta(seconds=(timer() - start)) took = datetime.timedelta(seconds=int(took.total_seconds())) diff --git a/src/out.py b/src/out.py index f4a27b8..46f084b 100644 --- a/src/out.py +++ b/src/out.py @@ -14,7 +14,7 @@ class Output: def close(self): pass - def print(self, stats, count, dialog): + def print(self, stats, msgcount, dialog): pass @@ -23,9 +23,10 @@ class STDOUT(Output): def __init__(self, name: str = "stdout"): super().__init__(name) - def print(self, stats, count, dialog): + def print(self, stats, msgcount, dialog): for stat in stats: - print(stat.name, ': ', stat.getValue(count), sep='') + print(stat.name, ': ', stat.getValue(msgcount), sep='') + class jsonOut(Output): @@ -45,8 +46,8 @@ class jsonOut(Output): json.dump(self.outbuff, self.file, indent=4) self.file.close() - def print(self, stats, count, dialog): - jsonpre = {"count": count, 'chatid': dialog.id, 'chatname': dialog.name} + def print(self, stats, msgcount, dialog): + jsonpre = {"count": msgcount, 'chatid': dialog.id, 'chatname': dialog.name} for stat in stats: - jsonpre[stat.name] = stat.getAll(count) + jsonpre[stat.name] = stat.getAll(msgcount) self.outbuff.append(jsonpre)