neue statistiken

This commit is contained in:
mrbesen 2020-07-01 11:38:05 +02:00
parent 14b2a584a7
commit 5a062e0cbc
Signed by: MrBesen
GPG Key ID: 596B2350DCD67504
2 changed files with 201 additions and 35 deletions

223
src/main.py Executable file → Normal file
View File

@ -7,6 +7,8 @@ import out
import math, datetime import math, datetime
import colorama import colorama
from colorama import Fore, Style from colorama import Fore, Style
from collections import Counter
import re
class Stat: class Stat:
@ -23,7 +25,7 @@ class Stat:
all = self.getAll(count) all = self.getAll(count)
if len(all) == 0: if len(all) == 0:
return '' return ''
if len(all) == 1: if isinstance(all, list) and len(all) == 1:
return next(iter(all)) return next(iter(all))
for name, val in all.items(): for name, val in all.items():
if out: if out:
@ -31,7 +33,7 @@ class Stat:
out = out + name + ": " + str(val) out = out + name + ": " + str(val)
return out return out
def getAll(self, count): def getAll(self, msgcount):
pass pass
def parse(self, val): def parse(self, val):
@ -40,13 +42,13 @@ class Stat:
class Countable(Stat): class Countable(Stat):
acc = 0 acc = 0
mult = 0 #mult = 0
min = 1 << 31 min = 1 << 31
max = 0 max = 0
def __init__(self, name): def __init__(self, name):
self.acc = 0 self.acc = 0
self.mult = 0 #self.mult = 0
self.min = 1 << 31 self.min = 1 << 31
self.max = 0 self.max = 0
super().__init__(name) super().__init__(name)
@ -63,12 +65,12 @@ class Countable(Stat):
def getAvg(self, count): def getAvg(self, count):
return self.parse(int(self.acc / count)) return self.parse(int(self.acc / count))
def getGAvg(self): # def getGAvg(self):
return self.parse(math.sqrt(self.mult)) # return self.parse(math.sqrt(self.mult))
def getAll(self, count): def getAll(self, msgcount):
return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(count), return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(msgcount)}
'gavg': self.getGAvg()} # ,'gavg': self.getGAvg()}
def addMsg(self, msgnum, msg, chat): def addMsg(self, msgnum, msg, chat):
count = self.count(msgnum, msg, chat) count = self.count(msgnum, msg, chat)
@ -77,7 +79,7 @@ class Countable(Stat):
if count < self.min: if count < self.min:
self.min = count self.min = count
self.acc = self.acc + count self.acc = self.acc + count
self.mult = self.mult + (count * count) # self.mult = self.mult + (count * count)
def count(self, msgnum, msg, chat): def count(self, msgnum, msg, chat):
pass pass
@ -129,6 +131,9 @@ class DiscreteCount(Stat):
def getAll(self, count): def getAll(self, count):
return self.list return self.list
def inc(self, key):
self.list[key] = self.list.get(key, 0) + 1
class UserCount(DiscreteCount): class UserCount(DiscreteCount):
@ -136,11 +141,9 @@ class UserCount(DiscreteCount):
super().__init__(name) super().__init__(name)
def addMsg(self, msgnum, msg, chat): def addMsg(self, msgnum, msg, chat):
global me self.inc(msg.from_id)
fromid = msg.from_id
self.list[fromid] = self.list.get(fromid, 0) + 1
def getAll(self, count): def getAll(self, msgcount):
# replace ids with names # replace ids with names
newdict = {} newdict = {}
for id, count in self.list.items(): for id, count in self.list.items():
@ -149,12 +152,147 @@ class UserCount(DiscreteCount):
return newdict return newdict
# Percentage-based discrete metric.
class UserProp(UserCount):
    """Per-user ratio: a user's tally in ``self.list`` divided by that user's message total.

    ``addMsg`` here only feeds the denominator (``self.counter``). Subclasses
    are expected to call ``self.inc(user_id)`` for every message that should
    count towards the numerator.
    """

    def __init__(self, name: str = "%/User"):
        super().__init__(name)
        # Independent per-sender tally used as the denominator of the ratio.
        self.counter = UserCount("")

    def addMsg(self, msgnum, msg, chat):
        """Forward *msg* to the denominator tally; the numerator is untouched."""
        self.counter.addMsg(msgnum, msg, chat)

    def getAll(self, msgcount):
        """Return ``{user name: ratio}`` for every user seen.

        Users present in the message total but without any numerator hit are
        reported with ``0`` instead of being dropped. A missing or zero
        denominator also yields ``0`` rather than raising.
        """
        totals = self.counter.list
        ratios = {}
        # Totals first, then any stray numerator-only ids; order stays stable.
        for userid in dict.fromkeys([*totals, *self.list]):
            hits = self.list.get(userid, 0)
            total = totals.get(userid, 0)
            # replace ids with names
            ratios[getUsernamebyID(userid)] = hits / total if hits and total else 0
        return ratios
# Users who rarely write several messages in a row.
# 1 = the user never writes two messages back to back, 0 = the user always writes
# all of their messages consecutively without anybody else chiming in.
class Interactivity(UserProp):
    """Share of a user's messages that sit next to a message from a different sender.

    A message is counted when its sender differs from the sender of the
    message handled immediately before it. The very first message handled is
    never counted because there is nothing to compare it with.
    """

    def __init__(self, name: str = "User Interactivity"):
        super().__init__(name)
        # Sender id of the message handled in the previous addMsg call.
        self.lastmsg = None

    # Messages are walked from the newest to the oldest.
    def addMsg(self, msgnum, msg, chat):
        super().addMsg(msgnum, msg, chat)

        sender = msg.from_id
        speaker_changed = bool(self.lastmsg) and self.lastmsg != sender
        if speaker_changed:
            self.inc(sender)

        self.lastmsg = sender
class UserEdits(UserCount):
    """Tallies, per sender, the messages that have a truthy ``edit_date``."""

    def __init__(self):
        super().__init__("Edits/User")

    def addMsg(self, msgnum, msg, chat):
        # Messages without an edit date are ignored.
        if not msg.edit_date:
            return
        self.inc(msg.from_id)
class UserAwnsers(UserCount):
    """Tallies, per sender, the messages whose ``is_reply`` flag is truthy."""

    def __init__(self):
        super().__init__("AwnsersFromUser")

    def addMsg(self, msgnum, msg, chat):
        # Only replies count towards this statistic.
        if not msg.is_reply:
            return
        self.inc(msg.from_id)
class MediaCounter(UserCount):
    """Tallies, per sender, the messages that have a truthy ``media`` attribute."""

    def __init__(self):
        super().__init__("Media/User")

    def addMsg(self, msgnum, msg, chat):
        # Messages without media are skipped.
        if not msg.media:
            return
        self.inc(msg.from_id)
class MaxCounter(DiscreteCount):
    """Discrete counter that reports only its ``count`` most frequent keys."""

    # Class-level default; __init__ always replaces it on the instance.
    count = 0

    def __init__(self, name, c: int = 5):
        super().__init__(name)
        self.count = c

    def getAll(self, msgcount):
        """Return a dict of the ``self.count`` highest-valued entries of ``self.list``."""
        top_entries = Counter(self.list).most_common(self.count)
        return dict(top_entries)
class WordCounter(MaxCounter):
    """Counts lower-cased ``\\w+`` tokens across all message texts."""

    def __init__(self, name: str = "WordCounts", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        text = msg.message
        if text is None:
            return
        # Tokens come from already lower-cased text and \w+ never matches
        # whitespace or the empty string, so no further cleanup is needed.
        for word in re.findall(r'\w+', text.lower()):
            self.inc(word)
class MentionCounter(MaxCounter):
    """Counts lower-cased ``@name`` mentions (at least five word characters)."""

    def __init__(self, name: str = "MentionCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Tally every mention found in the text of *msg*.

        A mention must stand at the start of the text or directly after
        whitespace, so an ``@`` inside a word (e.g. an e-mail address) is
        not counted.
        """
        text = msg.message
        if text is None:
            return
        # (?<!\S) accepts "start of text" as well as "preceded by whitespace".
        # The previous pattern demanded a whitespace character and therefore
        # missed a mention that opens the message.
        for mention in re.findall(r'(?<!\S)@\w{5,}', text.lower()):
            self.inc(mention)
class MaxCharCounter(MaxCounter):
    """Counts single lower-cased characters across all message texts.

    Whitespace is never counted. Subclasses narrow the selection through
    ``checkChar`` and the ``Blacklist`` string.
    """

    # Characters that are never counted; subclasses override this.
    Blacklist = ""

    def __init__(self, name: str = "MaxCharCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Tally every accepted character of the text of *msg*."""
        text = msg.message
        if text is None:
            return
        for char in text.lower():
            # Skip every kind of whitespace (newlines, tabs, ...), not just
            # the plain space, so line breaks do not show up as characters.
            if char.isspace():
                continue
            if self.checkChar(char) and char not in self.Blacklist:
                self.inc(char)

    def checkChar(self, char):
        """Return True if *char* may be counted; the base class accepts everything."""
        return True
class EmojiCounter(MaxCharCounter):
    """Character counter restricted to non-ASCII characters outside ``Blacklist``."""

    # Non-ASCII characters that are excluded from the tally.
    Blacklist = "äÄöÖüÜßẞ̒ ͎'^°~`…·–÷×"

    def __init__(self, name: str = "EmojiCounter", c: int = 5):
        super().__init__(name, c)

    def checkChar(self, char):
        """Return True when the code point of *char* lies above the ASCII range."""
        code_point = ord(char)
        return code_point >= 0x80
def printDialog(interid, d): # debug only def printDialog(interid, d): # debug only
color = '' color = ''
if interid & 1: if interid & 1:
color = Fore.LIGHTBLACK_EX color = Fore.LIGHTBLACK_EX
print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, d.pinned) + Fore.RESET)
d.pinned) + Fore.RESET)
def getDialogType(dialog): def getDialogType(dialog):
@ -164,16 +302,21 @@ def getDialogType(dialog):
ent = dialog.entity ent = dialog.entity
if isinstance(ent, User): if isinstance(ent, User):
if ent.bot: if ent.bot:
return '🤖' return 'BOT '
return '👤' return '👤'
def getUsernamebyID(userid): def getUsernamebyID(userid):
global dialogs global idlookup
for d in dialogs: try:
if d.id == userid: return idlookup[userid]
return d.name except KeyError:
return "Unknown" # refresh dialog cache? global dialogs
for d in dialogs:
if d.id == userid:
return d.name
return "Unknown"
def dialogByTgID(tgid): def dialogByTgID(tgid):
@ -199,10 +342,17 @@ def analyseChat(dialog, output):
# chat = client.get_messages(selectedDialog) # chat = client.get_messages(selectedDialog)
chat = client.iter_messages(dialog, limit=None) chat = client.iter_messages(dialog, limit=None)
stats = { statList = {
CharCount(), CharCount(),
Dist(), Dist(),
UserCount() UserCount(), # oldest msg?, media types, most linked site
UserEdits(),
WordCounter(),
MentionCounter(),
EmojiCounter(),
UserAwnsers(),
MediaCounter(),
Interactivity()
} }
"""print('before' + Fore.MAGENTA) """print('before' + Fore.MAGENTA)
outputs['stdout'].print(stats, 10, dialog) outputs['stdout'].print(stats, 10, dialog)
@ -210,16 +360,16 @@ def analyseChat(dialog, output):
# run messure # run messure
msgnum = 0 msgnum = 0
for msg in chat: for msg in chat: # reinfolge ist sehr wichtig! jüngste -< älteste
if msg != None: if msg != None:
msgnum = msgnum + 1 msgnum = msgnum + 1
for stat in stats: for stat in statList:
stat.addMsg(msgnum, msg, dialog) stat.addMsg(msgnum, msg, dialog)
print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET) print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET)
# write to output # write to output
output.print(stats, msgnum, dialog) output.print(statList, msgnum, dialog)
def parseInput(uinput): def parseInput(uinput):
@ -253,6 +403,19 @@ def parseInput(uinput):
return selected return selected
# Returns a dict with an id -> name mapping.
def getUsers(dialog):
    """Build a ``{participant id: display name}`` dict for *dialog*.

    The display name is the first and last name joined by one space, using
    only the parts that are actually set. Participants without any name are
    left out, so a lookup for them raises ``KeyError`` and the caller can
    apply its own fallback.

    Uses the module-level ``client`` to fetch the participants.
    """
    names = {}
    for participant in client.get_participants(dialog):
        # Either name part may be missing (None or empty); concatenating
        # blindly would raise a TypeError or store None as the name.
        # NOTE(review): presumably Telethon User objects - confirm.
        parts = (participant.first_name, participant.last_name)
        display_name = " ".join(part for part in parts if part)
        if display_name:
            names[participant.id] = display_name
    return names
# ================================== # ==================================
# MAIN program # MAIN program
if __name__ == "__main__": if __name__ == "__main__":
@ -318,12 +481,14 @@ if __name__ == "__main__":
pass # some error, retry pass # some error, retry
start = timer() start = timer()
idlookup = {}
for d in selected: for d in selected:
try: try:
newusers = getUsers(d)
idlookup.update(newusers)
analyseChat(d, out) analyseChat(d, out)
except Exception as e: except Exception as e:
print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', e) print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', str(e))
print(e)
out.close() out.close()
took = datetime.timedelta(seconds=(timer() - start)) took = datetime.timedelta(seconds=(timer() - start))
took = datetime.timedelta(seconds=int(took.total_seconds())) took = datetime.timedelta(seconds=int(took.total_seconds()))

View File

@ -14,7 +14,7 @@ class Output:
def close(self): def close(self):
pass pass
def print(self, stats, count, dialog): def print(self, stats, msgcount, dialog):
pass pass
@ -23,9 +23,10 @@ class STDOUT(Output):
def __init__(self, name: str = "stdout"): def __init__(self, name: str = "stdout"):
super().__init__(name) super().__init__(name)
def print(self, stats, count, dialog): def print(self, stats, msgcount, dialog):
for stat in stats: for stat in stats:
print(stat.name, ': ', stat.getValue(count), sep='') print(stat.name, ': ', stat.getValue(msgcount), sep='')
class jsonOut(Output): class jsonOut(Output):
@ -45,8 +46,8 @@ class jsonOut(Output):
json.dump(self.outbuff, self.file, indent=4) json.dump(self.outbuff, self.file, indent=4)
self.file.close() self.file.close()
def print(self, stats, count, dialog): def print(self, stats, msgcount, dialog):
jsonpre = {"count": count, 'chatid': dialog.id, 'chatname': dialog.name} jsonpre = {"count": msgcount, 'chatid': dialog.id, 'chatname': dialog.name}
for stat in stats: for stat in stats:
jsonpre[stat.name] = stat.getAll(count) jsonpre[stat.name] = stat.getAll(msgcount)
self.outbuff.append(jsonpre) self.outbuff.append(jsonpre)