neue statistiken

This commit is contained in:
mrbesen 2020-07-01 11:38:05 +02:00
parent 14b2a584a7
commit 5a062e0cbc
Signed by: MrBesen
GPG Key ID: 596B2350DCD67504
2 changed files with 201 additions and 35 deletions

223
src/main.py Executable file → Normal file
View File

@ -7,6 +7,8 @@ import out
import math, datetime
import colorama
from colorama import Fore, Style
from collections import Counter
import re
class Stat:
@ -23,7 +25,7 @@ class Stat:
all = self.getAll(count)
if len(all) == 0:
return ''
if len(all) == 1:
if isinstance(all, list) and len(all) == 1:
return next(iter(all))
for name, val in all.items():
if out:
@ -31,7 +33,7 @@ class Stat:
out = out + name + ": " + str(val)
return out
def getAll(self, count):
def getAll(self, msgcount):
pass
def parse(self, val):
@ -40,13 +42,13 @@ class Stat:
class Countable(Stat):
acc = 0
mult = 0
#mult = 0
min = 1 << 31
max = 0
def __init__(self, name):
self.acc = 0
self.mult = 0
#self.mult = 0
self.min = 1 << 31
self.max = 0
super().__init__(name)
@ -63,12 +65,12 @@ class Countable(Stat):
def getAvg(self, count):
return self.parse(int(self.acc / count))
def getGAvg(self):
return self.parse(math.sqrt(self.mult))
# def getGAvg(self):
# return self.parse(math.sqrt(self.mult))
def getAll(self, count):
return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(count),
'gavg': self.getGAvg()}
def getAll(self, msgcount):
return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(msgcount)}
# ,'gavg': self.getGAvg()}
def addMsg(self, msgnum, msg, chat):
count = self.count(msgnum, msg, chat)
@ -77,7 +79,7 @@ class Countable(Stat):
if count < self.min:
self.min = count
self.acc = self.acc + count
self.mult = self.mult + (count * count)
# self.mult = self.mult + (count * count)
def count(self, msgnum, msg, chat):
pass
@ -129,6 +131,9 @@ class DiscreteCount(Stat):
def getAll(self, count):
return self.list
def inc(self, key):
    """Add one to the tally kept under *key*; a missing key starts at zero."""
    previous = self.list.get(key, 0)
    self.list[key] = previous + 1
class UserCount(DiscreteCount):
@ -136,11 +141,9 @@ class UserCount(DiscreteCount):
super().__init__(name)
def addMsg(self, msgnum, msg, chat):
global me
fromid = msg.from_id
self.list[fromid] = self.list.get(fromid, 0) + 1
self.inc(msg.from_id)
def getAll(self, count):
def getAll(self, msgcount):
# replace ids with names
newdict = {}
for id, count in self.list.items():
@ -149,12 +152,147 @@ class UserCount(DiscreteCount):
return newdict
class UserProp(UserCount):
    """Proportional (percentage-style) discrete metric per user.

    The inherited ``self.list`` holds the per-user event tally (filled by
    subclasses through ``inc``), while ``self.counter`` tallies every message
    a user sent. ``getAll`` reports the ratio of the two.
    """

    def __init__(self, name: str = "%/User"):
        super().__init__(name)
        # Separate, unnamed per-user message counter used as the denominator.
        self.counter = UserCount("")

    def addMsg(self, msgnum, msg, chat):
        """Forward the message to the denominator counter only."""
        self.counter.addMsg(msgnum, msg, chat)

    def getAll(self, msgcount):
        """Return a dict mapping user name -> tally / messages of that user."""
        shares = {}
        for user_id, hits in self.list.items():
            # Replace the numeric id with a readable name.
            label = getUsernamebyID(user_id)
            shares[label] = hits / self.counter.list.get(user_id) if hits else 0
        return shares
class Interactivity(UserProp):
    """Share of a user's messages that border on a different sender's message.

    Highlights users who rarely write several messages in a row:
    1 = the user never writes two messages back to back,
    0 = the user always writes all messages consecutively without anybody
    else chiming in.
    """

    def __init__(self, name : str = "User Interactivity"):
        super().__init__(name)
        # Sender id of the previously processed message (None before the first).
        self.lastmsg = None

    def addMsg(self, msgnum, msg, chat):
        """Count the message for its sender when the previous sender differs.

        Messages are walked from the newest to the oldest one.
        """
        super().addMsg(msgnum, msg, chat)
        sender = msg.from_id
        if self.lastmsg and self.lastmsg != sender:
            self.inc(sender)
        self.lastmsg = sender
class UserEdits(UserCount):
    """Per-user tally of messages that carry an edit date."""

    def __init__(self):
        super().__init__("Edits/User")

    def addMsg(self, msgnum, msg, chat):
        """Count the message for its sender if ``msg.edit_date`` is set."""
        if not msg.edit_date:
            return
        self.inc(msg.from_id)
class UserAwnsers(UserCount):
    """Per-user tally of messages that are replies."""

    def __init__(self):
        super().__init__("AwnsersFromUser")

    def addMsg(self, msgnum, msg, chat):
        """Count the message for its sender if ``msg.is_reply`` is truthy."""
        if not msg.is_reply:
            return
        self.inc(msg.from_id)
class MediaCounter(UserCount):
    """Per-user tally of messages that have media attached."""

    def __init__(self):
        super().__init__("Media/User")

    def addMsg(self, msgnum, msg, chat):
        """Count the message for its sender if ``msg.media`` is truthy."""
        if not msg.media:
            return
        self.inc(msg.from_id)
class MaxCounter(DiscreteCount):
    """Discrete counter that reports only its most frequent entries."""

    # Number of top entries to report; overwritten per instance in __init__.
    count = 0

    def __init__(self, name, c: int = 5):
        super().__init__(name)
        self.count = c

    def getAll(self, msgcount):
        """Return the ``self.count`` most common keys with their tallies as a dict."""
        return dict(Counter(self.list).most_common(self.count))
class WordCounter(MaxCounter):
    """Tallies lower-cased words (runs of ``\\w`` characters) over all messages."""

    def __init__(self, name: str = "WordCounts", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Split the message text into words and count each occurrence."""
        text = msg.message
        if text is None:
            return
        for token in re.findall(r'\w+', text.lower()):
            word = token.strip().lower()
            if word:
                self.inc(word)
class MentionCounter(MaxCounter):
    """Tallies ``@name`` mentions (names of five or more word characters)."""

    def __init__(self, name: str = "MentionCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Count every mention found in the lower-cased message text.

        A mention must not be preceded by a non-whitespace character, so
        e-mail-like text such as ``foo@example`` is still ignored, while a
        mention at the very beginning of the message is now counted too
        (the previous pattern demanded a whitespace character in front).
        """
        text = msg.message
        if text is None:
            return
        # The lookbehind keeps the whitespace out of the match, so the keys
        # need no stripping; the text is already lower-cased.
        for mention in re.findall(r'(?<!\S)@\w{5,}', text.lower()):
            self.inc(mention)
class MaxCharCounter(MaxCounter):
    """Tallies single characters of all messages, reporting the most frequent."""

    # Characters that are never counted; subclasses may override.
    Blacklist = ""

    def __init__(self, name: str = "MaxCharCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Count each accepted character of the lower-cased, space-free text."""
        text = msg.message
        if text is None:
            return
        for ch in text.replace(" ", "").lower():
            if self.checkChar(ch) and ch not in self.Blacklist:
                self.inc(ch)

    def checkChar(self, char):
        """Filter hook: this base implementation accepts every character."""
        return True
class EmojiCounter(MaxCharCounter):
    """Tallies non-ASCII characters, minus a blacklist of common non-emoji ones."""

    # Non-ASCII characters that are excluded from the count.
    Blacklist = "äÄöÖüÜßẞ̒ ͎'^°~`…·–÷×"

    def __init__(self, name: str = "EmojiCounter", c: int = 5):
        super().__init__(name, c)

    def checkChar(self, char):
        """Accept only characters whose code point lies above the ASCII range."""
        code_point = ord(char)
        return code_point > 0x7F
def printDialog(interid, d):  # debug only
    """Print one table row (index, id, type + name, pinned flag) for dialog *d*.

    Rows with an odd *interid* are tinted with ``Fore.LIGHTBLACK_EX`` so that
    consecutive rows alternate. The row is printed exactly once; the earlier
    rendering contained the same print statement twice.
    """
    color = Fore.LIGHTBLACK_EX if interid & 1 else ''
    row = '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, d.pinned)
    print(color + row + Fore.RESET)
def getDialogType(dialog):
@ -164,16 +302,21 @@ def getDialogType(dialog):
ent = dialog.entity
if isinstance(ent, User):
if ent.bot:
return '🤖'
return 'BOT '
return '👤'
def getUsernamebyID(userid):
    """Resolve a user id to a display name.

    The module-level ``idlookup`` cache is consulted first; on a miss the
    module-level ``dialogs`` list is scanned for a dialog with that id.
    Returns the string "Unknown" when neither source knows the id.
    """
    # Both names are only read here, so no ``global`` declaration is needed.
    try:
        return idlookup[userid]
    except KeyError:
        pass
    for d in dialogs:
        if d.id == userid:
            return d.name
    return "Unknown"
def dialogByTgID(tgid):
@ -199,10 +342,17 @@ def analyseChat(dialog, output):
# chat = client.get_messages(selectedDialog)
chat = client.iter_messages(dialog, limit=None)
stats = {
statList = {
CharCount(),
Dist(),
UserCount()
UserCount(), # oldest msg?, media types, most linked site
UserEdits(),
WordCounter(),
MentionCounter(),
EmojiCounter(),
UserAwnsers(),
MediaCounter(),
Interactivity()
}
"""print('before' + Fore.MAGENTA)
outputs['stdout'].print(stats, 10, dialog)
@ -210,16 +360,16 @@ def analyseChat(dialog, output):
# run messure
msgnum = 0
for msg in chat:
for msg in chat: # reinfolge ist sehr wichtig! jüngste -< älteste
if msg != None:
msgnum = msgnum + 1
for stat in stats:
for stat in statList:
stat.addMsg(msgnum, msg, dialog)
print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET)
# write to output
output.print(stats, msgnum, dialog)
output.print(statList, msgnum, dialog)
def parseInput(uinput):
@ -253,6 +403,19 @@ def parseInput(uinput):
return selected
def getUsers(dialog):
    """Return a dict mapping participant id -> display name for *dialog*.

    The name is the first and last name joined by a space; missing (None)
    or empty parts are left out. Participants without any name part are not
    added to the result, which avoids storing a None name and avoids the
    TypeError raised by concatenating onto a missing first name.
    NOTE(review): presumably such entries are deleted accounts - confirm.
    """
    users = {}
    # ``client`` is the module-level client object; it is only read here.
    for p in client.get_participants(dialog):
        name = " ".join(part for part in (p.first_name, p.last_name) if part)
        if name:
            users[p.id] = name
    return users
# ==================================
# MAIN program
if __name__ == "__main__":
@ -318,12 +481,14 @@ if __name__ == "__main__":
pass # some error, retry
start = timer()
idlookup = {}
for d in selected:
try:
newusers = getUsers(d)
idlookup.update(newusers)
analyseChat(d, out)
except Exception as e:
print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', e)
print(e)
print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', str(e))
out.close()
took = datetime.timedelta(seconds=(timer() - start))
took = datetime.timedelta(seconds=int(took.total_seconds()))

View File

@ -14,7 +14,7 @@ class Output:
def close(self):
    """Finish the output; this base implementation does nothing."""
def print(self, stats, msgcount, dialog):
    """Emit *stats* for *dialog*; this base implementation does nothing.

    *msgcount* is the number of analysed messages. Only one signature line
    is kept; the rendering showed two stacked ``def`` lines.
    """
@ -23,9 +23,10 @@ class STDOUT(Output):
def __init__(self, name: str = "stdout"):
    """Create the output, passing *name* on to the parent initializer."""
    super().__init__(name)
def print(self, stats, msgcount, dialog):
    """Print one ``<name>: <value>`` line per statistic.

    *msgcount* is handed to each statistic's ``getValue``. Every line is
    printed once; the rendering carried a second print call that referred
    to the no longer existing ``count`` parameter.
    """
    for stat in stats:
        print(stat.name, ': ', stat.getValue(msgcount), sep='')
class jsonOut(Output):
@ -45,8 +46,8 @@ class jsonOut(Output):
json.dump(self.outbuff, self.file, indent=4)
self.file.close()
def print(self, stats, count, dialog):
jsonpre = {"count": count, 'chatid': dialog.id, 'chatname': dialog.name}
def print(self, stats, msgcount, dialog):
jsonpre = {"count": msgcount, 'chatid': dialog.id, 'chatname': dialog.name}
for stat in stats:
jsonpre[stat.name] = stat.getAll(count)
jsonpre[stat.name] = stat.getAll(msgcount)
self.outbuff.append(jsonpre)