neue statistiken
This commit is contained in:
parent
14b2a584a7
commit
5a062e0cbc
|
@ -7,6 +7,8 @@ import out
|
|||
import math, datetime
|
||||
import colorama
|
||||
from colorama import Fore, Style
|
||||
from collections import Counter
|
||||
import re
|
||||
|
||||
|
||||
class Stat:
|
||||
|
@ -23,7 +25,7 @@ class Stat:
|
|||
all = self.getAll(count)
|
||||
if len(all) == 0:
|
||||
return ''
|
||||
if len(all) == 1:
|
||||
if isinstance(all, list) and len(all) == 1:
|
||||
return next(iter(all))
|
||||
for name, val in all.items():
|
||||
if out:
|
||||
|
@ -31,7 +33,7 @@ class Stat:
|
|||
out = out + name + ": " + str(val)
|
||||
return out
|
||||
|
||||
def getAll(self, count):
|
||||
def getAll(self, msgcount):
|
||||
pass
|
||||
|
||||
def parse(self, val):
|
||||
|
@ -40,13 +42,13 @@ class Stat:
|
|||
|
||||
class Countable(Stat):
|
||||
acc = 0
|
||||
mult = 0
|
||||
#mult = 0
|
||||
min = 1 << 31
|
||||
max = 0
|
||||
|
||||
def __init__(self, name):
|
||||
self.acc = 0
|
||||
self.mult = 0
|
||||
#self.mult = 0
|
||||
self.min = 1 << 31
|
||||
self.max = 0
|
||||
super().__init__(name)
|
||||
|
@ -63,12 +65,12 @@ class Countable(Stat):
|
|||
def getAvg(self, count):
    """Return the parsed average of the accumulated value per message.

    ``count`` is the divisor (the number of analysed messages). The quotient
    is truncated to an int and passed through ``self.parse``.

    An empty chat (``count`` of 0) yields an average of 0 instead of raising
    ZeroDivisionError.
    """
    if not count:
        # Nothing was analysed, so there is nothing to average.
        return self.parse(0)
    return self.parse(int(self.acc / count))
|
||||
|
||||
def getGAvg(self):
|
||||
return self.parse(math.sqrt(self.mult))
|
||||
# def getGAvg(self):
|
||||
# return self.parse(math.sqrt(self.mult))
|
||||
|
||||
def getAll(self, count):
|
||||
return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(count),
|
||||
'gavg': self.getGAvg()}
|
||||
def getAll(self, msgcount):
|
||||
return {'min': self.getMin(), 'max': self.getMax(), 'count': self.getAcc(), 'avg': self.getAvg(msgcount)}
|
||||
# ,'gavg': self.getGAvg()}
|
||||
|
||||
def addMsg(self, msgnum, msg, chat):
|
||||
count = self.count(msgnum, msg, chat)
|
||||
|
@ -77,7 +79,7 @@ class Countable(Stat):
|
|||
if count < self.min:
|
||||
self.min = count
|
||||
self.acc = self.acc + count
|
||||
self.mult = self.mult + (count * count)
|
||||
# self.mult = self.mult + (count * count)
|
||||
|
||||
def count(self, msgnum, msg, chat):
|
||||
pass
|
||||
|
@ -129,6 +131,9 @@ class DiscreteCount(Stat):
|
|||
def getAll(self, count):
    """Return the key -> tally mapping held in ``self.list``.

    ``count`` is accepted for signature parity with the other ``getAll``
    implementations and is not used here.
    """
    tallies = self.list
    return tallies
|
||||
|
||||
def inc(self, key):
    """Raise the tally stored under ``key`` by one; unseen keys start at zero."""
    previous = self.list.get(key, 0)
    self.list[key] = previous + 1
|
||||
|
||||
|
||||
class UserCount(DiscreteCount):
|
||||
|
||||
|
@ -136,11 +141,9 @@ class UserCount(DiscreteCount):
|
|||
super().__init__(name)
|
||||
|
||||
def addMsg(self, msgnum, msg, chat):
|
||||
global me
|
||||
fromid = msg.from_id
|
||||
self.list[fromid] = self.list.get(fromid, 0) + 1
|
||||
self.inc(msg.from_id)
|
||||
|
||||
def getAll(self, count):
|
||||
def getAll(self, msgcount):
|
||||
# replace ids with names
|
||||
newdict = {}
|
||||
for id, count in self.list.items():
|
||||
|
@ -149,12 +152,147 @@ class UserCount(DiscreteCount):
|
|||
return newdict
|
||||
|
||||
|
||||
# percentage-based discrete metric
|
||||
class UserProp(UserCount):
    """Per-user ratio metric: a user's own tally divided by their message total.

    ``self.list`` (inherited) holds the numerator per user id; this class
    never fills it itself, subclasses do so through ``inc``. ``self.counter``
    is a nested ``UserCount`` that receives every message handed to
    ``addMsg`` and supplies the denominator.
    """

    def __init__(self, name: str = "%/User"):
        super().__init__(name)
        # Separate counter that tracks how many messages each user sent.
        self.counter = UserCount("")

    def addMsg(self, msgnum, msg, chat):
        """Forward the message to the nested per-user message counter."""
        self.counter.addMsg(msgnum, msg, chat)

    def getAll(self, msgcount):
        """Return ``{user name: tally / messages sent by that user}``.

        User ids are replaced by names via ``getUsernamebyID``. A zero tally,
        or a user id the nested counter has no total for, yields 0 rather
        than a division error. ``msgcount`` is not used.
        """
        totals = self.counter.list
        ratios = {}
        for userid, hits in self.list.items():
            username = getUsernamebyID(userid)
            total = totals.get(userid, 0)
            ratios[username] = hits / total if hits and total else 0
        return ratios
|
||||
|
||||
|
||||
# users who rarely write several messages in a row
|
||||
# 1 = the user never writes 2 messages in a row, 0 = the user always writes all of their messages in a row without anyone else chiming in
|
||||
class Interactivity(UserProp):
    """Ratio of a user's messages that border on a message from someone else.

    Scale per the original author's note: 1 means the user never writes two
    messages in a row, 0 means the user always writes all of their messages
    in a row.
    """

    def __init__(self, name : str = "User Interactivity"):
        super().__init__(name)
        # Sender id of the message processed just before the current one.
        self.lastmsg = None

    def addMsg(self, msgnum, msg, chat):
        """Count the message for its sender when the previous sender differs.

        Per the original author's note, messages are walked from newest to
        oldest. The first processed message is never counted because no
        previous sender is known yet.
        """
        super().addMsg(msgnum, msg, chat)
        sender = msg.from_id
        previous = self.lastmsg
        if previous and previous != sender:
            self.inc(sender)
        self.lastmsg = sender
|
||||
|
||||
|
||||
class UserEdits(UserCount):
    """Tallies, per sender id, the messages that carry a truthy ``edit_date``."""

    def __init__(self):
        super().__init__("Edits/User")

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it has been edited."""
        if not msg.edit_date:
            return
        self.inc(msg.from_id)
|
||||
|
||||
|
||||
class UserAwnsers(UserCount):
    """Tallies, per sender id, the messages whose ``is_reply`` flag is truthy."""

    def __init__(self):
        super().__init__("AwnsersFromUser")

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it is a reply."""
        if not msg.is_reply:
            return
        self.inc(msg.from_id)
|
||||
|
||||
|
||||
class MediaCounter(UserCount):
    """Tallies, per sender id, the messages that have a truthy ``media`` attribute."""

    def __init__(self):
        super().__init__("Media/User")

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it carries media."""
        if not msg.media:
            return
        self.inc(msg.from_id)
|
||||
|
||||
|
||||
class MaxCounter(DiscreteCount):
    """Discrete counter that reports only its most frequent entries."""

    # Number of top entries to report; overwritten per instance in __init__.
    count = 0

    def __init__(self, name, c: int = 5):
        super().__init__(name)
        self.count = c

    def getAll(self, msgcount):
        """Return the ``self.count`` most common keys of ``self.list`` with their tallies.

        ``msgcount`` is not used.
        """
        ranked = Counter(self.list).most_common(self.count)
        return {key: hits for key, hits in ranked}
|
||||
|
||||
|
||||
class WordCounter(MaxCounter):
    """Counts how often each word occurs in the message texts."""

    def __init__(self, name: str = "WordCounts", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Tally every run of word characters in the lower-cased message text.

        Messages whose ``message`` attribute is None are ignored.
        """
        text = msg.message
        if text is None:
            return
        for raw in re.findall(r'\w+', text.lower()):
            word = raw.strip().lower()
            if word:
                self.inc(word)
|
||||
|
||||
|
||||
class MentionCounter(MaxCounter):
    """Counts ``@name`` mentions (at least five word characters) in message texts."""

    def __init__(self, name: str = "MentionCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Tally every mention found in the lower-cased message text.

        A mention must not be preceded by a non-whitespace character, so
        e-mail-like text such as ``user@example`` is not counted. A mention
        at the very start of the message is counted as well. Keys are stored
        as ``@name`` in lower case. Messages whose ``message`` attribute is
        None are ignored.
        """
        text = msg.message
        if text is None:
            return
        # The lookbehind replaces the former leading ``\s``, which could never
        # match a mention at the very start of the text.
        for mention in re.findall(r'(?<!\S)@\w{5,}', text.lower()):
            self.inc(mention)
|
||||
|
||||
|
||||
class MaxCharCounter(MaxCounter):
    """Counts how often each character occurs in the message texts."""

    # Characters that are never counted; subclasses may override this.
    Blacklist = ""

    def __init__(self, name: str = "MaxCharCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Tally every accepted character of the message text.

        Spaces are removed and the text is lower-cased first. A character is
        counted when ``checkChar`` accepts it and it is not in ``Blacklist``.
        Messages whose ``message`` attribute is None are ignored.
        """
        text = msg.message
        if text is None:
            return
        for ch in text.replace(" ", "").lower():
            if self.checkChar(ch) and ch not in self.Blacklist:
                self.inc(ch)

    def checkChar(self, char):
        """Accept every character; subclasses narrow this down."""
        return True
|
||||
|
||||
|
||||
class EmojiCounter(MaxCharCounter):
    """Counts non-ASCII characters, excluding the blacklisted ones below."""

    # Non-ASCII characters that must not be counted.
    Blacklist = "äÄöÖüÜßẞ̒ ͎'^°~`…·–÷×"

    def __init__(self, name: str = "EmojiCounter", c: int = 5):
        super().__init__(name, c)

    def checkChar(self, char):
        """Accept only characters whose code point lies above the ASCII range."""
        code_point = ord(char)
        return code_point > 0x7F
|
||||
|
||||
|
||||
|
||||
def printDialog(interid, d): # debug only
|
||||
color = ''
|
||||
if interid & 1:
|
||||
color = Fore.LIGHTBLACK_EX
|
||||
print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name,
|
||||
d.pinned) + Fore.RESET)
|
||||
print(color + '{0:3d}| {1:14d} | {2:30} | {3:1}'.format(interid, d.id, getDialogType(d) + d.name, d.pinned) + Fore.RESET)
|
||||
|
||||
|
||||
def getDialogType(dialog):
|
||||
|
@ -164,16 +302,21 @@ def getDialogType(dialog):
|
|||
ent = dialog.entity
|
||||
if isinstance(ent, User):
|
||||
if ent.bot:
|
||||
return '🤖'
|
||||
return 'BOT '
|
||||
return '👤'
|
||||
|
||||
|
||||
def getUsernamebyID(userid):
|
||||
global dialogs
|
||||
for d in dialogs:
|
||||
if d.id == userid:
|
||||
return d.name
|
||||
return "Unknown" # refresh dialog cache?
|
||||
global idlookup
|
||||
try:
|
||||
return idlookup[userid]
|
||||
except KeyError:
|
||||
global dialogs
|
||||
for d in dialogs:
|
||||
if d.id == userid:
|
||||
return d.name
|
||||
|
||||
return "Unknown"
|
||||
|
||||
|
||||
def dialogByTgID(tgid):
|
||||
|
@ -199,10 +342,17 @@ def analyseChat(dialog, output):
|
|||
# chat = client.get_messages(selectedDialog)
|
||||
chat = client.iter_messages(dialog, limit=None)
|
||||
|
||||
stats = {
|
||||
statList = {
|
||||
CharCount(),
|
||||
Dist(),
|
||||
UserCount()
|
||||
UserCount(), # oldest msg?, media types, most linked site
|
||||
UserEdits(),
|
||||
WordCounter(),
|
||||
MentionCounter(),
|
||||
EmojiCounter(),
|
||||
UserAwnsers(),
|
||||
MediaCounter(),
|
||||
Interactivity()
|
||||
}
|
||||
"""print('before' + Fore.MAGENTA)
|
||||
outputs['stdout'].print(stats, 10, dialog)
|
||||
|
@ -210,16 +360,16 @@ def analyseChat(dialog, output):
|
|||
|
||||
# run messure
|
||||
msgnum = 0
|
||||
for msg in chat:
|
||||
for msg in chat: # reinfolge ist sehr wichtig! jüngste -< älteste
|
||||
if msg != None:
|
||||
msgnum = msgnum + 1
|
||||
for stat in stats:
|
||||
for stat in statList:
|
||||
stat.addMsg(msgnum, msg, dialog)
|
||||
|
||||
print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET)
|
||||
|
||||
# write to output
|
||||
output.print(stats, msgnum, dialog)
|
||||
output.print(statList, msgnum, dialog)
|
||||
|
||||
|
||||
def parseInput(uinput):
|
||||
|
@ -253,6 +403,19 @@ def parseInput(uinput):
|
|||
return selected
|
||||
|
||||
|
||||
# returns a dict with an id -> name mapping
|
||||
def getUsers(dialog):
    """Return a ``{participant id: display name}`` dict for ``dialog``.

    Participants are fetched through the module-level ``client``. The display
    name is the first and last name joined by a space, with missing or empty
    parts skipped. A participant with neither part is named "Unknown", the
    same placeholder ``getUsernamebyID`` uses for ids it cannot resolve.
    """
    names = {}
    for participant in client.get_participants(dialog):
        # Either name part may be None; joining only the present parts avoids
        # a TypeError on ``None + str`` and a None value in the result.
        parts = [part for part in (participant.first_name, participant.last_name) if part]
        names[participant.id] = " ".join(parts) if parts else "Unknown"
    return names
|
||||
|
||||
|
||||
# ==================================
|
||||
# MAIN program
|
||||
if __name__ == "__main__":
|
||||
|
@ -318,12 +481,14 @@ if __name__ == "__main__":
|
|||
pass # some error, retry
|
||||
|
||||
start = timer()
|
||||
idlookup = {}
|
||||
for d in selected:
|
||||
try:
|
||||
newusers = getUsers(d)
|
||||
idlookup.update(newusers)
|
||||
analyseChat(d, out)
|
||||
except Exception as e:
|
||||
print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', e)
|
||||
print(e)
|
||||
print(Fore.RED + 'Fehler beim bearbeiten von: ' + Fore.RESET, d.name, '\n', str(e))
|
||||
out.close()
|
||||
took = datetime.timedelta(seconds=(timer() - start))
|
||||
took = datetime.timedelta(seconds=int(took.total_seconds()))
|
||||
|
|
13
src/out.py
13
src/out.py
|
@ -14,7 +14,7 @@ class Output:
|
|||
def close(self):
|
||||
pass
|
||||
|
||||
def print(self, stats, count, dialog):
|
||||
def print(self, stats, msgcount, dialog):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -23,9 +23,10 @@ class STDOUT(Output):
|
|||
def __init__(self, name: str = "stdout"):
|
||||
super().__init__(name)
|
||||
|
||||
def print(self, stats, count, dialog):
|
||||
def print(self, stats, msgcount, dialog):
|
||||
for stat in stats:
|
||||
print(stat.name, ': ', stat.getValue(count), sep='')
|
||||
print(stat.name, ': ', stat.getValue(msgcount), sep='')
|
||||
|
||||
|
||||
class jsonOut(Output):
|
||||
|
||||
|
@ -45,8 +46,8 @@ class jsonOut(Output):
|
|||
json.dump(self.outbuff, self.file, indent=4)
|
||||
self.file.close()
|
||||
|
||||
def print(self, stats, count, dialog):
|
||||
jsonpre = {"count": count, 'chatid': dialog.id, 'chatname': dialog.name}
|
||||
def print(self, stats, msgcount, dialog):
|
||||
jsonpre = {"count": msgcount, 'chatid': dialog.id, 'chatname': dialog.name}
|
||||
for stat in stats:
|
||||
jsonpre[stat.name] = stat.getAll(count)
|
||||
jsonpre[stat.name] = stat.getAll(msgcount)
|
||||
self.outbuff.append(jsonpre)
|
||||
|
|
Loading…
Reference in New Issue