2019-07-19 13:47:49 +02:00
#!/usr/bin/env python3
from telethon import TelegramClient , sync
2019-07-19 18:29:21 +02:00
from telethon . tl . types import User
2019-07-19 13:47:49 +02:00
from timeit import default_timer as timer
import configparser
import out
import math , datetime
from colorama import Fore , Style
2020-07-01 11:38:05 +02:00
from collections import Counter
import re
2019-07-19 13:47:49 +02:00
2019-07-19 18:29:21 +02:00
2019-07-19 13:47:49 +02:00
class Stat:
    """Base class for one statistic that is fed every message of a chat.

    Subclasses override ``addMsg`` to consume messages and ``getAll`` to
    expose what they collected.
    """

    # Display name of the statistic; replaced per instance in __init__.
    name = None

    def __init__(self, name):
        """Remember the display name of this statistic."""
        self.name = name

    def addMsg(self, msgnum, msg, chat):
        """Consume one message. The base implementation ignores it."""
        pass

    def getValue(self, count):
        """Render ``getAll(count)`` as a single line of text.

        Returns an empty string when there is nothing to report, the sole
        element when ``getAll`` yields a one-element list, and otherwise
        ``"key: value, key: value"``.
        """
        values = self.getAll(count)
        # Covers None (the base class collects nothing) as well as empty
        # containers, so len() is never applied to None.
        if not values:
            return ''
        if isinstance(values, list):
            if len(values) == 1:
                return values[0]
            return ", ".join(str(item) for item in values)
        # Keys are passed through str() because not every key is text.
        return ", ".join(
            str(key) + ": " + str(val) for key, val in values.items()
        )

    def getAll(self, msgcount):
        """Return the collected values. The base class has none."""
        pass

    def parse(self, val):
        """Convert a raw value into its printable form."""
        return str(val)
class Countable(Stat):
    """Statistic that derives one number per message.

    Tracks the minimum, maximum and sum of the numbers returned by
    ``count``; subclasses only need to override ``count``.
    """

    # Start value of ``min``; still present means nothing was counted yet.
    _UNSET_MIN = 1 << 31

    acc = 0
    min = _UNSET_MIN
    max = 0

    def __init__(self, name):
        """Reset all accumulators and store the display name."""
        self.acc = 0
        self.min = self._UNSET_MIN
        self.max = 0
        super().__init__(name)

    def getMin(self):
        """Return the smallest counted value, or 0 if nothing was counted."""
        if self.min == self._UNSET_MIN:
            return self.parse(0)
        return self.parse(self.min)

    def getMax(self):
        """Return the largest counted value."""
        return self.parse(self.max)

    def getAcc(self):
        """Return the sum of all counted values."""
        return self.parse(self.acc)

    def getAvg(self, count):
        """Return the sum divided by ``count``, truncated to an integer.

        A ``count`` of zero or less yields 0 instead of dividing by zero.
        """
        if count <= 0:
            return self.parse(0)
        return self.parse(int(self.acc / count))

    def getAll(self, msgcount):
        """Return min, max, sum and average as a dict of printable values."""
        return {
            'min': self.getMin(),
            'max': self.getMax(),
            'count': self.getAcc(),
            'avg': self.getAvg(msgcount),
        }

    def addMsg(self, msgnum, msg, chat):
        """Fold the number computed for ``msg`` into the accumulators."""
        value = self.count(msgnum, msg, chat)
        # A count() implementation that has nothing to report returns None;
        # such messages must not reach the numeric comparisons below.
        if value is None:
            return
        if value > self.max:
            self.max = value
        if value < self.min:
            self.min = value
        self.acc = self.acc + value

    def count(self, msgnum, msg, chat):
        """Return the number for one message. Overridden by subclasses."""
        pass
class CharCount(Countable):
    """Counts the characters of every message text."""

    def __init__(self, name: str = "CharCount"):
        super().__init__(name)

    def count(self, msgnum, msg, chat):
        """Return the length of the message text, or 0 if there is none."""
        text = msg.message
        return len(text) if text is not None else 0
class Dist(Countable):
    """Measures the time gap in seconds between consecutive messages."""

    # Date of the previously seen message; 0 means no message seen yet.
    prev = 0

    def __init__(self, name: str = "Distanz"):
        self.prev = 0
        super().__init__(name)

    def count(self, msgnum, msg, chat):
        """Return the whole seconds between the previous message and ``msg``.

        The gap is computed as previous date minus this date. The first
        dated message and messages without a date contribute 0.
        """
        current = msg.date
        if current is None:
            # Return a number explicitly; falling through would return None.
            return 0
        previous, self.prev = self.prev, current
        if previous == 0:
            # NOTE(review): the first message adds a 0 gap, so the reported
            # minimum is always 0 - confirm whether that is intended.
            return 0
        return int((previous - current).total_seconds())

    def parse(self, val):
        """Format a number of seconds as a ``datetime.timedelta`` string."""
        return str(datetime.timedelta(seconds=val))

    def getAvg(self, count):
        """Return the average gap, or -1 when fewer than two messages exist."""
        if count < 2:
            return -1
        # Gaps between messages are measured, so there is one fewer gap
        # than there are messages.
        return self.parse(int(self.acc / (count - 1)))
2019-07-19 13:47:49 +02:00
class DiscreteCount(Stat):
    """Statistic that keeps one counter per discrete key."""

    # key -> number of occurrences; every instance gets its own dict below.
    list = {}

    def __init__(self, name):
        """Start with an empty counter table and store the display name."""
        self.list = {}
        super().__init__(name)

    def getAll(self, count):
        """Return the counter table itself."""
        return self.list

    def inc(self, key):
        """Increase the counter of ``key`` by one, starting from zero."""
        seen_so_far = self.list.get(key, 0)
        self.list[key] = seen_so_far + 1
2019-07-19 13:47:49 +02:00
class UserCount(DiscreteCount):
    """Counts messages per sender."""

    def __init__(self, name: str = "Msg/User"):
        super().__init__(name)

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender id."""
        self.inc(msg.from_id)

    def getAll(self, msgcount):
        """Return the counters keyed by display name instead of sender id."""
        named = {}
        for user_id, hits in self.list.items():
            label = getUsernamebyID(user_id)
            # Different ids can resolve to the same label (for example
            # several unresolvable ids); add them up instead of letting the
            # last one overwrite the others.
            named[label] = named.get(label, 0) + hits
        return named
2020-07-01 11:38:05 +02:00
# Proportional discrete metric
class UserProp(UserCount):
    """Per-user ratio: own counter divided by the user's message count."""

    def __init__(self, name: str = "%/User"):
        super().__init__(name)
        # Counts every message per sender; serves as the denominator.
        self.counter = UserCount("")

    def addMsg(self, msgnum, msg, chat):
        """Record ``msg`` in the per-user message total only."""
        self.counter.addMsg(msgnum, msg, chat)

    def getAll(self, msgcount):
        """Return display name -> counter / total messages of that user.

        Users with a zero counter, or without a recorded total, get 0.
        """
        totals = self.counter.list
        ratios = {}
        for user_id, hits in self.list.items():
            label = getUsernamebyID(user_id)
            total = totals.get(user_id)
            # A missing or zero total would make the division raise.
            ratios[label] = hits / total if hits and total else 0
        return ratios
# Users who rarely write several messages in a row.
# 1 = the user never writes two messages in a row, 0 = the user always writes
# all messages in a row without anyone else chiming in.
class Interactivity(UserProp):
    """Share of a user's messages that neighbour a message of someone else."""

    def __init__(self, name: str = "User Interactivity"):
        super().__init__(name)
        # Sender id of the message handled just before the current one.
        self.lastmsg = None

    # Messages are traversed from newest to oldest.
    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender when the sender just changed."""
        super().addMsg(msgnum, msg, chat)
        sender = msg.from_id
        previous = self.lastmsg
        if previous and previous != sender:
            self.inc(sender)
        self.lastmsg = sender
class UserEdits(UserCount):
    """Counts edited messages per sender."""

    def __init__(self, name: str = "Edits/User"):
        # ``name`` is optional so the constructor matches the other stats.
        super().__init__(name)

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it carries an edit date."""
        if not msg.edit_date:
            return
        self.inc(msg.from_id)
class UserAwnsers(UserCount):
    """Counts replies per sender."""

    def __init__(self, name: str = "AwnsersFromUser"):
        # ``name`` is optional so the constructor matches the other stats.
        super().__init__(name)

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it is a reply."""
        if not msg.is_reply:
            return
        self.inc(msg.from_id)
class MediaCounter(UserCount):
    """Counts messages carrying media per sender."""

    def __init__(self, name: str = "Media/User"):
        # ``name`` is optional so the constructor matches the other stats.
        super().__init__(name)

    def addMsg(self, msgnum, msg, chat):
        """Count ``msg`` for its sender if it has media attached."""
        if not msg.media:
            return
        self.inc(msg.from_id)
2021-01-22 23:11:48 +01:00
# How many messages from a user are scheduled?
# class ScheduledCounter(UserCount):
# def __init__(self):
# super().__init__("Schedules/User")
#
# def addMsg(self, msgnum, msg, chat):
# if msg.from_scheduled:
# self.inc(msg.from_id)
2020-07-01 11:38:05 +02:00
class MaxCounter(DiscreteCount):
    """Discrete counter that reports only its most frequent keys."""

    # Number of entries reported by getAll.
    count = 0

    def __init__(self, name, c: int = 5):
        """Store the display name and how many top entries to report."""
        super().__init__(name)
        self.count = c

    def getAll(self, msgcount):
        """Return the ``self.count`` most frequent keys with their counts."""
        ranking = Counter(self.list).most_common(self.count)
        return dict(ranking)
class WordCounter(MaxCounter):
    """Reports the most frequent words, compared case-insensitively."""

    # Compiled once for the class instead of being looked up per message.
    _WORD = re.compile(r'\w+')

    def __init__(self, name: str = "WordCounts", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Count every word of the lower-cased message text."""
        text = msg.message
        if text is None:
            return
        # Matches of \w+ are non-empty and contain no whitespace, and the
        # text is already lower-cased, so no per-word clean-up is needed.
        for word in self._WORD.findall(text.lower()):
            self.inc(word)
class MentionCounter(MaxCounter):
    """Reports the most frequent @mentions."""

    # "@" followed by at least five word characters. The lookbehind accepts
    # a mention at the very start of the text as well as after whitespace;
    # requiring a whitespace character would miss leading mentions.
    _MENTION = re.compile(r'(?<!\S)@\w{5,}')

    def __init__(self, name: str = "MentionCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Count every @mention of the lower-cased message text."""
        text = msg.message
        if text is None:
            return
        for mention in self._MENTION.findall(text.lower()):
            self.inc(mention)
class MaxCharCounter(MaxCounter):
    """Reports the most frequent single characters."""

    # Characters that are never counted.
    Blacklist = ""

    def __init__(self, name: str = "MaxCharCounter", c: int = 5):
        super().__init__(name, c)

    def addMsg(self, msgnum, msg, chat):
        """Count each accepted character of the lower-cased message text."""
        text = msg.message
        if text is None:
            return
        # Blanks are removed before the text is lower-cased.
        for char in text.replace(" ", "").lower():
            if not self.checkChar(char) or char in self.Blacklist:
                continue
            self.inc(char)

    def checkChar(self, char):
        """Tell whether ``char`` may be counted. The base accepts all."""
        return True
class EmojiCounter(MaxCharCounter):
    """Reports the most frequent non-ASCII characters."""

    # Non-ASCII characters that are excluded from the count.
    Blacklist = "äÄöÖüÜßẞ̒ ͎ ' ^°~`…·–÷×"

    def __init__(self, name: str = "EmojiCounter", c: int = 5):
        super().__init__(name, c)

    def checkChar(self, char):
        """Accept only characters whose code point is above 0x7F."""
        code_point = ord(char)
        return code_point > 0x7F
2019-07-19 13:47:49 +02:00
def printDialog(interid, d):  # debug only
    """Print one row of the chat selection table.

    ``interid`` is the position in the dialog list; odd rows are dimmed.
    """
    color = Fore.LIGHTBLACK_EX if interid & 1 else ''
    # Either part may be None; concatenating None to a str would raise.
    label = (getDialogType(d) or '') + (d.name or '')
    row = '{0:3d} | {1:14d} | {2:30} | {3:1}'.format(interid, d.id, label, d.pinned)
    print(color + row + Fore.RESET)
2019-07-19 18:29:21 +02:00
def getDialogType(dialog):
    """Return a short marker for the kind of chat ``dialog`` is.

    A negative id is treated as a group, a ``User`` entity as a bot or a
    person. Anything else yields an empty string.
    """
    if dialog.id < 0:  # group
        return '👥'
    ent = dialog.entity
    if isinstance(ent, User):
        return 'BOT' if ent.bot else '👤'
    # Always hand back a str so callers can concatenate the result.
    return ''
2019-07-19 13:47:49 +02:00
def getUsernamebyID(userid):
    """Resolve a Telegram id to a display name.

    Looks in the global ``idlookup`` table first, then in the names of the
    global ``dialogs``; returns "Unknown" if neither has the id.
    """
    if userid in idlookup:
        return idlookup[userid]
    matching_names = (d.name for d in dialogs if d.id == userid)
    return next(matching_names, "Unknown")
2019-07-19 13:47:49 +02:00
2019-07-19 18:29:21 +02:00
def dialogByTgID(tgid):
    """Return the first dialog whose Telegram id equals ``tgid``, else None."""
    hits = (d for d in dialogs if d.id == tgid)
    return next(hits, None)
def dialogByTgName(name):
    """Return the first dialog named ``name`` ignoring case, else None."""
    wanted = name.lower()
    hits = (d for d in dialogs if d.name.lower() == wanted)
    return next(hits, None)
2019-07-19 13:47:49 +02:00
def analyseChat(dialog, output):
    """Run every statistic over all messages of ``dialog`` and emit them.

    The messages are fetched through the global ``client`` and the results
    are handed to ``output.print`` together with the message count.
    """
    print(Fore.GREEN + 'Lade kompletten Chat: ' + Fore.RESET, dialog.name, sep='')
    chat = client.iter_messages(dialog, limit=None)
    # A list keeps the statistics in this fixed order; a set would hand
    # them to the output in an arbitrary order.
    statList = [
        CharCount(),
        Dist(),
        UserCount(),  # ideas: oldest msg?, media types, most linked site
        UserEdits(),
        WordCounter(),
        MentionCounter(),
        # ScheduledCounter(),
        EmojiCounter(),
        UserAwnsers(),
        MediaCounter(),
        Interactivity(),
    ]
    # run measurement
    msgnum = 0
    for msg in chat:  # the order matters: newest -> oldest
        if msg is None:
            continue
        msgnum += 1
        for stat in statList:
            stat.addMsg(msgnum, msg, dialog)
    print(Fore.BLUE, msgnum, Fore.MAGENTA + 'Nachrichten Analysiert' + Fore.RESET)
    # write to output
    output.print(statList, msgnum, dialog)
2019-07-19 13:47:49 +02:00
2019-07-19 18:29:21 +02:00
def parseInput(uinput):
    """Translate the user's chat selection into a list of dialogs.

    ``uinput`` is either ``'all'`` or a comma separated list. Each entry is
    tried as an index into the printed table, then as a Telegram id, then
    as a chat name. Empty entries are skipped.

    Raises:
        ValueError: if an entry matches no chat.
    """
    if uinput == 'all':
        return dialogs
    selected = []
    for entry in uinput.split(','):
        entry = entry.strip()
        if not entry:  # empty string
            continue
        # int() also accepts a leading minus sign, which str.isnumeric()
        # rejects; group chats have negative ids.
        try:
            number = int(entry)
        except ValueError:
            number = None
        found = None
        if number is not None:
            if 0 <= number < dialogCount:  # position in the printed table
                found = dialogs[number]
            else:  # search by id
                found = dialogByTgID(number)
        if found is None:  # search by name
            found = dialogByTgName(entry)
        if found is None:
            print(Fore.YELLOW + 'Kein Chat für Eingabe gefunden: ' + Fore.BLUE, entry, Fore.RESET)
            raise ValueError('unknown chat')
        selected.append(found)
    return selected
2020-07-01 11:38:05 +02:00
# returns a dict with an id -> name mapping
def getUsers(dialog):
    """Return ``{user id: display name}`` for the participants of ``dialog``.

    The participants are fetched through the global ``client``. The name is
    the first and last name joined by a blank; missing parts are left out
    and a participant without any name is listed as "Unknown".
    """
    names = {}
    for member in client.get_participants(dialog):
        # Either part can be None; joining only what exists avoids adding
        # to None and avoids storing None as a name.
        parts = [part for part in (member.first_name, member.last_name) if part]
        names[member.id] = " ".join(parts) if parts else "Unknown"
    return names
2019-07-19 13:47:49 +02:00
# ==================================
# MAIN program
if __name__ == "__main__":
    # The names client, dialogs, dialogCount and idlookup assigned below are
    # module globals on purpose: the functions above read them.
    outputs = {
        "stdout": out.STDOUT(),
        "json": out.jsonOut(),
        # TODO: add csv,xml,yaml maybe sqlite or mysql?
    }

    # read config
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
        api_id = int(config.get('Main', 'api_id'))
        api_hash = config.get('Main', 'api_hash')
        session_name = config.get('Main', 'user')
    except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
        print('invalid config.ini')
        # SystemExit works even where the site-provided exit() is absent.
        raise SystemExit(3)

    client = TelegramClient(session_name, api_id, api_hash)
    client.start()
    me = client.get_me()
    print('me.id:', me.id)

    # get dialogs
    dialogs = client.get_dialogs()
    dialogCount = len(dialogs)
    print(Fore.YELLOW, dialogCount, Fore.GREEN + 'Chats geladen.' + Fore.RESET)

    # select output; an empty answer means stdout, unknown answers re-prompt
    while True:
        uinput = input("Bitte Ausgabemethode wählen: [stdout,json] (stdout): ").strip().lower()
        # Bound to its own name so the imported module ``out`` stays usable.
        output = outputs.get(uinput or "stdout")
        if output is not None:
            break
    outfile = None
    if output.needsFilename:
        outfile = input("Bitte Dateinamen eingeben: ").strip()
    output.open(outfile)

    # select chat
    print(
        'ID| Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
    for interid, d in enumerate(dialogs):
        printDialog(interid, d)
    while True:
        uinput = input(
            'Bitte ' + Fore.BLUE + 'chatid1,chatid2,...' + Fore.RESET + ' oder "' + Fore.BLUE + 'all' + Fore.RESET + '" eingeben: ').strip().lower()
        try:
            selected = parseInput(uinput)
            break
        except Exception:
            # Invalid selection: ask again. Unlike a bare except this lets
            # Ctrl-C (KeyboardInterrupt) and SystemExit leave the loop.
            pass

    start = timer()
    idlookup = {}
    try:
        for d in selected:
            try:
                idlookup.update(getUsers(d))
                analyseChat(d, output)
            except Exception as e:
                # One failing chat must not stop the remaining ones.
                print(Fore.RED + 'Fehler beim bearbeiten von:' + Fore.RESET, d.name, '\n', str(e))
    finally:
        # Close the output even when the run is interrupted.
        output.close()
    took = datetime.timedelta(seconds=int(timer() - start))
    print(Fore.GREEN + "Fertig. Benötigte Zeit:" + Fore.YELLOW, took, Style.RESET_ALL)