TelegramChatDownloader/telegramApi.py

151 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
import configparser
import os
import sys
from datetime import timedelta
from telethon import TelegramClient, events
from telethon.tl.types import User, Channel
def strip(str):
    """Return ``str`` reduced to its alphanumeric characters.

    Every character for which ``isalnum()`` is false (spaces, punctuation,
    symbols, ...) is dropped; the remaining characters keep their order.
    """
    # NOTE: the parameter shadows the builtin ``str`` inside this function,
    # so the builtin must not be used here.
    return ''.join(filter(lambda ch: ch.isalnum(), str))
def isBot(dialog):
    """Tell whether ``dialog`` is a conversation with a bot account.

    Args:
        dialog: object exposing the conversation partner as ``.entity``.

    Returns:
        ``False`` when the entity is not a ``User``; otherwise the entity's
        ``bot`` attribute.
    """
    # Use the argument itself rather than a module-level variable, so the
    # result does not depend on what the caller's loop variable is called.
    entity = dialog.entity
    if not isinstance(entity, User):
        return False
    return entity.bot
def isChannel(dialog):
    """Tell whether ``dialog`` refers to a channel.

    Args:
        dialog: either an object carrying the peer in ``.entity`` (what the
            script's chat loop passes) or a bare entity.

    Returns:
        ``True`` when the underlying entity is a ``Channel``.
    """
    # The dialog wrapper is never itself a Channel; the type that matters is
    # the wrapped entity. Fall back to the object itself so that passing a
    # bare Channel keeps returning True.
    entity = getattr(dialog, 'entity', dialog)
    return isinstance(entity, Channel)
def printDialog(id, d):
    """Print one row of the chat overview table.

    Args:
        id: position of the dialog in the list (printed 2 wide).
        d: dialog object providing ``id``, ``name`` and ``pinned``.
    """
    row = '{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id, d.name, d.pinned)
    print(row)
def saveMessage(message, file, dlid, ownid):
    """Append one text message to the chat XML file.

    Messages without text are skipped entirely (nothing is written).

    Args:
        message: object with ``message`` (text), ``date`` (datetime),
            ``from_id`` and ``media`` attributes.
        file: writable text file object receiving the XML fragment.
        dlid: number written into the ``<media>`` tag; the caller later
            stores the downloaded media under this number.
        ownid: id of the logged-in account; ``<me>`` is ``1`` when it equals
            ``message.from_id`` and ``0`` otherwise.

    Returns:
        ``message`` when it was written and carries media that still has to
        be downloaded, otherwise ``None``.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    if not message.message:
        return None

    out = None
    file.write(' <message>\n <date>')
    # datetime.timestamp() works on every platform and honours tzinfo;
    # strftime("%s") is a non-portable libc extension that ignores tzinfo.
    file.write(str(int(message.date.timestamp())))
    file.write('</date>\n <msg>')
    # Escape &, < and > so arbitrary chat text cannot break the XML.
    file.write(escape(message.message))
    file.write('</msg>\n <me>')
    file.write(str(int(ownid == message.from_id)))
    file.write('</me>\n')
    # Reference the media by number only; the download happens later.
    if message.media is not None:
        file.write(' <media>')
        file.write(str(dlid))
        file.write('</media>\n')
        out = message
    file.write(' </message>\n')
    return out
def createFolder(path):
    """Create the directory ``path`` (and missing parents) if needed.

    Calling it for a directory that already exists is a no-op.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok avoids the race between a separate existence check and the
    # creation itself.
    os.makedirs(path, exist_ok=True)
class DelayedDownload:
    """Record of a message whose media should be downloaded later.

    Attributes:
        id: number identifying the pending download.
        message: the message object that carries the media.
    """

    # Class-level defaults; every instance overrides both in __init__.
    id = 1
    message = None

    def __init__(self, _id, msg):
        """Remember ``msg`` together with its download number ``_id``."""
        self.message = msg
        self.id = _id
def downloadChat(dialog, filename, downloadmedia):
    """Export the messages of ``dialog`` to ``out/<filename>.xml``.

    Relies on the module-level ``client`` (connected Telegram client) and
    ``me`` (the logged-in account).

    Args:
        dialog: the chat to export; must provide ``name``.
        filename: base name (without extension) of the XML file in ``out/``.
        downloadmedia: when true, media referenced by the exported messages
            is downloaded into ``out/media/`` after the XML is written.

    Raises:
        SystemExit: with code 2 when the output file already exists and the
            user declines to overwrite it.
    """
    print ("selected: ", dialog.name, 'retriving chat!')
    chat = client.get_messages(dialog, limit=2000000)
    print("retrived ", len(chat), " Messages.")
    toDL = []  # list of DelayedDownload
    createFolder('out/')
    xmlPath = 'out/' + filename + '.xml'

    # Mode 'x' refuses to clobber an existing file, so ask before replacing
    # it. Any answer other than y/n simply asks again.
    while True:
        try:
            # Explicit UTF-8: chat text is arbitrary Unicode and the platform
            # default encoding may not be able to represent it.
            fout = open(xmlPath, 'x', encoding='utf-8')
            break
        except FileExistsError:
            get = input('Override out/' + filename + '.xml [y/n]?')
            if get in ('y', 'Y'):
                os.remove(xmlPath)
            elif get in ('n', 'N'):
                print ("Bye.")
                sys.exit(2)

    # The with-block guarantees the file is closed even if writing fails.
    with fout:
        fout.write('<chat>\n')
        for c in chat:
            # len(toDL) is the number the media of this message will get.
            dl = saveMessage(c, fout, len(toDL), me.id)
            if dl is not None and dl.media is not None:
                # here is something to download later
                toDL.append(DelayedDownload(len(toDL), c))
        fout.write('</chat>\n')

    if downloadmedia:
        createFolder('out/media/')
        print('Chat structure stored. Downloading Media - this may take a while!')
        for dl in toDL:
            print ('.', end='')
            client.download_media(dl.message, "out/media/" + str(dl.id))
        print('\n', len(toDL), " Media Files Downloaded.")
# ==================================
# MAIN program
# Runs at module level: ``client`` and ``me`` are read as globals by
# downloadChat, so they must be defined here.

# read config
try:
    config = configparser.ConfigParser()
    # A missing config.ini is not an error for read(); it surfaces below as
    # NoSectionError on the first get().
    config.read('config.ini')
    api_id = int(config.get('Main', 'api_id'))
    api_hash = config.get('Main', 'api_hash')
    workers = int(config.get('Main', 'workers'))
    session_name = config.get('Main', 'user')
except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
    print('invalid config.ini')
    sys.exit(3)

# create connection
client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
client.start()
me = client.get_me()

# get dialogs
dialogs = client.get_dialogs(limit=100)
print ("chats loaded. (", len(dialogs), ")")

# print chats
# table header
print (
'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
# content: the printed ID is the index into ``dialogs``
for index, d in enumerate(dialogs):
    printDialog(index, d)

get = input("Please Enter Chat ID or all: ")
if get == 'all':
    # Export every chat except bots, the own account and channels; media is
    # not downloaded in this mode.
    for d in dialogs:
        if not isBot(d) and d.entity.id != me.id and not isChannel(d):
            downloadChat(d, strip(d.name), False)
else:
    # Treat anything that is not an integer like an out-of-range ID instead
    # of crashing with a ValueError traceback.
    try:
        selected = int(get)
    except ValueError:
        selected = -1
    if selected < 0 or selected >= len(dialogs):
        print ("Unknown Chat ID!")
        sys.exit(1)
    downloadChat(dialogs[selected], 'chat', True)
print ('End.')