From d32af73933649f48f1856bfefdad1d0f057ddddb Mon Sep 17 00:00:00 2001 From: MrBesen Date: Thu, 7 Jun 2018 21:09:49 +0200 Subject: [PATCH] download all chats at once. --- .gitignore | 3 + LICENSE | 0 README.md | 35 +++++++- telegramApi.py | 216 +++++++++++++++++++++++++++---------------------- 4 files changed, 157 insertions(+), 97 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 LICENSE mode change 100644 => 100755 README.md diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 8605e00..93d2cec --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ out/chat.xml out/media/* *.session +config.ini +.idea +TODO \ No newline at end of file diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index bddd99f..f98c41c --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ requirements: * python3 * [telethon](https://github.com/LonamiWebs/Telethon) * [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id) + install requirements: ``` sudo apt install python3-pip git @@ -12,7 +13,9 @@ pip3 install telethon ``` get the downloader: ```git clone https://github.com/mrbesen/TelegramChatDownloader.git``` -open the file telegramApi.py and enter your API ID, token and username. + + +open the file config.ini and enter your API ID, hash and username. Then run: ``` @@ -21,7 +24,35 @@ cd TelegramChatDownloader/ ``` ##### Output Format The chat structure is stored as "chat.xml" in the folder "out/" -All Media-files are Stored in the Folder out/media/ +All Media-files are Stored in the folder out/media/ Stickers get downloaded too, but they wont get a file-postfix. They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format. + +A sample chat.xml could look like this: +``` + + + 1470214908 + Hi! + 1 + + + 1470214882 + Heyo + 0 + + + 1470214756 + Look + 1 + 0 + + + 1470070908 + Thats sick + 0 + + +``` + diff --git a/telegramApi.py b/telegramApi.py index f1053c3..55c1fb7 100755 --- a/telegramApi.py +++ b/telegramApi.py @@ -1,124 +1,150 @@ #!/usr/bin/env python3 -from telethon import TelegramClient, events -from datetime import timedelta import configparser -import sys import os +import sys +from datetime import timedelta +from telethon import TelegramClient, events +from telethon.tl.types import User, Channel + + +def strip(str): + return ''.join(e for e in str if e.isalnum()) + +def isBot(dialog): + if not isinstance(d.entity, User): + return False + return d.entity.bot + +def isChannel(dialog): + return isinstance(dialog, Channel) + +def printDialog(id, d): + #print(d) + # print (d.id, " ", d.name," ", d.pinned) + print('{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id, d.name, d.pinned)) -def printDialog(id , d): -# print (d.id, " ", d.name," ", d.pinned) - print('{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id,d.name,d.pinned)) def saveMessage(message, file, dlid, ownid): -# print(message) - file.write(' \n ') - file.write(message.date.strftime("%s")) - file.write('\n ') - file.write(message.message) - file.write('\n ') - file.write(str(int(ownid==message.from_id))) - file.write('\n') - #dl and write media if exist - out = None - if message.media != None: - file.write(' ') - file.write(str(dlid)) - file.write('\n') - out = message + out = None + if message.message: + # print(message) + file.write(' \n ') + file.write(message.date.strftime("%s")) + file.write('\n ') + file.write(message.message) + file.write('\n ') + file.write(str(int(ownid == message.from_id))) + file.write('\n') + # dl and write media if exist + if message.media != None: + file.write(' ') + file.write(str(dlid)) + file.write('\n') + out = message + + file.write(' \n') + return out - file.write(' \n') - return out def createFolder(path): - if not os.path.exists(path): - os.makedirs(path) + if not os.path.exists(path): + os.makedirs(path) + class DelayedDownload: - """A class to store the data, for other methods to download media later""" - message=None - id=1 - def __init__(self, _id, msg): - self.id = _id - self.message = msg + """A class to store the data, for other methods to download media later""" + message = None + id = 1 + + def __init__(self, _id, msg): + self.id = _id + self.message = msg -#================================== -#MAIN program +def downloadChat(dialog, filename, downloadmedia): + print ("selected: ", dialog.name, 'retriving chat!') -#read config + chat = client.get_messages(dialog, limit=2000000) + print("retrived ", len(chat), " Messages.") + + toDL = [] # list of DelayedDownload + + createFolder('out/') + + while True: + try: + fout = open('out/' + filename + '.xml', 'x') + break + except FileExistsError: + get = input('Override out/' + filename + '.xml [y/n]?') + if get == 'y' or get == 'Y': + # delete + os.remove('out/' + filename + '.xml') + elif get == 'n' or get == 'N': + # exit + print ("Bye.") + exit(2) + + fout.write('\n') + for c in chat: + dl = saveMessage(c, fout, len(toDL), me.id) + if dl != None: + if dl.media != None: + # here is something to download later + toDL.append(DelayedDownload(len(toDL), c)) + fout.write('\n') + fout.close() + if(downloadmedia): + createFolder('out/media/') + print('Chat structure stored. Downloading Media - this may take a while!') + for dl in toDL: + print ('.', end='') + client.download_media(dl.message, "out/media/" + str(dl.id)) + print('\n', len(toDL), " Media Files Downloaded.") + +# ================================== +# MAIN program + +# read config try: - config = configparser.ConfigParser() - config.read('config.ini') - api_id = int(config.get('Main','api_id')) - api_hash = config.get('Main','api_hash') - workers = int(config.get('Main','workers')) - session_name = config.get('Main','user') + config = configparser.ConfigParser() + config.read('config.ini') + api_id = int(config.get('Main', 'api_id')) + api_hash = config.get('Main', 'api_hash') + workers = int(config.get('Main', 'workers')) + session_name = config.get('Main', 'user') except (configparser.NoSectionError, configparser.NoOptionError, ValueError): - print('invalid config.ini') - exit(3) + print('invalid config.ini') + exit(3) # create connection client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True) client.start() me = client.get_me() -#get dialogs +# get dialogs dialogs = client.get_dialogs(limit=100) -print ("chats loaded. (" , len(dialogs), ")") -#print chats +print ("chats loaded. (", len(dialogs), ")") +# print chats id = 0 -#table header -print ('ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————') -#content +# table header +print ( + 'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————') +# content for d in dialogs: - printDialog(id,d) - id = id+1 -get = int(input("Please Enter Chat ID: ")) -if get < 0 or get >= id: - print ("Unknown Chat ID!") - exit(1) + printDialog(id, d) + id = id + 1 +get = input("Please Enter Chat ID or all: ") +if get == 'all': + for d in dialogs: + if not isBot(d) and d.entity.id != me.id and not isChannel(d): + downloadChat(d, strip(d.name), False) +elif int(get) < 0 or int(get) >= id: + print ("Unknown Chat ID!") + exit(1) +else: + downloadChat(dialogs[int(get)], 'chat', True) -selectedDialog = dialogs[get] - -print ("selected: ", selectedDialog.name, 'retriving chat!') - -chat = client.get_messages(selectedDialog, limit=2000000) -print("retrived ", len(chat), " Messages.") - -toDL = []#list of DelayedDownload - -createFolder('out/') - -while True: - try: - fout = open('out/chat.xml','x') - break - except FileExistsError: - get = input("Override out/chat.xml [y/n]?") - if get == 'y' or get == 'Y': - #delete - os.remove('out/chat.xml') - elif get == 'n' or get == 'N': - #exit - print ("Bye.") - exit(2) - -fout.write('\n') -for c in chat: - dl = saveMessage(c, fout, len(toDL),me.id) - if dl != None: - if dl.media != None: - #here is something to download later - toDL.append(DelayedDownload(len(toDL),c)) -fout.write('\n') -fout.close() - -createFolder('out/media/') -print('Chat structure stored. Downloading Media - this may take a while!') -for dl in toDL: - print ('.', end='') - client.download_media(dl.message, "out/media/" + str(dl.id)) -print('\n', len(toDL), " Media Files Downloaded.") print ('End.')