commit 025c2ff9cab48b2d144af2dd166f098afe6bf3de Author: mrbesen Date: Tue May 29 08:33:37 2018 +0200 Inital Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8605e00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +out/chat.xml +out/media/* +*.session diff --git a/README.md b/README.md new file mode 100644 index 0000000..f40bfd6 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +### TelegramChatDownloader +A CLI program to download a chat from Telegram. +##### How to install (Linux only) +requirements: +* python3 +* [telethon](https://github.com/LonamiWebs/Telethon) +* [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id) +install requirements: +```sudo apt install python3-pip git +pip3 install telethon``` +get the downloader: +```git clone https://github.com/mrbesen/TelegramChatDownloader.git``` +open the file telegramApi.py and enter your API ID, API hash and session name. +run: +```cd TelegramChatDownloader/ +./telegramApi.py``` +##### Output Format +The chat structure is stored as "chat.xml" in the folder "out/". +All media files are stored in the folder out/media/. +Stickers get downloaded too, but they won't get a file extension. +They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format. 
diff --git a/telegramApi.py b/telegramApi.py
new file mode 100755
index 0000000..a29969e
--- /dev/null
+++ b/telegramApi.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""Download one Telegram chat and its media into the out/ folder."""
+from telethon import TelegramClient, events
+from datetime import timedelta
+from xml.sax.saxutils import escape
+import os
+import sys
+
+# EDIT HERE ↓
+api_id = api_id
+api_hash = 'api_hash'
+workers = 4
+session_name = 'user_name'
+# EDIT HERE ↑
+
+def printDialog(d):
+    """Print the id, name and pinned flag of dialog d on one line."""
+    print(d.id, " ", d.name, " ", d.pinned)
+
+def findDialog(dialogs, id):
+    """Return the first dialog whose id equals id, or None if none matches."""
+    for d in dialogs:
+        if d.id == id:
+            return d
+    return None
+
+def saveMessage(message, file, dlid, ownid):
+    """Write one message record to file.
+
+    The record holds the date as epoch seconds, the message text, 1/0 for
+    whether ownid is the sender and, when the message carries media, dlid.
+    Returns the message if it has media (to download later), else None.
+    """
+    # NOTE(review): these separator literals hold only whitespace; markup may
+    # have been lost when this patch was extracted - confirm with the repo.
+    file.write(' \n ')
+    # timestamp() honours tzinfo and, unlike strftime("%s"), is portable
+    file.write(str(int(message.date.timestamp())))
+    file.write('\n ')
+    # text may be None or absent (e.g. media-only messages); &, < and > are
+    # escaped because the output file is chat.xml
+    file.write(escape(getattr(message, 'message', None) or ''))
+    file.write('\n ')
+    file.write(str(int(ownid == message.from_id)))
+    file.write('\n')
+    # record the download id if the message has media
+    out = None
+    if getattr(message, 'media', None) is not None:
+        file.write(' ')
+        file.write(str(dlid))
+        file.write('\n')
+        out = message
+
+    file.write(' \n')
+    return out
+
+class DelayedDownload:
+    """A class to store the data, for other methods to download media later"""
+    message=None
+    id=1
+    def __init__(self, _id, msg):
+        self.id = _id
+        self.message = msg
+
+# create connection
+client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
+client.start()
+
+me = client.get_me()
+#get dialogs
+dialogs = client.get_dialogs(limit=100)
+
+print ("chats loaded. (" , len(dialogs), ")")
+#print chats
+for d in dialogs:
+    printDialog(d)
+try:
+    get = int(input("Please Enter Chat ID: "))
+except ValueError:
+    # non-numeric input can never match a dialog id
+    print ("Unknown Chat ID!")
+    sys.exit(1)
+selectedDialog = findDialog(dialogs, get)
+if selectedDialog is None:
+    print ("Unknown Chat ID!")
+    sys.exit(1)
+
+print ("selected: ", selectedDialog.name, 'retriving chat!')
+
+chat = client.get_messages(selectedDialog, limit=2000000)
+print("retrived ", len(chat), " Messages.")
+
+toDL = []#list of messages, where media should be downloaded
+
+# this commit ships no out/ folder, so create it (and media/) up front
+os.makedirs('out/media', exist_ok=True)
+# mode 'x' refuses to overwrite an existing export
+with open('out/chat.xml', 'x', encoding='utf-8') as fout:
+    fout.write('\n')
+    for c in chat:
+        if saveMessage(c, fout, len(toDL), me.id) is not None:
+            #here is something to download later
+            toDL.append(DelayedDownload(len(toDL), c))
+    fout.write('\n')
+
+print('Chat structure stored. Downloading Media - this may take a while!')
+for dl in toDL:
+    # flush so the progress dots appear while downloading
+    print ('.', end='', flush=True)
+    client.download_media(dl.message, "out/media/" + str(dl.id))
+print('\n', len(toDL), " Media Files Downloaded.")
+print ('End.')