download all chats at once.

This commit is contained in:
MrBesen 2018-06-07 21:09:49 +02:00
parent 16a6f4e7d7
commit d32af73933
4 changed files with 157 additions and 97 deletions

3
.gitignore vendored Normal file → Executable file
View File

@ -1,3 +1,6 @@
out/chat.xml out/chat.xml
out/media/* out/media/*
*.session *.session
config.ini
.idea
TODO

0
LICENSE Normal file → Executable file
View File

35
README.md Normal file → Executable file
View File

@ -5,6 +5,7 @@ requirements:
* python3 * python3
* [telethon](https://github.com/LonamiWebs/Telethon) * [telethon](https://github.com/LonamiWebs/Telethon)
* [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id) * [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id)
install requirements: install requirements:
``` ```
sudo apt install python3-pip git sudo apt install python3-pip git
@ -12,7 +13,9 @@ pip3 install telethon
``` ```
get the downloader: get the downloader:
```git clone https://github.com/mrbesen/TelegramChatDownloader.git``` ```git clone https://github.com/mrbesen/TelegramChatDownloader.git```
open the file telegramApi.py and enter your API ID, token and username.
Open the file config.ini and enter your API ID, API hash and username.
Then run: Then run:
``` ```
@ -21,7 +24,35 @@ cd TelegramChatDownloader/
``` ```
##### Output Format ##### Output Format
The chat structure is stored as "chat.xml" in the folder "out/" The chat structure is stored as "chat.xml" in the folder "out/"
All Media-files are Stored in the Folder out/media/ All Media-files are Stored in the folder out/media/
Stickers get downloaded too, but they won't get a file extension. Stickers get downloaded too, but they won't get a file extension.
They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format. They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format.
A sample chat.xml could look like this:
```
<chat>
<message>
<date>1470214908</date>
<msg>Hi!</msg>
<me>1</me>
</message>
<message>
<date>1470214882</date>
<msg>Heyo</msg>
<me>0</me>
</message>
<message>
<date>1470214756</date>
<msg>Look</msg>
<me>1</me>
<media>0</media>
</message>
<message>
<date>1470070908</date>
<msg>Thats sick</msg>
<me>0</me>
</message>
</chat>
```

View File

@ -1,124 +1,150 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from telethon import TelegramClient, events
from datetime import timedelta
import configparser import configparser
import sys
import os import os
import sys
from datetime import timedelta
from telethon import TelegramClient, events
from telethon.tl.types import User, Channel
def strip(str):
    """Return ``str`` with every non-alphanumeric character removed.

    Used to turn a dialog name into a string that is safe to embed in an
    output file name. Characters are kept when ``isalnum()`` is true for
    them, so non-ASCII letters and digits survive as well.
    """
    kept = [ch for ch in str if ch.isalnum()]
    return ''.join(kept)
def isBot(dialog):
    """Return whether ``dialog`` is a conversation with a bot account.

    Only ``User`` entities can be bots; for any other entity type the
    result is ``False``. For users, the entity's ``bot`` flag is returned.

    The lookup uses the ``dialog`` argument itself. The previous version
    read the module-level loop variable ``d``, which only gave the right
    answer when the caller's loop variable happened to have that name.
    """
    entity = dialog.entity
    if not isinstance(entity, User):
        return False
    return entity.bot
def isChannel(dialog):
    """Return whether ``dialog`` refers to a ``Channel``.

    The check is made on ``dialog.entity``, matching ``isBot``; testing
    the dialog wrapper itself never matches ``Channel``, so channels were
    not filtered out. An object without an ``entity`` attribute (e.g. an
    entity passed in directly) is checked as-is, so existing callers that
    hand in a raw entity keep working.
    """
    entity = getattr(dialog, 'entity', dialog)
    return isinstance(entity, Channel)
def printDialog(id, d):
    """Print one row of the chat overview table.

    ``id`` is the running list index shown to the user; ``d`` supplies the
    ``id``, ``name`` and ``pinned`` attributes for the remaining columns.
    Column widths match the table header printed by the main program.
    """
    cells = (id, d.id, d.name, d.pinned)
    row_template = ' | '.join(('{0:2d}', '{1:14d}', '{2:30}', '{3:1}'))
    print(row_template.format(*cells))
def printDialog(id , d):
# print (d.id, " ", d.name," ", d.pinned)
print('{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id,d.name,d.pinned))
def saveMessage(message, file, dlid, ownid): def saveMessage(message, file, dlid, ownid):
# print(message) out = None
file.write(' <message>\n <date>') if message.message:
file.write(message.date.strftime("%s")) # print(message)
file.write('</date>\n <msg>') file.write(' <message>\n <date>')
file.write(message.message) file.write(message.date.strftime("%s"))
file.write('</msg>\n <me>') file.write('</date>\n <msg>')
file.write(str(int(ownid==message.from_id))) file.write(message.message)
file.write('</me>\n') file.write('</msg>\n <me>')
#dl and write media if exist file.write(str(int(ownid == message.from_id)))
out = None file.write('</me>\n')
if message.media != None: # dl and write media if exist
file.write(' <media>') if message.media != None:
file.write(str(dlid)) file.write(' <media>')
file.write('</media>\n') file.write(str(dlid))
out = message file.write('</media>\n')
out = message
file.write(' </message>\n')
return out
file.write(' </message>\n')
return out
def createFolder(path): def createFolder(path):
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
class DelayedDownload: class DelayedDownload:
"""A class to store the data, for other methods to download media later""" """A class to store the data, for other methods to download media later"""
message=None message = None
id=1 id = 1
def __init__(self, _id, msg):
self.id = _id def __init__(self, _id, msg):
self.message = msg self.id = _id
self.message = msg
#================================== def downloadChat(dialog, filename, downloadmedia):
#MAIN program print ("selected: ", dialog.name, 'retriving chat!')
#read config chat = client.get_messages(dialog, limit=2000000)
print("retrived ", len(chat), " Messages.")
toDL = [] # list of DelayedDownload
createFolder('out/')
while True:
try:
fout = open('out/' + filename + '.xml', 'x')
break
except FileExistsError:
get = input('Override out/' + filename + '.xml [y/n]?')
if get == 'y' or get == 'Y':
# delete
os.remove('out/' + filename + '.xml')
elif get == 'n' or get == 'N':
# exit
print ("Bye.")
exit(2)
fout.write('<chat>\n')
for c in chat:
dl = saveMessage(c, fout, len(toDL), me.id)
if dl != None:
if dl.media != None:
# here is something to download later
toDL.append(DelayedDownload(len(toDL), c))
fout.write('</chat>\n')
fout.close()
if(downloadmedia):
createFolder('out/media/')
print('Chat structure stored. Downloading Media - this may take a while!')
for dl in toDL:
print ('.', end='')
client.download_media(dl.message, "out/media/" + str(dl.id))
print('\n', len(toDL), " Media Files Downloaded.")
# ==================================
# MAIN program
# read config
try: try:
config = configparser.ConfigParser() config = configparser.ConfigParser()
config.read('config.ini') config.read('config.ini')
api_id = int(config.get('Main','api_id')) api_id = int(config.get('Main', 'api_id'))
api_hash = config.get('Main','api_hash') api_hash = config.get('Main', 'api_hash')
workers = int(config.get('Main','workers')) workers = int(config.get('Main', 'workers'))
session_name = config.get('Main','user') session_name = config.get('Main', 'user')
except (configparser.NoSectionError, configparser.NoOptionError, ValueError): except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
print('invalid config.ini') print('invalid config.ini')
exit(3) exit(3)
# create connection # create connection
client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True) client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
client.start() client.start()
me = client.get_me() me = client.get_me()
#get dialogs # get dialogs
dialogs = client.get_dialogs(limit=100) dialogs = client.get_dialogs(limit=100)
print ("chats loaded. (" , len(dialogs), ")") print ("chats loaded. (", len(dialogs), ")")
#print chats # print chats
id = 0 id = 0
#table header # table header
print ('ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————') print (
#content 'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
# content
for d in dialogs: for d in dialogs:
printDialog(id,d) printDialog(id, d)
id = id+1 id = id + 1
get = int(input("Please Enter Chat ID: ")) get = input("Please Enter Chat ID or all: ")
if get < 0 or get >= id: if get == 'all':
print ("Unknown Chat ID!") for d in dialogs:
exit(1) if not isBot(d) and d.entity.id != me.id and not isChannel(d):
downloadChat(d, strip(d.name), False)
elif int(get) < 0 or int(get) >= id:
print ("Unknown Chat ID!")
exit(1)
else:
downloadChat(dialogs[int(get)], 'chat', True)
selectedDialog = dialogs[get]
print ("selected: ", selectedDialog.name, 'retriving chat!')
chat = client.get_messages(selectedDialog, limit=2000000)
print("retrived ", len(chat), " Messages.")
toDL = []#list of DelayedDownload
createFolder('out/')
while True:
try:
fout = open('out/chat.xml','x')
break
except FileExistsError:
get = input("Override out/chat.xml [y/n]?")
if get == 'y' or get == 'Y':
#delete
os.remove('out/chat.xml')
elif get == 'n' or get == 'N':
#exit
print ("Bye.")
exit(2)
fout.write('<chat>\n')
for c in chat:
dl = saveMessage(c, fout, len(toDL),me.id)
if dl != None:
if dl.media != None:
#here is something to download later
toDL.append(DelayedDownload(len(toDL),c))
fout.write('</chat>\n')
fout.close()
createFolder('out/media/')
print('Chat structure stored. Downloading Media - this may take a while!')
for dl in toDL:
print ('.', end='')
client.download_media(dl.message, "out/media/" + str(dl.id))
print('\n', len(toDL), " Media Files Downloaded.")
print ('End.') print ('End.')