download all chats at once.

This commit is contained in:
MrBesen 2018-06-07 21:09:49 +02:00
父節點 16a6f4e7d7
當前提交 d32af73933
共有 4 個文件被更改,包括 157 次插入97 次删除

3
.gitignore vendored Normal file → Executable file
查看文件

@ -1,3 +1,6 @@
out/chat.xml
out/media/*
*.session
config.ini
.idea
TODO

0
LICENSE Normal file → Executable file
查看文件

35
README.md Normal file → Executable file
查看文件

@ -5,6 +5,7 @@ requirements:
* python3
* [telethon](https://github.com/LonamiWebs/Telethon)
* [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id)
install requirements:
```
sudo apt install python3-pip git
@ -12,7 +13,9 @@ pip3 install telethon
```
get the downloader:
```git clone https://github.com/mrbesen/TelegramChatDownloader.git```
open the file telegramApi.py and enter your API ID, token and username.
open the file config.ini and enter your API ID, hash and username.
Then run:
```
@ -21,7 +24,35 @@ cd TelegramChatDownloader/
```
##### Output Format
The chat structure is stored as "chat.xml" in the folder "out/"
All Media-files are Stored in the Folder out/media/
All Media-files are Stored in the folder out/media/
Stickers get downloaded too, but they won't get a file extension.
They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format.
A sample chat.xml could look like this:
```
<chat>
<message>
<date>1470214908</date>
<msg>Hi!</msg>
<me>1</me>
</message>
<message>
<date>1470214882</date>
<msg>Heyo</msg>
<me>0</me>
</message>
<message>
<date>1470214756</date>
<msg>Look</msg>
<me>1</me>
<media>0</media>
</message>
<message>
<date>1470070908</date>
<msg>Thats sick</msg>
<me>0</me>
</message>
</chat>
```

查看文件

@ -1,124 +1,150 @@
#!/usr/bin/env python3
from telethon import TelegramClient, events
from datetime import timedelta
import configparser
import sys
import os
import sys
from datetime import timedelta
from telethon import TelegramClient, events
from telethon.tl.types import User, Channel
def strip(str):
    """Return *str* with every non-alphanumeric character removed.

    Used to turn a chat name into something usable as a file name.
    """
    # NOTE: the parameter shadows the builtin ``str``; the name is kept
    # so existing callers are unaffected.
    return ''.join(filter(lambda ch: ch.isalnum(), str))
def isBot(dialog):
    """Tell whether *dialog* is a conversation with a bot account.

    dialog -- a dialog object whose ``entity`` attribute is the peer.

    Returns False when the peer is not a ``User``; otherwise returns the
    user's ``bot`` flag.
    """
    # Look at the argument itself, not at a module-level loop variable
    # that merely happens to have the same value at the call site.
    entity = dialog.entity
    return isinstance(entity, User) and entity.bot
def isChannel(dialog):
    """Tell whether *dialog* refers to a channel.

    dialog -- a dialog object whose ``entity`` attribute is the peer.
              An object without ``entity`` is checked directly, so passing
              a bare ``Channel`` keeps working.

    Returns True when the peer is a ``Channel`` instance.
    """
    # The dialog wrapper is never a Channel itself; the peer it wraps is
    # what carries the type (same approach as isBot).
    # NOTE(review): supergroups are presumably Channel instances too and
    # would be reported as channels here - confirm that is intended.
    entity = getattr(dialog, 'entity', dialog)
    return isinstance(entity, Channel)
def printDialog(id, d):
    """Print one row of the chat-selection table.

    id -- number shown in the first column (the value the user types in)
    d  -- dialog object; its ``id``, ``name`` and ``pinned`` attributes
          fill the remaining columns
    """
    columns = (id, d.id, d.name, d.pinned)
    row = '{0:2d} | {1:14d} | {2:30} | {3:1}'.format(*columns)
    print(row)
def saveMessage(message, file, dlid, ownid):
    """Append one ``<message>`` element for *message* to *file*.

    message -- message object; ``message``, ``date``, ``from_id`` and
               ``media`` are read
    file    -- writable text file the XML is appended to
    dlid    -- number written into ``<media>``; the media file is expected
               to be stored under this number later
    ownid   -- id of the logged-in user, used to fill ``<me>``

    Returns *message* when it carries media that still has to be
    downloaded, otherwise None.
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    from xml.sax.saxutils import escape

    # Messages without text are skipped entirely.
    # NOTE(review): this also skips media that has no text/caption, so
    # such media is never downloaded - confirm that this is wanted.
    if not message.message:
        return None

    out = None
    file.write(' <message>\n <date>')
    # timestamp() instead of strftime("%s"): "%s" is a platform-specific
    # extension and ignores the tzinfo of timezone-aware datetimes.
    file.write(str(int(message.date.timestamp())))
    file.write('</date>\n <msg>')
    # Escape &, < and > so that arbitrary chat text cannot break the XML.
    file.write(escape(message.message))
    file.write('</msg>\n <me>')
    file.write(str(int(ownid == message.from_id)))
    file.write('</me>\n')
    # reference the media by number; the caller downloads it later
    if message.media is not None:
        file.write(' <media>')
        file.write(str(dlid))
        file.write('</media>\n')
        out = message
    file.write(' </message>\n')
    return out
def createFolder(path):
    """Create the directory *path* (including parents) if it is missing.

    Calling it for a directory that already exists is a no-op.
    """
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(path, exist_ok=True)
class DelayedDownload:
    """Remember a message together with a number, so that its media can
    be downloaded by other code at a later point."""

    # class-level defaults; __init__ overrides both on every instance
    id = 1
    message = None

    def __init__(self, _id, msg):
        """Store the number *_id* and the message *msg*."""
        self.id, self.message = _id, msg
#==================================
#MAIN program
def downloadChat(dialog, filename, downloadmedia):
    """Export the messages of *dialog* to ``out/<filename>.xml``.

    dialog        -- the dialog whose messages are fetched
    filename      -- base name (without extension) of the XML file
    downloadmedia -- when true, media referenced by the exported messages
                     is afterwards downloaded into ``out/media/``

    Uses the module-level ``client`` and ``me``. If the target file already
    exists the user is asked whether to overwrite it; answering "n" ends
    the program with exit status 2.
    """
    print ("selected: ", dialog.name, 'retriving chat!')
    chat = client.get_messages(dialog, limit=2000000)
    print("retrived ", len(chat), " Messages.")

    toDL = []  # list of DelayedDownload
    createFolder('out/')
    target = 'out/' + filename + '.xml'

    # 'x' mode fails if the file exists, so nothing is overwritten without
    # asking; any answer other than y/n simply repeats the question.
    while True:
        try:
            # explicit UTF-8: chat text may contain characters the locale
            # encoding cannot represent
            fout = open(target, 'x', encoding='utf-8')
            break
        except FileExistsError:
            get = input('Override ' + target + ' [y/n]?')
            if get == 'y' or get == 'Y':
                # delete
                os.remove(target)
            elif get == 'n' or get == 'N':
                # exit
                print ("Bye.")
                sys.exit(2)

    # the with-block closes the file even if writing a message fails
    with fout:
        fout.write('<chat>\n')
        for c in chat:
            dl = saveMessage(c, fout, len(toDL), me.id)
            if dl is not None and dl.media is not None:
                # here is something to download later
                toDL.append(DelayedDownload(len(toDL), c))
        fout.write('</chat>\n')

    if downloadmedia:
        createFolder('out/media/')
        print('Chat structure stored. Downloading Media - this may take a while!')
        for dl in toDL:
            print ('.', end='')
            client.download_media(dl.message, "out/media/" + str(dl.id))
        print('\n', len(toDL), " Media Files Downloaded.")
# ==================================
# MAIN program
# read config
try:
    config = configparser.ConfigParser()
    config.read('config.ini')
    api_id = int(config.get('Main', 'api_id'))
    api_hash = config.get('Main', 'api_hash')
    workers = int(config.get('Main', 'workers'))
    session_name = config.get('Main', 'user')
except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
    # a missing file shows up here as a missing section
    print('invalid config.ini')
    sys.exit(3)

# create connection
client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
client.start()
me = client.get_me()

# get dialogs
dialogs = client.get_dialogs(limit=100)
print ("chats loaded. (", len(dialogs), ")")

# print chats
id = 0  # after the loop: number of listed chats
# table header
print (
    'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
# content
for d in dialogs:
    printDialog(id, d)
    id = id + 1

get = input("Please Enter Chat ID or all: ")
if get == 'all':
    # every chat except bots, the chat with oneself and channels;
    # media is not downloaded in this mode
    for d in dialogs:
        if not isBot(d) and d.entity.id != me.id and not isChannel(d):
            downloadChat(d, strip(d.name), False)
else:
    # anything that is not a number is treated like an unknown chat id
    # instead of ending in a traceback
    try:
        selected = int(get)
    except ValueError:
        selected = -1
    if selected < 0 or selected >= id:
        print ("Unknown Chat ID!")
        sys.exit(1)
    downloadChat(dialogs[selected], 'chat', True)

print ('End.')