download all chats at once.

master
MrBesen 4 years ago
parent 16a6f4e7d7
commit d32af73933
  1. 3
      .gitignore
  2. 0
      LICENSE
  3. 35
      README.md
  4. 226
      telegramApi.py

3
.gitignore vendored

@ -1,3 +1,6 @@
out/chat.xml
out/media/*
*.session
config.ini
.idea
TODO

@ -5,6 +5,7 @@ requirements:
* python3
* [telethon](https://github.com/LonamiWebs/Telethon)
* [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id)
install requirements:
```
sudo apt install python3-pip git
@ -12,7 +13,9 @@ pip3 install telethon
```
get the downloader:
```git clone https://github.com/mrbesen/TelegramChatDownloader.git```
open the file telegramApi.py and enter your API ID, token and username.
open the file config.ini and enter your API ID, hash and username.
Then run:
```
@ -21,7 +24,35 @@ cd TelegramChatDownloader/
```
##### Output Format
The chat structure is stored as "chat.xml" in the folder "out/"
All Media-files are Stored in the Folder out/media/
All media files are stored in the folder out/media/
Stickers get downloaded too, but they won't get a file extension.
They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format.
A sample chat.xml could look like this:
```
<chat>
<message>
<date>1470214908</date>
<msg>Hi!</msg>
<me>1</me>
</message>
<message>
<date>1470214882</date>
<msg>Heyo</msg>
<me>0</me>
</message>
<message>
<date>1470214756</date>
<msg>Look</msg>
<me>1</me>
<media>0</media>
</message>
<message>
<date>1470070908</date>
<msg>Thats sick</msg>
<me>0</me>
</message>
</chat>
```

@ -1,124 +1,150 @@
#!/usr/bin/env python3
from telethon import TelegramClient, events
from datetime import timedelta
import configparser
import sys
import os
import sys
from datetime import timedelta
from telethon import TelegramClient, events
from telethon.tl.types import User, Channel
def printDialog(id, d):
    """Print one row of the chat overview table.

    id -- running index shown in the first column
    d  -- object providing the ``id``, ``name`` and ``pinned`` attributes
    """
    row = '{index:2d} | {internal:14d} | {name:30} | {pinned:1}'
    print(row.format(index=id, internal=d.id, name=d.name, pinned=d.pinned))
def saveMessage(message, file, dlid, ownid):
    """Append one <message> element describing *message* to *file*.

    message -- object with ``date``, ``message``, ``from_id`` and ``media``
    file    -- writable text file the XML is appended to
    dlid    -- number written into <media> when the message carries media
    ownid   -- id of the account owner; decides the <me> flag

    Returns *message* when it has media attached (so the caller can download
    it later), otherwise None.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    file.write(' <message>\n <date>')
    # timestamp() is portable and honours tzinfo; strftime("%s") is a
    # platform-specific extension that is not available everywhere.
    file.write(str(int(message.date.timestamp())))
    file.write('</date>\n <msg>')
    # The text may be missing (None); escape it so '<' and '&' inside a chat
    # message cannot break the XML document.
    file.write(escape(message.message or ''))
    file.write('</msg>\n <me>')
    file.write(str(int(ownid == message.from_id)))
    file.write('</me>\n')

    # Reference the media by its download number; the file itself is
    # fetched later by the caller.
    out = None
    if message.media is not None:
        file.write(' <media>')
        file.write(str(dlid))
        file.write('</media>\n')
        out = message
    file.write(' </message>\n')
    return out
def strip(str):
    """Return *str* reduced to its alphanumeric characters, order preserved."""
    kept = []
    for ch in str:
        if ch.isalnum():
            kept.append(ch)
    return ''.join(kept)
def createFolder(path):
    """Create the directory *path* (including parents) if it is missing.

    Calling it for a directory that already exists is a no-op.
    Raises FileExistsError if *path* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
def isBot(dialog):
    """Tell whether *dialog* is a chat with a bot account.

    Returns False when the dialog's entity is not a ``User``; otherwise
    returns that user's ``bot`` flag.
    """
    # Use the parameter, not the module-level loop variable ``d`` the
    # original body accidentally relied on.
    entity = dialog.entity
    if not isinstance(entity, User):
        return False
    return entity.bot
class DelayedDownload:
    """Holds a message together with an id so its media can be fetched later."""

    # Class-level defaults; every instance overrides both in __init__.
    id = 1
    message = None

    def __init__(self, _id, msg):
        self.message = msg
        self.id = _id
def isChannel(dialog):
    """Tell whether *dialog* refers to a ``Channel``.

    The peer of a dialog lives in its ``entity`` attribute (the same
    attribute isBot inspects), so that is what gets type-checked. An object
    without ``entity`` is checked directly, which keeps calls that pass a
    bare entity working.
    """
    return isinstance(getattr(dialog, 'entity', dialog), Channel)
def printDialog(id, d):
    """Write a single line of the dialog table to stdout.

    id -- position of the dialog in the listing (first column)
    d  -- dialog-like object; its ``id``, ``name`` and ``pinned`` are shown
    """
    columns = (id, d.id, d.name, d.pinned)
    print('{0:2d} | {1:14d} | {2:30} | {3:1}'.format(*columns))
#==================================
#MAIN program
#read config
def saveMessage(message, file, dlid, ownid):
    """Append one <message> element describing *message* to *file*.

    message -- object with ``date``, ``message``, ``from_id`` and ``media``
    file    -- writable text file the XML is appended to
    dlid    -- number written into <media> when the message carries media
    ownid   -- id of the account owner; decides the <me> flag

    Messages without text are skipped: nothing is written and None is
    returned. Otherwise returns *message* when it has media attached (so the
    caller can download it later), else None.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # NOTE(review): this guard also drops media that has no caption —
    # confirm that is intended.
    if not message.message:
        return None

    file.write(' <message>\n <date>')
    # timestamp() is portable and honours tzinfo; strftime("%s") is a
    # platform-specific extension that is not available everywhere.
    file.write(str(int(message.date.timestamp())))
    file.write('</date>\n <msg>')
    # Escape so '<' and '&' inside a chat message cannot break the XML.
    file.write(escape(message.message))
    file.write('</msg>\n <me>')
    file.write(str(int(ownid == message.from_id)))
    file.write('</me>\n')

    # Reference the media by its download number; the file itself is
    # fetched later by the caller.
    out = None
    if message.media is not None:
        file.write(' <media>')
        file.write(str(dlid))
        file.write('</media>\n')
        out = message
    file.write(' </message>\n')
    return out
def createFolder(path):
    """Make sure the directory *path* exists, creating parents as needed.

    An already existing directory is left untouched.
    Raises FileExistsError if *path* exists but is not a directory.
    """
    # A single makedirs call with exist_ok cannot lose a race against
    # another process creating the same directory, unlike exists()+makedirs().
    os.makedirs(path, exist_ok=True)
class DelayedDownload:
    """Record of a message whose media is downloaded at a later point."""

    # Defaults visible on the class itself; __init__ replaces both per instance.
    message, id = None, 1

    def __init__(self, _id, msg):
        self.id, self.message = _id, msg
def downloadChat(dialog, filename, downloadmedia):
    """Store the history of *dialog* as out/<filename>.xml, optionally with media.

    dialog        -- dialog whose messages are fetched via the module-level ``client``
    filename      -- base name (without extension) of the XML file inside out/
    downloadmedia -- when true, attached media is saved to out/media/<number>

    Relies on the module-level ``client`` and ``me`` objects. If the target
    file exists the user is asked whether to overwrite it; answering 'n'
    terminates the process with exit status 2.
    """
    print ("selected: ", dialog.name, 'retriving chat!')
    chat = client.get_messages(dialog, limit=2000000)
    print("retrived ", len(chat), " Messages.")
    toDL = []  # list of DelayedDownload

    createFolder('out/')
    path = 'out/' + filename + '.xml'
    while True:
        try:
            # Mode 'x' refuses to clobber an existing file. The explicit
            # encoding keeps non-ASCII chat text independent of the locale.
            fout = open(path, 'x', encoding='utf-8')
            break
        except FileExistsError:
            get = input('Override ' + path + ' [y/n]?')
            if get in ('y', 'Y'):
                # delete, then retry the exclusive open
                os.remove(path)
            elif get in ('n', 'N'):
                print ("Bye.")
                # sys.exit works even where the interactive ``exit`` helper
                # is not installed.
                sys.exit(2)
            # any other answer: ask again

    # The context manager closes the file even if writing a message fails.
    with fout:
        fout.write('<chat>\n')
        for c in chat:
            dl = saveMessage(c, fout, len(toDL), me.id)
            if dl is not None and dl.media is not None:
                # here is something to download later
                toDL.append(DelayedDownload(len(toDL), c))
        fout.write('</chat>\n')

    if downloadmedia:
        createFolder('out/media/')
        print('Chat structure stored. Downloading Media - this may take a while!')
        for dl in toDL:
            # flush so each progress dot shows up as soon as it is printed
            print ('.', end='', flush=True)
            client.download_media(dl.message, "out/media/" + str(dl.id))
        print('\n', len(toDL), " Media Files Downloaded.")
# ==================================
# MAIN program
# NOTE(review): this region comes from a commit diff view; several statements
# appear twice (differing only in spacing) because the pre- and post-change
# lines are both present, and indentation has been lost.
# read config
# Reads api_id, api_hash, workers and user from the [Main] section of
# config.ini. A missing section/option or a non-integer api_id/workers prints
# 'invalid config.ini' and exits with status 3.
try:
config = configparser.ConfigParser()
config.read('config.ini')
api_id = int(config.get('Main','api_id'))
api_hash = config.get('Main','api_hash')
workers = int(config.get('Main','workers'))
session_name = config.get('Main','user')
config = configparser.ConfigParser()
config.read('config.ini')
api_id = int(config.get('Main', 'api_id'))
api_hash = config.get('Main', 'api_hash')
workers = int(config.get('Main', 'workers'))
session_name = config.get('Main', 'user')
except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
print('invalid config.ini')
exit(3)
print('invalid config.ini')
exit(3)
# create connection
# The configured user name doubles as the session name passed to TelegramClient.
client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
client.start()
# ``me`` supplies the own account id used for the <me> flag and for skipping
# the own dialog in 'all' mode.
me = client.get_me()
#get dialogs
# get dialogs
# Only up to 100 dialogs are requested, so only those can be selected below.
dialogs = client.get_dialogs(limit=100)
print ("chats loaded. (" , len(dialogs), ")")
#print chats
print ("chats loaded. (", len(dialogs), ")")
# print chats
# ``id`` counts the printed rows; after the loop it equals the number of dialogs.
id = 0
#table header
print ('ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
#content
# table header
print (
'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
# content
for d in dialogs:
printDialog(id,d)
id = id+1
# NOTE(review): from here to the 'Media Files Downloaded.' print is the
# pre-change single-chat flow; it mirrors what downloadChat does with the
# fixed file name out/chat.xml.
get = int(input("Please Enter Chat ID: "))
if get < 0 or get >= id:
print ("Unknown Chat ID!")
exit(1)
selectedDialog = dialogs[get]
print ("selected: ", selectedDialog.name, 'retriving chat!')
chat = client.get_messages(selectedDialog, limit=2000000)
print("retrived ", len(chat), " Messages.")
toDL = []#list of DelayedDownload
createFolder('out/')
# Mode 'x' fails when the file exists; the user is then asked whether to
# delete it and retry ('y') or quit with status 2 ('n').
while True:
try:
fout = open('out/chat.xml','x')
break
except FileExistsError:
get = input("Override out/chat.xml [y/n]?")
if get == 'y' or get == 'Y':
#delete
os.remove('out/chat.xml')
elif get == 'n' or get == 'N':
#exit
print ("Bye.")
exit(2)
fout.write('<chat>\n')
for c in chat:
dl = saveMessage(c, fout, len(toDL),me.id)
if dl != None:
if dl.media != None:
#here is something to download later
toDL.append(DelayedDownload(len(toDL),c))
fout.write('</chat>\n')
fout.close()
createFolder('out/media/')
print('Chat structure stored. Downloading Media - this may take a while!')
for dl in toDL:
print ('.', end='')
client.download_media(dl.message, "out/media/" + str(dl.id))
print('\n', len(toDL), " Media Files Downloaded.")
printDialog(id, d)
id = id + 1
# Post-change selection: 'all' stores every dialog that is not a bot, not the
# own account and not a channel, each under its stripped name and without
# media; a valid numeric index stores that one chat as 'chat' including media.
# NOTE(review): any other non-numeric input makes int(get) raise ValueError,
# which is not handled here.
get = input("Please Enter Chat ID or all: ")
if get == 'all':
for d in dialogs:
if not isBot(d) and d.entity.id != me.id and not isChannel(d):
downloadChat(d, strip(d.name), False)
elif int(get) < 0 or int(get) >= id:
print ("Unknown Chat ID!")
exit(1)
else:
downloadChat(dialogs[int(get)], 'chat', True)
print ('End.')

Loading…
Cancel
Save