Browse Source

download all chats at once.

master
MrBesen 1 year ago
parent
commit
d32af73933
4 changed files with 162 additions and 102 deletions
  1. +3
    -0
      .gitignore
  2. +0
    -0
      LICENSE
  3. +33
    -2
      README.md
  4. +126
    -100
      telegramApi.py

+ 3
- 0
.gitignore View File

@@ -1,3 +1,6 @@
out/chat.xml
out/media/*
*.session
config.ini
.idea
TODO

+ 0
- 0
LICENSE View File


+ 33
- 2
README.md View File

@@ -5,6 +5,7 @@ requirements:
* python3
* [telethon](https://github.com/LonamiWebs/Telethon)
* [TelegramAPI-ID](https://core.telegram.org/api/obtaining_api_id)

install requirements:
```
sudo apt install python3-pip git
@@ -12,7 +13,9 @@ pip3 install telethon
```
get the downloader:
```git clone https://github.com/mrbesen/TelegramChatDownloader.git```
open the file telegramApi.py and enter your API ID, token and username.


Open the file config.ini and enter your API ID, API hash and username.

Then run:
```
@@ -21,7 +24,35 @@ cd TelegramChatDownloader/
```
##### Output Format
The chat structure is stored as "chat.xml" in the folder "out/"
All Media-files are Stored in the Folder out/media/
All media files are stored in the folder out/media/

Stickers get downloaded too, but they won't get a file extension.
They are stored in the [WEBP](https://de.wikipedia.org/wiki/WebP) format.

A sample chat.xml could look like this:
```
<chat>
<message>
<date>1470214908</date>
<msg>Hi!</msg>
<me>1</me>
</message>
<message>
<date>1470214882</date>
<msg>Heyo</msg>
<me>0</me>
</message>
<message>
<date>1470214756</date>
<msg>Look</msg>
<me>1</me>
<media>0</media>
</message>
<message>
<date>1470070908</date>
<msg>Thats sick</msg>
<me>0</me>
</message>
</chat>
```


+ 126
- 100
telegramApi.py View File

@@ -1,124 +1,150 @@
#!/usr/bin/env python3
from telethon import TelegramClient, events
from datetime import timedelta
import configparser
import sys
import os
import sys
from datetime import timedelta
from telethon import TelegramClient, events
from telethon.tl.types import User, Channel

def printDialog(id , d):
# print (d.id, " ", d.name," ", d.pinned)
print('{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id,d.name,d.pinned))

def saveMessage(message, file, dlid, ownid):
# print(message)
file.write(' <message>\n <date>')
file.write(message.date.strftime("%s"))
file.write('</date>\n <msg>')
file.write(message.message)
file.write('</msg>\n <me>')
file.write(str(int(ownid==message.from_id)))
file.write('</me>\n')
#dl and write media if exist
out = None
if message.media != None:
file.write(' <media>')
file.write(str(dlid))
file.write('</media>\n')
out = message

file.write(' </message>\n')
return out
def strip(str):
    """Return ``str`` reduced to its alphanumeric characters.

    Every character for which ``isalnum()`` is false (spaces, punctuation,
    symbols, ...) is dropped; the remaining characters keep their order.
    """
    kept = [ch for ch in str if ch.isalnum()]
    return ''.join(kept)

def createFolder(path):
if not os.path.exists(path):
os.makedirs(path)
def isBot(dialog):
    """Tell whether ``dialog`` is a conversation with a bot account.

    Args:
        dialog: An object exposing the peer as ``dialog.entity``.

    Returns:
        ``False`` when the entity is not a ``User``; otherwise the entity's
        ``bot`` flag.
    """
    # Use the parameter, not a module-level loop variable that merely
    # happens to be called ``d`` at the call site.
    entity = dialog.entity
    if not isinstance(entity, User):
        return False
    return entity.bot

class DelayedDownload:
"""A class to store the data, for other methods to download media later"""
message=None
id=1
def __init__(self, _id, msg):
self.id = _id
self.message = msg
def isChannel(dialog):
    """Tell whether ``dialog`` refers to a channel.

    Args:
        dialog: Either an object exposing the peer as ``dialog.entity`` (the
            same shape ``isBot`` expects) or the entity itself.

    Returns:
        ``True`` when the underlying entity is a ``Channel``.
    """
    # Inspect the wrapped entity, as isBot does; fall back to the argument
    # itself so callers that pass a bare entity keep working.
    entity = getattr(dialog, 'entity', dialog)
    return isinstance(entity, Channel)

def printDialog(id, d):
    """Print one row of the chat overview table.

    Args:
        id: Row number shown to the user for selecting the chat.
        d: Dialog-like object providing ``id``, ``name`` and ``pinned``.
    """
    row = '{0:2d} | {1:14d} | {2:30} | {3:1}'.format(id, d.id, d.name, d.pinned)
    print(row)

#==================================
#MAIN program

#read config
def saveMessage(message, file, dlid, ownid):
    """Write one chat message to ``file`` as a ``<message>`` XML element.

    Messages whose text is empty are skipped: nothing is written for them.

    Args:
        message: Object with ``date`` (a datetime), ``message`` (the text),
            ``from_id`` and ``media`` attributes.
        file: Writable text stream receiving the XML.
        dlid: Number written into ``<media>`` when the message has media.
        ownid: Id of the current account; decides the ``<me>`` flag.

    Returns:
        ``message`` when it was written and carries media (so the caller can
        download it later), otherwise ``None``.
    """
    # Function-scope import keeps the block self-contained.
    from xml.sax.saxutils import escape

    if not message.message:
        return None

    file.write(' <message>\n <date>')
    # timestamp() is portable and honours tzinfo; strftime("%s") is a
    # platform-specific extension that does neither.
    file.write(str(int(message.date.timestamp())))
    file.write('</date>\n <msg>')
    # Escape &, < and > so message text cannot break the XML structure.
    file.write(escape(message.message))
    file.write('</msg>\n <me>')
    file.write(str(int(ownid == message.from_id)))
    file.write('</me>\n')

    out = None
    # Record the media reference; the download itself happens later.
    if message.media is not None:
        file.write(' <media>')
        file.write(str(dlid))
        file.write('</media>\n')
        out = message

    file.write(' </message>\n')
    return out


def createFolder(path):
    """Create the directory ``path``, including missing parents.

    Does nothing when something already exists at ``path``.
    """
    if os.path.exists(path):
        return
    os.makedirs(path)


class DelayedDownload:
    """Remember a message together with an id so its media can be fetched later."""

    # Class-level defaults; every instance overrides both in __init__.
    id = 1
    message = None

    def __init__(self, _id, msg):
        self.message, self.id = msg, _id


def downloadChat(dialog, filename, downloadmedia):
    """Export one dialog to ``out/<filename>.xml`` and optionally fetch its media.

    Relies on the module-level ``client`` (connection) and ``me`` (own
    account) being set up before the call.

    Args:
        dialog: The dialog to export; its ``name`` is printed.
        filename: Base name (without extension) of the XML file in ``out/``.
        downloadmedia: When true, media of the exported messages is
            downloaded to ``out/media/<n>`` after the XML has been written.

    If the target file exists the user is asked whether to overwrite it;
    answering ``n`` terminates the program with exit status 2.
    """
    print ("selected: ", dialog.name, 'retriving chat!')

    chat = client.get_messages(dialog, limit=2000000)
    print("retrived ", len(chat), " Messages.")

    toDL = []  # list of DelayedDownload

    createFolder('out/')
    target = 'out/' + filename + '.xml'

    while True:
        try:
            # Mode 'x' refuses to clobber an existing export. The explicit
            # encoding keeps non-ASCII text writable whatever the locale is.
            fout = open(target, 'x', encoding='utf-8')
            break
        except FileExistsError:
            get = input('Override out/' + filename + '.xml [y/n]?')
            if get == 'y' or get == 'Y':
                # delete, then retry the exclusive open
                os.remove(target)
            elif get == 'n' or get == 'N':
                # exit
                print ("Bye.")
                exit(2)
            # Any other answer simply asks again.

    # The context manager closes the file even if writing a message fails.
    with fout:
        fout.write('<chat>\n')
        for c in chat:
            dl = saveMessage(c, fout, len(toDL), me.id)
            if dl is not None and dl.media is not None:
                # here is something to download later
                toDL.append(DelayedDownload(len(toDL), c))
        fout.write('</chat>\n')

    if downloadmedia:
        createFolder('out/media/')
        print('Chat structure stored. Downloading Media - this may take a while!')
        for dl in toDL:
            # Flush so each progress dot shows up immediately.
            print ('.', end='', flush=True)
            client.download_media(dl.message, "out/media/" + str(dl.id))
        print('\n', len(toDL), " Media Files Downloaded.")

# ==================================
# MAIN program

# read config
try:
config = configparser.ConfigParser()
config.read('config.ini')
api_id = int(config.get('Main','api_id'))
api_hash = config.get('Main','api_hash')
workers = int(config.get('Main','workers'))
session_name = config.get('Main','user')
config = configparser.ConfigParser()
config.read('config.ini')
api_id = int(config.get('Main', 'api_id'))
api_hash = config.get('Main', 'api_hash')
workers = int(config.get('Main', 'workers'))
session_name = config.get('Main', 'user')
except (configparser.NoSectionError, configparser.NoOptionError, ValueError):
print('invalid config.ini')
exit(3)
print('invalid config.ini')
exit(3)

# create connection
client = TelegramClient(session_name, api_id, api_hash, update_workers=workers, spawn_read_thread=True)
client.start()

me = client.get_me()
#get dialogs
# get dialogs
dialogs = client.get_dialogs(limit=100)

print ("chats loaded. (" , len(dialogs), ")")
#print chats
print ("chats loaded. (", len(dialogs), ")")
# print chats
id = 0

#table header
print ('ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
#content
# table header
print (
'ID | Internal ID | Username | pinned\n———+————————————————+————————————————————————————————+———————')
# content
for d in dialogs:
printDialog(id,d)
id = id+1
get = int(input("Please Enter Chat ID: "))
if get < 0 or get >= id:
print ("Unknown Chat ID!")
exit(1)

selectedDialog = dialogs[get]

print ("selected: ", selectedDialog.name, 'retriving chat!')

chat = client.get_messages(selectedDialog, limit=2000000)
print("retrived ", len(chat), " Messages.")

toDL = []#list of DelayedDownload

createFolder('out/')

while True:
try:
fout = open('out/chat.xml','x')
break
except FileExistsError:
get = input("Override out/chat.xml [y/n]?")
if get == 'y' or get == 'Y':
#delete
os.remove('out/chat.xml')
elif get == 'n' or get == 'N':
#exit
print ("Bye.")
exit(2)

fout.write('<chat>\n')
for c in chat:
dl = saveMessage(c, fout, len(toDL),me.id)
if dl != None:
if dl.media != None:
#here is something to download later
toDL.append(DelayedDownload(len(toDL),c))
fout.write('</chat>\n')
fout.close()

createFolder('out/media/')
print('Chat structure stored. Downloading Media - this may take a while!')
for dl in toDL:
print ('.', end='')
client.download_media(dl.message, "out/media/" + str(dl.id))
print('\n', len(toDL), " Media Files Downloaded.")
printDialog(id, d)
id = id + 1
# Ask which chat to export: a row number from the table above, or "all".
get = input("Please Enter Chat ID or all: ")
if get == 'all':
    # Bulk mode: export every dialog except bots, the own account and
    # channels, one XML file per chat, without media.
    for d in dialogs:
        if not isBot(d) and d.entity.id != me.id and not isChannel(d):
            downloadChat(d, strip(d.name), False)
else:
    # Parse once; anything that is not a number counts as an unknown id
    # instead of crashing with a ValueError traceback.
    try:
        selected = int(get)
    except ValueError:
        selected = -1
    if selected < 0 or selected >= id:
        print ("Unknown Chat ID!")
        exit(1)
    downloadChat(dialogs[selected], 'chat', True)

print ('End.')

Loading…
Cancel
Save