Merge pull request #204 from emcifuntik:master
Speech to text transcription
This commit is contained in:
commit
7ff73c2d7f
|
@ -163,6 +163,13 @@ async function wrapVoiceMessage(audioEl: AudioElement) {
|
|||
|
||||
const {svg, container: svgContainer, availW} = createWaveformBars(waveform, doc.duration);
|
||||
|
||||
const audioControlsDiv = document.createElement('div');
|
||||
audioControlsDiv.classList.add('audio-controls');
|
||||
|
||||
const audioTimelineDiv = document.createElement('div');
|
||||
audioTimelineDiv.classList.add('audio-timeline');
|
||||
audioControlsDiv.append(audioTimelineDiv);
|
||||
|
||||
const fakeSvgContainer = svgContainer.cloneNode(true) as HTMLElement;
|
||||
fakeSvgContainer.classList.add('audio-waveform-fake');
|
||||
svgContainer.classList.add('audio-waveform-background');
|
||||
|
@ -173,7 +180,67 @@ async function wrapVoiceMessage(audioEl: AudioElement) {
|
|||
|
||||
const timeDiv = document.createElement('div');
|
||||
timeDiv.classList.add('audio-time');
|
||||
audioEl.append(waveformContainer, timeDiv);
|
||||
audioTimelineDiv.append(waveformContainer, timeDiv);
|
||||
|
||||
audioEl.append(audioControlsDiv);
|
||||
|
||||
const isPremium: boolean = rootScope.premium;
|
||||
if (isPremium) {
|
||||
const speechRecognitionDiv = document.createElement('div');
|
||||
speechRecognitionDiv.classList.add('audio-to-text-button');
|
||||
const speechRecognitionIcon = document.createElement('span');
|
||||
speechRecognitionIcon.innerHTML = '→A';
|
||||
const speechRecognitionLoader = document.createElement('div');
|
||||
speechRecognitionLoader.classList.add('loader');
|
||||
speechRecognitionLoader.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 24"><style></style><rect fill="transparent" stroke-width="3" stroke-linejoin="round" rx="6" ry="6" stroke="var(--message-out-primary-color)" stroke-dashoffset="1" stroke-dasharray="32,68" width="32" height="24"></rect></svg>'
|
||||
speechRecognitionDiv.append(speechRecognitionIcon, speechRecognitionLoader);
|
||||
|
||||
const speechTextDiv = document.createElement('div');
|
||||
speechTextDiv.innerHTML = '';
|
||||
speechTextDiv.classList.add('audio-to-text');
|
||||
speechTextDiv.style.display = 'none';
|
||||
|
||||
speechRecognitionDiv.onclick = async () => {
|
||||
if (audioEl.transcriptionState == 0) {
|
||||
if (speechTextDiv.innerHTML !== '') {
|
||||
speechTextDiv.style.display = 'block';
|
||||
speechRecognitionIcon.innerHTML = '^';
|
||||
//TODO: State to enum
|
||||
audioEl.transcriptionState = 2;
|
||||
} else {
|
||||
audioEl.transcriptionState = 1;
|
||||
speechRecognitionLoader.classList.add('active');
|
||||
let transcription;
|
||||
|
||||
try {
|
||||
transcription = await audioEl.managers.appMessagesManager.transcribeAudio(message);
|
||||
} catch(err) {
|
||||
speechRecognitionLoader.classList.remove('active');
|
||||
audioEl.transcriptionState = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (transcription.pFlags.pending === true) {
|
||||
return;
|
||||
}
|
||||
|
||||
speechTextDiv.innerHTML = transcription.text;
|
||||
speechTextDiv.style.display = 'block';
|
||||
speechRecognitionIcon.innerHTML = '^';
|
||||
speechRecognitionLoader.classList.remove('active');
|
||||
audioEl.transcriptionState = 2;
|
||||
}
|
||||
} else if (audioEl.transcriptionState == 2) {
|
||||
//Hide transcription
|
||||
speechRecognitionIcon.innerHTML = '→A';
|
||||
speechTextDiv.style.display = 'none';
|
||||
audioEl.transcriptionState = 0;
|
||||
}
|
||||
};
|
||||
|
||||
audioControlsDiv.append(speechRecognitionDiv);
|
||||
audioEl.append(speechTextDiv);
|
||||
}
|
||||
|
||||
let progress = svg as any as HTMLElement;
|
||||
|
||||
|
@ -426,6 +493,7 @@ export default class AudioElement extends HTMLElement {
|
|||
public lazyLoadQueue: LazyLoadQueue;
|
||||
public loadPromises: Promise<any>[];
|
||||
public managers: AppManagers;
|
||||
public transcriptionState: number = 0;
|
||||
|
||||
private listenerSetter = new ListenerSetter();
|
||||
private onTypeDisconnect: () => void;
|
||||
|
|
|
@ -620,6 +620,29 @@ export default class ChatBubbles {
|
|||
bubble.classList.add('is-error');
|
||||
});
|
||||
|
||||
this.listenerSetter.add(rootScope)('message_transcribed', async({peerId, mid, text}) => {
|
||||
console.log(peerId, mid, text);
|
||||
if(peerId !== this.peerId) return;
|
||||
|
||||
const bubble = this.bubbles[mid];
|
||||
if(!bubble) return;
|
||||
|
||||
//TODO: Move it to AudioElement method `finishVoiceTranscription`
|
||||
const audioElement = bubble.querySelector('audio-element') as AudioElement;
|
||||
if (audioElement) {
|
||||
const speechTextDiv = audioElement.querySelector('.audio-to-text') as HTMLElement;
|
||||
const speechRecognitionIcon = audioElement.querySelector('.audio-to-text-button span');
|
||||
const speechRecognitionLoader = audioElement.querySelector('.loader');
|
||||
if (speechTextDiv && speechRecognitionIcon) {
|
||||
speechTextDiv.innerHTML = text;
|
||||
speechTextDiv.style.display = 'block';
|
||||
speechRecognitionIcon.innerHTML = '^';
|
||||
speechRecognitionLoader.classList.remove('active');
|
||||
audioElement.transcriptionState = 2;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.listenerSetter.add(rootScope)('album_edit', ({peerId, messages, deletedMids}) => {
|
||||
if(peerId !== this.peerId) return;
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ import LazyLoadQueueBase from '../../components/lazyLoadQueueBase';
|
|||
import deferredPromise, {CancellablePromise} from '../../helpers/cancellablePromise';
|
||||
import tsNow from '../../helpers/tsNow';
|
||||
import {randomLong} from '../../helpers/random';
|
||||
import {Chat, ChatFull, Dialog as MTDialog, DialogPeer, DocumentAttribute, InputMedia, InputMessage, InputPeerNotifySettings, InputSingleMedia, Message, MessageAction, MessageEntity, MessageFwdHeader, MessageMedia, MessageReplies, MessageReplyHeader, MessagesDialogs, MessagesFilter, MessagesMessages, MethodDeclMap, NotifyPeer, PeerNotifySettings, PhotoSize, SendMessageAction, Update, Photo, Updates, ReplyMarkup, InputPeer, InputPhoto, InputDocument, InputGeoPoint, WebPage, GeoPoint, ReportReason, MessagesGetDialogs, InputChannel, InputDialogPeer, ReactionCount, MessagePeerReaction, MessagesSearchCounter, Peer, MessageReactions, Document, InputFile, Reaction, ForumTopic as MTForumTopic, MessagesForumTopics, MessagesGetReplies, MessagesGetHistory, MessagesAffectedHistory, UrlAuthResult} from '../../layer';
|
||||
import {Chat, ChatFull, Dialog as MTDialog, DialogPeer, DocumentAttribute, InputMedia, InputMessage, InputPeerNotifySettings, InputSingleMedia, Message, MessageAction, MessageEntity, MessageFwdHeader, MessageMedia, MessageReplies, MessageReplyHeader, MessagesDialogs, MessagesFilter, MessagesMessages, MethodDeclMap, NotifyPeer, PeerNotifySettings, PhotoSize, SendMessageAction, Update, Photo, Updates, ReplyMarkup, InputPeer, InputPhoto, InputDocument, InputGeoPoint, WebPage, GeoPoint, ReportReason, MessagesGetDialogs, InputChannel, InputDialogPeer, ReactionCount, MessagePeerReaction, MessagesSearchCounter, Peer, MessageReactions, Document, InputFile, Reaction, ForumTopic as MTForumTopic, MessagesForumTopics, MessagesGetReplies, MessagesGetHistory, MessagesAffectedHistory, UrlAuthResult, MessagesTranscribedAudio} from '../../layer';
|
||||
import {ArgumentTypes, InvokeApiOptions} from '../../types';
|
||||
import {logger, LogTypes} from '../logger';
|
||||
import {ReferenceContext} from '../mtproto/referenceDatabase';
|
||||
|
@ -313,7 +313,9 @@ export class AppMessagesManager extends AppManager {
|
|||
|
||||
updateDeleteScheduledMessages: this.onUpdateDeleteScheduledMessages,
|
||||
|
||||
updateMessageExtendedMedia: this.onUpdateMessageExtendedMedia
|
||||
updateMessageExtendedMedia: this.onUpdateMessageExtendedMedia,
|
||||
|
||||
updateTranscribedAudio: this.onUpdateTranscribedAudio
|
||||
});
|
||||
|
||||
// ! Invalidate notify settings, can optimize though
|
||||
|
@ -522,6 +524,27 @@ export class AppMessagesManager extends AppManager {
|
|||
});
|
||||
}
|
||||
|
||||
public async transcribeAudio(message: any): Promise<MessagesTranscribedAudio> {
|
||||
console.log('Method called');
|
||||
const {id, peerId} = message;
|
||||
|
||||
let promise: Promise<MessagesTranscribedAudio>, params: any;
|
||||
if(peerId) {
|
||||
promise = this.apiManager.invokeApiSingleProcess({
|
||||
method: 'messages.transcribeAudio',
|
||||
params: params = {
|
||||
peer: this.appPeersManager.getInputPeerById(peerId),
|
||||
msg_id: id
|
||||
},
|
||||
processResult: (result) => {
|
||||
console.log(result);
|
||||
return result;
|
||||
}
|
||||
});
|
||||
}
|
||||
return promise;
|
||||
}
|
||||
|
||||
public async sendText(peerId: PeerId, text: string, options: MessageSendingParams & Partial<{
|
||||
entities: MessageEntity[],
|
||||
viaBotId: BotId,
|
||||
|
@ -5248,6 +5271,16 @@ export class AppMessagesManager extends AppManager {
|
|||
});
|
||||
};
|
||||
|
||||
/**
 * Handles the MTProto `updateTranscribedAudio` update: once a
 * transcription is final, broadcasts its text so the chat UI can
 * fill in the voice message's transcription block.
 */
private onUpdateTranscribedAudio = (update: Update.updateTranscribedAudio) => {
  // Intermediate (still-pending) updates carry partial text; wait for the final one.
  if(update.pFlags.pending === true) {
    return;
  }

  this.rootScope.dispatchEvent('message_transcribed', {
    peerId: this.appPeersManager.getPeerId(update.peer),
    mid: generateMessageId(update.msg_id),
    text: update.text
  });
};
|
||||
|
||||
public setDialogToStateIfMessageIsTop(message: MyMessage) {
|
||||
if(this.isMessageIsTopMessage(message)) {
|
||||
this.dialogsStorage.setDialogToState(this.getDialogOnly(message.peerId));
|
||||
|
|
|
@ -73,6 +73,7 @@ export type BroadcastEvents = {
|
|||
'message_edit': {storageKey: MessagesStorageKey, peerId: PeerId, mid: number, message: MyMessage},
|
||||
'message_sent': {storageKey: MessagesStorageKey, tempId: number, tempMessage: any, mid: number, message: MyMessage},
|
||||
'message_error': {storageKey: MessagesStorageKey, tempId: number, error: ApiError},
|
||||
'message_transcribed': {peerId: PeerId, mid: number, text: string},
|
||||
'messages_views': {peerId: PeerId, mid: number, views: number}[],
|
||||
'messages_reactions': {message: Message.message, changedResults: ReactionCount[]}[],
|
||||
'messages_pending': void,
|
||||
|
|
|
@ -428,7 +428,7 @@
|
|||
// &.audio-48 {
|
||||
--icon-size: 3rem;
|
||||
--icon-margin: .5625rem;
|
||||
height: var(--icon-size);
|
||||
min-height: var(--icon-size);
|
||||
|
||||
.audio-details {
|
||||
margin-top: 3px;
|
||||
|
@ -439,6 +439,66 @@
|
|||
margin-bottom: -2px;
|
||||
}
|
||||
|
||||
.audio-controls {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: .25em;
|
||||
|
||||
.audio-to-text-button {
|
||||
background: var(--message-transcribe-button);
|
||||
width: max-content;
|
||||
margin-top: 0.2rem;
|
||||
font-size: .8em;
|
||||
border-radius: 0.3rem;
|
||||
width: 2rem;
|
||||
height: 1.5rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
|
||||
span {
|
||||
line-height: 0;
|
||||
letter-spacing: .15em;
|
||||
}
|
||||
|
||||
.loader {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
opacity: 0;
|
||||
transition: opacity .2s;
|
||||
|
||||
&.active {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
svg {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
@keyframes loading {
|
||||
from { stroke-dashoffset: 100 }
|
||||
to { stroke-dashoffset: 0 }
|
||||
}
|
||||
rect {
|
||||
animation: 1s ease-in-out loading infinite;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
.audio-to-text {
|
||||
margin-left: calc(var(--icon-size)*-1 - var(--icon-margin) - var(--padding));
|
||||
margin-top: 1em;
|
||||
margin-bottom: 0.6em;
|
||||
}
|
||||
|
||||
.part {
|
||||
height: 112px !important;
|
||||
width: 112px !important;
|
||||
|
@ -584,4 +644,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1411,7 +1411,7 @@ $bubble-border-radius-big: 12px;
|
|||
}
|
||||
|
||||
@include respond-to(handhelds) {
|
||||
height: 2.375rem;
|
||||
min-height: 2.375rem;
|
||||
--icon-margin: .6875rem;
|
||||
|
||||
.audio-details {
|
||||
|
@ -1440,7 +1440,7 @@ $bubble-border-radius-big: 12px;
|
|||
.message.audio-message,
|
||||
.message.voice-message {
|
||||
// width: 335px;
|
||||
max-width: unquote("min(100%, 335px)") !important;
|
||||
max-width: unquote("min(100%, 364px)") !important;
|
||||
|
||||
@include respond-to(handhelds) {
|
||||
// width: 280px;
|
||||
|
@ -2718,6 +2718,10 @@ $bubble-border-radius-big: 12px;
|
|||
color: var(--message-out-primary-color);
|
||||
}
|
||||
|
||||
.audio-to-text-button {
|
||||
background: var(--message-transcribe-button-out);
|
||||
}
|
||||
|
||||
/* html:not(.is-firefox) */ &-wrapper {
|
||||
@include respond-to(medium-screens) {
|
||||
transform: scale(1) translateX(calc((var(--chat-input-size) + #{$btn-send-margin}) * -1));
|
||||
|
|
|
@ -202,7 +202,6 @@
|
|||
padding-inline-start: var(--padding-left);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
cursor: pointer;
|
||||
// position: relative;
|
||||
user-select: none;
|
||||
|
|
|
@ -256,6 +256,8 @@ $chat-input-inner-padding-handhelds: .25rem;
|
|||
--light-filled-message-primary-color: var(--light-filled-primary-color);
|
||||
--message-secondary-color: var(--secondary-color);
|
||||
--message-error-color: var(--danger-color);
|
||||
--message-transcribe-button: #e8f3ff;
|
||||
--message-transcribe-button-out: #cceebf;
|
||||
|
||||
--message-out-link-color: var(--link-color);
|
||||
--message-out-status-color: var(--message-out-primary-color);
|
||||
|
@ -322,6 +324,8 @@ $chat-input-inner-padding-handhelds: .25rem;
|
|||
--message-checkbox-border-color: #fff;
|
||||
--message-secondary-color: var(--secondary-color);
|
||||
--message-error-color: #fff;
|
||||
--message-transcribe-button: #2a2a3c;
|
||||
--message-transcribe-button-out: #8373d3;
|
||||
|
||||
--message-out-link-color: #fff;
|
||||
--message-out-status-color: #fff;
|
||||
|
|
Loading…
Reference in New Issue