mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
return both en and fr in transcription
This commit is contained in:
@@ -6,6 +6,7 @@ Reflector GPU backend - transcriber
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
from fastapi import File
|
||||||
from modal import Image, Secret, Stub, asgi_app, method
|
from modal import Image, Secret, Stub, asgi_app, method
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
@@ -18,7 +19,7 @@ WHISPER_CACHE_DIR: str = "/cache/whisper"
|
|||||||
# Translation Model
|
# Translation Model
|
||||||
TRANSLATION_MODEL = "facebook/m2m100_418M"
|
TRANSLATION_MODEL = "facebook/m2m100_418M"
|
||||||
|
|
||||||
stub = Stub(name="reflector-transcriber")
|
stub = Stub(name="reflector-lang")
|
||||||
|
|
||||||
|
|
||||||
def download_whisper():
|
def download_whisper():
|
||||||
@@ -129,6 +130,8 @@ class Whisper:
|
|||||||
translation = result[0].strip()
|
translation = result[0].strip()
|
||||||
multilingual_transcript[target_language] = translation
|
multilingual_transcript[target_language] = translation
|
||||||
|
|
||||||
|
print(multilingual_transcript)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"text": multilingual_transcript,
|
"text": multilingual_transcript,
|
||||||
"words": words
|
"words": words
|
||||||
@@ -149,7 +152,9 @@ class Whisper:
|
|||||||
)
|
)
|
||||||
@asgi_app()
|
@asgi_app()
|
||||||
def web():
|
def web():
|
||||||
from fastapi import Depends, FastAPI, Form, HTTPException, UploadFile, status
|
from typing import List
|
||||||
|
|
||||||
|
from fastapi import Body, Depends, FastAPI, Form, HTTPException, UploadFile, status
|
||||||
from fastapi.security import OAuth2PasswordBearer
|
from fastapi.security import OAuth2PasswordBearer
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
@@ -174,9 +179,9 @@ def web():
|
|||||||
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
|
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
|
||||||
async def transcribe(
|
async def transcribe(
|
||||||
file: UploadFile,
|
file: UploadFile,
|
||||||
timestamp: Annotated[float, Form()] = 0,
|
|
||||||
source_language: Annotated[str, Form()] = "en",
|
source_language: Annotated[str, Form()] = "en",
|
||||||
target_language: Annotated[str, Form()] = "en"
|
target_language: Annotated[str, Form()] = "fr",
|
||||||
|
timestamp: Annotated[float, Form()] = 0.0
|
||||||
) -> TranscriptResponse:
|
) -> TranscriptResponse:
|
||||||
audio_data = await file.read()
|
audio_data = await file.read()
|
||||||
audio_suffix = file.filename.split(".")[-1]
|
audio_suffix = file.filename.split(".")[-1]
|
||||||
|
|||||||
@@ -58,7 +58,10 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
# Update code here once this is possible.
|
# Update code here once this is possible.
|
||||||
# i.e) extract from context/session objects
|
# i.e) extract from context/session objects
|
||||||
source_language = "en"
|
source_language = "en"
|
||||||
target_language = "en"
|
|
||||||
|
# TODO: target lang is set to "fr" for demo purposes
|
||||||
|
# Revert back once language selection is implemented
|
||||||
|
target_language = "fr"
|
||||||
languages = TranslationLanguages()
|
languages = TranslationLanguages()
|
||||||
|
|
||||||
# Only way to set the target should be the UI element like dropdown.
|
# Only way to set the target should be the UI element like dropdown.
|
||||||
@@ -74,7 +77,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
files=files,
|
files=files,
|
||||||
timeout=self.timeout,
|
timeout=self.timeout,
|
||||||
headers=self.headers,
|
headers=self.headers,
|
||||||
json=json_payload,
|
data=json_payload,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
@@ -84,12 +87,14 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
||||||
# Sanity check for translation status in the result
|
# Sanity check for translation status in the result
|
||||||
|
translation = ""
|
||||||
if target_language in result["text"]:
|
if target_language in result["text"]:
|
||||||
text = result["text"][target_language]
|
translation = result["text"][target_language]
|
||||||
else:
|
|
||||||
text = result["text"][source_language]
|
text = result["text"][source_language]
|
||||||
|
|
||||||
transcript = Transcript(
|
transcript = Transcript(
|
||||||
text=text,
|
text=text,
|
||||||
|
translation=translation,
|
||||||
words=[
|
words=[
|
||||||
Word(
|
Word(
|
||||||
text=word["text"],
|
text=word["text"],
|
||||||
|
|||||||
@@ -34,12 +34,12 @@ class TranscriptLinerProcessor(Processor):
|
|||||||
if "." not in word.text:
|
if "." not in word.text:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
partial.translation = self.transcript.translation
|
||||||
# emit line
|
# emit line
|
||||||
await self.emit(partial)
|
await self.emit(partial)
|
||||||
|
|
||||||
# create new transcript
|
# create new transcript
|
||||||
partial = Transcript(words=[])
|
partial = Transcript(words=[])
|
||||||
|
|
||||||
self.transcript = partial
|
self.transcript = partial
|
||||||
|
|
||||||
async def _flush(self):
|
async def _flush(self):
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ class Word(BaseModel):
|
|||||||
|
|
||||||
class Transcript(BaseModel):
|
class Transcript(BaseModel):
|
||||||
text: str = ""
|
text: str = ""
|
||||||
|
translation: str = ""
|
||||||
words: list[Word] = None
|
words: list[Word] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -84,7 +85,7 @@ class Transcript(BaseModel):
|
|||||||
words = [
|
words = [
|
||||||
Word(text=word.text, start=word.start, end=word.end) for word in self.words
|
Word(text=word.text, start=word.start, end=word.end) for word in self.words
|
||||||
]
|
]
|
||||||
return Transcript(text=self.text, words=words)
|
return Transcript(text=self.text, translation=self.translation, words=words)
|
||||||
|
|
||||||
|
|
||||||
class TitleSummary(BaseModel):
|
class TitleSummary(BaseModel):
|
||||||
|
|||||||
@@ -1,25 +1,27 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from fastapi import Request, APIRouter
|
|
||||||
from reflector.events import subscribers_shutdown
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from reflector.logger import logger
|
|
||||||
from aiortc import RTCPeerConnection, RTCSessionDescription, MediaStreamTrack
|
|
||||||
from json import loads, dumps
|
|
||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
|
from json import dumps, loads
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import av
|
import av
|
||||||
|
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
|
||||||
|
from fastapi import APIRouter, Request
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from reflector.events import subscribers_shutdown
|
||||||
|
from reflector.logger import logger
|
||||||
from reflector.processors import (
|
from reflector.processors import (
|
||||||
Pipeline,
|
|
||||||
AudioChunkerProcessor,
|
AudioChunkerProcessor,
|
||||||
|
AudioFileWriterProcessor,
|
||||||
AudioMergeProcessor,
|
AudioMergeProcessor,
|
||||||
AudioTranscriptAutoProcessor,
|
AudioTranscriptAutoProcessor,
|
||||||
AudioFileWriterProcessor,
|
FinalSummary,
|
||||||
|
Pipeline,
|
||||||
|
TitleSummary,
|
||||||
|
Transcript,
|
||||||
|
TranscriptFinalSummaryProcessor,
|
||||||
TranscriptLinerProcessor,
|
TranscriptLinerProcessor,
|
||||||
TranscriptTopicDetectorProcessor,
|
TranscriptTopicDetectorProcessor,
|
||||||
TranscriptFinalSummaryProcessor,
|
|
||||||
Transcript,
|
|
||||||
TitleSummary,
|
|
||||||
FinalSummary,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
sessions = []
|
sessions = []
|
||||||
@@ -108,6 +110,7 @@ async def rtc_offer_base(
|
|||||||
result = {
|
result = {
|
||||||
"cmd": "SHOW_TRANSCRIPTION",
|
"cmd": "SHOW_TRANSCRIPTION",
|
||||||
"text": transcript.text,
|
"text": transcript.text,
|
||||||
|
"translation": transcript.translation,
|
||||||
}
|
}
|
||||||
ctx.data_channel.send(dumps(result))
|
ctx.data_channel.send(dumps(result))
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from typing import Annotated, Optional
|
|||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import av
|
import av
|
||||||
import reflector.auth as auth
|
|
||||||
from fastapi import (
|
from fastapi import (
|
||||||
APIRouter,
|
APIRouter,
|
||||||
Depends,
|
Depends,
|
||||||
@@ -18,11 +17,13 @@ from fastapi import (
|
|||||||
)
|
)
|
||||||
from fastapi_pagination import Page, paginate
|
from fastapi_pagination import Page, paginate
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
from starlette.concurrency import run_in_threadpool
|
||||||
|
|
||||||
|
import reflector.auth as auth
|
||||||
from reflector.db import database, transcripts
|
from reflector.db import database, transcripts
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.utils.audio_waveform import get_audio_waveform
|
from reflector.utils.audio_waveform import get_audio_waveform
|
||||||
from starlette.concurrency import run_in_threadpool
|
|
||||||
|
|
||||||
from ._range_requests_response import range_requests_response
|
from ._range_requests_response import range_requests_response
|
||||||
from .rtc_offer import PipelineEvent, RtcOffer, rtc_offer_base
|
from .rtc_offer import PipelineEvent, RtcOffer, rtc_offer_base
|
||||||
@@ -49,6 +50,7 @@ class AudioWaveform(BaseModel):
|
|||||||
|
|
||||||
class TranscriptText(BaseModel):
|
class TranscriptText(BaseModel):
|
||||||
text: str
|
text: str
|
||||||
|
translation: str
|
||||||
|
|
||||||
|
|
||||||
class TranscriptTopic(BaseModel):
|
class TranscriptTopic(BaseModel):
|
||||||
@@ -491,7 +493,10 @@ async def handle_rtc_event(event: PipelineEvent, args, data):
|
|||||||
|
|
||||||
# FIXME don't do copy
|
# FIXME don't do copy
|
||||||
if event == PipelineEvent.TRANSCRIPT:
|
if event == PipelineEvent.TRANSCRIPT:
|
||||||
resp = transcript.add_event(event=event, data=TranscriptText(text=data.text))
|
resp = transcript.add_event(
|
||||||
|
event=event,
|
||||||
|
data=TranscriptText(text=data.text, translation=data.translation),
|
||||||
|
)
|
||||||
await transcripts_controller.update(
|
await transcripts_controller.update(
|
||||||
transcript,
|
transcript,
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user