mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
update
This commit is contained in:
@@ -10,7 +10,7 @@ from modal import Image, Secret, Stub, asgi_app, method
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
# Whisper
|
# Whisper
|
||||||
WHISPER_MODEL: str = "tiny"
|
WHISPER_MODEL: str = "large-v2"
|
||||||
WHISPER_COMPUTE_TYPE: str = "float16"
|
WHISPER_COMPUTE_TYPE: str = "float16"
|
||||||
WHISPER_NUM_WORKERS: int = 1
|
WHISPER_NUM_WORKERS: int = 1
|
||||||
WHISPER_CACHE_DIR: str = "/cache/whisper"
|
WHISPER_CACHE_DIR: str = "/cache/whisper"
|
||||||
@@ -18,7 +18,7 @@ WHISPER_CACHE_DIR: str = "/cache/whisper"
|
|||||||
# Translation Model
|
# Translation Model
|
||||||
TRANSLATION_MODEL = "facebook/m2m100_418M"
|
TRANSLATION_MODEL = "facebook/m2m100_418M"
|
||||||
|
|
||||||
stub = Stub(name="reflector-translator")
|
stub = Stub(name="reflector-transcriber")
|
||||||
|
|
||||||
|
|
||||||
def download_whisper():
|
def download_whisper():
|
||||||
@@ -82,9 +82,9 @@ class Whisper:
|
|||||||
self,
|
self,
|
||||||
audio_data: str,
|
audio_data: str,
|
||||||
audio_suffix: str,
|
audio_suffix: str,
|
||||||
timestamp: float = 0,
|
source_language: str,
|
||||||
source_language: str = "en",
|
target_language: str,
|
||||||
target_language: str = "fr"
|
timestamp: float = 0
|
||||||
):
|
):
|
||||||
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
|
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
|
||||||
fp.write(audio_data)
|
fp.write(audio_data)
|
||||||
@@ -126,7 +126,8 @@ class Whisper:
|
|||||||
forced_bos_token_id=forced_bos_token_id
|
forced_bos_token_id=forced_bos_token_id
|
||||||
)
|
)
|
||||||
result = self.translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
result = self.translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
||||||
multilingual_transcript[target_language] = result[0].strip()
|
translation = result[0].strip()
|
||||||
|
multilingual_transcript[target_language] = translation
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"text": multilingual_transcript,
|
"text": multilingual_transcript,
|
||||||
@@ -167,29 +168,26 @@ def web():
|
|||||||
headers={"WWW-Authenticate": "Bearer"},
|
headers={"WWW-Authenticate": "Bearer"},
|
||||||
)
|
)
|
||||||
|
|
||||||
class TranscriptionRequest(BaseModel):
|
|
||||||
file: UploadFile
|
|
||||||
timestamp: Annotated[float, Form()] = 0
|
|
||||||
source_language: Annotated[str, Form()] = "en"
|
|
||||||
target_language: Annotated[str, Form()] = "en"
|
|
||||||
|
|
||||||
class TranscriptResponse(BaseModel):
|
class TranscriptResponse(BaseModel):
|
||||||
result: dict
|
result: dict
|
||||||
|
|
||||||
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
|
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
|
||||||
async def transcribe(
|
async def transcribe(
|
||||||
req
|
file: UploadFile,
|
||||||
):
|
timestamp: Annotated[float, Form()] = 0,
|
||||||
print(req)
|
source_language: Annotated[str, Form()] = "en",
|
||||||
audio_data = await req.file.read()
|
target_language: Annotated[str, Form()] = "en"
|
||||||
audio_suffix = req.file.filename.split(".")[-1]
|
) -> TranscriptResponse:
|
||||||
|
audio_data = await file.read()
|
||||||
|
audio_suffix = file.filename.split(".")[-1]
|
||||||
assert audio_suffix in supported_audio_file_types
|
assert audio_suffix in supported_audio_file_types
|
||||||
|
|
||||||
func = transcriberstub.transcribe_segment.spawn(
|
func = transcriberstub.transcribe_segment.spawn(
|
||||||
audio_data=audio_data,
|
audio_data=audio_data,
|
||||||
audio_suffix=audio_suffix,
|
audio_suffix=audio_suffix,
|
||||||
source_language="en",
|
source_language=source_language,
|
||||||
timestamp=req.timestamp
|
target_language=target_language,
|
||||||
|
timestamp=timestamp
|
||||||
)
|
)
|
||||||
result = func.get()
|
result = func.get()
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -55,28 +55,28 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
files = {
|
files = {
|
||||||
"file": (data.name, data.fd),
|
"file": (data.name, data.fd),
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Get the source / target language from the UI preferences dynamically
|
# TODO: Get the source / target language from the UI preferences dynamically
|
||||||
# like context, session objects
|
# Update code here once this is possible.
|
||||||
|
# i.e) extract from context/session objects
|
||||||
source_language = "en"
|
source_language = "en"
|
||||||
target_language = "fr"
|
target_language = "en"
|
||||||
languages = TranslationLanguages()
|
languages = TranslationLanguages()
|
||||||
|
|
||||||
# Only way to set the target should be the UI element like dropdown.
|
# Only way to set the target should be the UI element like dropdown.
|
||||||
# Hence, this assert should never fail.
|
# Hence, this assert should never fail.
|
||||||
assert languages.is_supported(target_language)
|
assert languages.is_supported(target_language)
|
||||||
data = {
|
json_payload = {
|
||||||
"source_language": source_language,
|
"source_language": source_language,
|
||||||
"target_language": target_language,
|
"target_language": target_language,
|
||||||
}
|
}
|
||||||
|
|
||||||
print("TRYING TO TRANSCRIBE")
|
|
||||||
|
|
||||||
response = await retry(client.post)(
|
response = await retry(client.post)(
|
||||||
self.transcript_url,
|
self.transcript_url,
|
||||||
files=files,
|
files=files,
|
||||||
timeout=self.timeout,
|
timeout=self.timeout,
|
||||||
headers=self.headers,
|
headers=self.headers,
|
||||||
# data=data
|
json=json_payload.values(),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
@@ -85,9 +85,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
||||||
# Sanity check for translation status in result
|
# Sanity check for translation status in the result
|
||||||
if "target_language" in result["text"]:
|
if target_language in result["text"]:
|
||||||
text = result["text"]["target_language"]
|
text = result["text"][target_language]
|
||||||
else:
|
else:
|
||||||
text = result["text"]["en"]
|
text = result["text"]["en"]
|
||||||
transcript = Transcript(
|
transcript = Transcript(
|
||||||
|
|||||||
Reference in New Issue
Block a user