mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
OpenAI compatible transcription api
This commit is contained in:
@@ -1,89 +1,64 @@
|
||||
"""
|
||||
Reflector GPU backend - transcriber
|
||||
===================================
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
|
||||
from modal import Image, Secret, App, asgi_app, method, enter
|
||||
import modal
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Whisper
|
||||
WHISPER_MODEL: str = "large-v2"
|
||||
WHISPER_COMPUTE_TYPE: str = "float16"
|
||||
WHISPER_NUM_WORKERS: int = 1
|
||||
MODELS_DIR = "/models"
|
||||
|
||||
MODEL_NAME = "large-v2"
|
||||
MODEL_COMPUTE_TYPE: str = "float16"
|
||||
MODEL_NUM_WORKERS: int = 1
|
||||
|
||||
MINUTES = 60 # seconds
|
||||
|
||||
volume = modal.Volume.from_name("models", create_if_missing=True)
|
||||
|
||||
app = modal.App("reflector-transcriber")
|
||||
|
||||
|
||||
WHISPER_MODEL_DIR = "/root/transcription_models"
|
||||
def download_model():
|
||||
from faster_whisper import download_model
|
||||
|
||||
app = App(name="reflector-transcriber")
|
||||
volume.reload()
|
||||
|
||||
download_model(MODEL_NAME, cache_dir=MODELS_DIR)
|
||||
|
||||
volume.commit()
|
||||
|
||||
|
||||
def download_whisper():
|
||||
from faster_whisper.utils import download_model
|
||||
|
||||
print("Downloading Whisper model")
|
||||
download_model(WHISPER_MODEL, cache_dir=WHISPER_MODEL_DIR)
|
||||
print("Whisper model downloaded")
|
||||
|
||||
|
||||
def migrate_cache_llm():
|
||||
"""
|
||||
XXX The cache for model files in Transformers v4.22.0 has been updated.
|
||||
Migrating your old cache. This is a one-time only operation. You can
|
||||
interrupt this and resume the migration later on by calling
|
||||
`transformers.utils.move_cache()`.
|
||||
"""
|
||||
from transformers.utils.hub import move_cache
|
||||
|
||||
print("Moving LLM cache")
|
||||
move_cache(cache_dir=WHISPER_MODEL_DIR, new_cache_dir=WHISPER_MODEL_DIR)
|
||||
print("LLM cache moved")
|
||||
|
||||
|
||||
transcriber_image = (
|
||||
Image.debian_slim(python_version="3.10.8")
|
||||
.apt_install("git")
|
||||
.apt_install("wget")
|
||||
.apt_install("libsndfile-dev")
|
||||
image = (
|
||||
modal.Image.debian_slim(python_version="3.12")
|
||||
.pip_install(
|
||||
"faster-whisper",
|
||||
"requests",
|
||||
"torch",
|
||||
"transformers==4.34.0",
|
||||
"sentencepiece",
|
||||
"protobuf",
|
||||
"huggingface_hub==0.16.4",
|
||||
"gitpython",
|
||||
"torchaudio",
|
||||
"fairseq2",
|
||||
"pyyaml",
|
||||
"hf-transfer~=0.1"
|
||||
"huggingface_hub==0.27.1",
|
||||
"hf-transfer==0.1.9",
|
||||
"torch==2.5.1",
|
||||
"faster-whisper==1.1.1",
|
||||
)
|
||||
.run_function(download_whisper)
|
||||
.run_function(migrate_cache_llm)
|
||||
.env(
|
||||
{
|
||||
"HF_HUB_ENABLE_HF_TRANSFER": "1",
|
||||
"LD_LIBRARY_PATH": (
|
||||
"/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/:"
|
||||
"/opt/conda/lib/python3.10/site-packages/nvidia/cublas/lib/"
|
||||
)
|
||||
"/usr/local/lib/python3.12/site-packages/nvidia/cudnn/lib/:"
|
||||
"/opt/conda/lib/python3.12/site-packages/nvidia/cublas/lib/"
|
||||
),
|
||||
}
|
||||
)
|
||||
.run_function(download_model, volumes={MODELS_DIR: volume})
|
||||
)
|
||||
|
||||
|
||||
@app.cls(
|
||||
gpu="A10G",
|
||||
timeout=60 * 5,
|
||||
container_idle_timeout=60 * 5,
|
||||
timeout=5 * MINUTES,
|
||||
container_idle_timeout=5 * MINUTES,
|
||||
allow_concurrent_inputs=6,
|
||||
image=transcriber_image,
|
||||
image=image,
|
||||
volumes={MODELS_DIR: volume},
|
||||
)
|
||||
class Transcriber:
|
||||
@enter()
|
||||
@modal.enter()
|
||||
def enter(self):
|
||||
import faster_whisper
|
||||
import torch
|
||||
@@ -92,21 +67,20 @@ class Transcriber:
|
||||
self.use_gpu = torch.cuda.is_available()
|
||||
self.device = "cuda" if self.use_gpu else "cpu"
|
||||
self.model = faster_whisper.WhisperModel(
|
||||
WHISPER_MODEL,
|
||||
MODEL_NAME,
|
||||
device=self.device,
|
||||
compute_type=WHISPER_COMPUTE_TYPE,
|
||||
num_workers=WHISPER_NUM_WORKERS,
|
||||
download_root=WHISPER_MODEL_DIR,
|
||||
local_files_only=True
|
||||
compute_type=MODEL_COMPUTE_TYPE,
|
||||
num_workers=MODEL_NUM_WORKERS,
|
||||
download_root=MODELS_DIR,
|
||||
local_files_only=True,
|
||||
)
|
||||
|
||||
@method()
|
||||
@modal.method()
|
||||
def transcribe_segment(
|
||||
self,
|
||||
audio_data: str,
|
||||
audio_suffix: str,
|
||||
source_language: str,
|
||||
timestamp: float = 0
|
||||
language: str,
|
||||
):
|
||||
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
|
||||
fp.write(audio_data)
|
||||
@@ -114,40 +88,23 @@ class Transcriber:
|
||||
with self.lock:
|
||||
segments, _ = self.model.transcribe(
|
||||
fp.name,
|
||||
language=source_language,
|
||||
language=language,
|
||||
beam_size=5,
|
||||
word_timestamps=True,
|
||||
vad_filter=True,
|
||||
vad_parameters={"min_silence_duration_ms": 500},
|
||||
)
|
||||
|
||||
multilingual_transcript = {}
|
||||
transcript_source_lang = ""
|
||||
text = ""
|
||||
words = []
|
||||
if segments:
|
||||
segments = list(segments)
|
||||
|
||||
for segment in segments:
|
||||
transcript_source_lang += segment.text
|
||||
for word in segment.words:
|
||||
words.append(
|
||||
{
|
||||
"text": word.word,
|
||||
"start": round(timestamp + word.start, 3),
|
||||
"end": round(timestamp + word.end, 3),
|
||||
}
|
||||
text += segment.text
|
||||
words.extend(
|
||||
{"word": word.word, "start": word.start, "end": word.end}
|
||||
for word in segment.words
|
||||
)
|
||||
|
||||
multilingual_transcript[source_language] = transcript_source_lang
|
||||
|
||||
return {
|
||||
"text": multilingual_transcript,
|
||||
"words": words
|
||||
}
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Web API
|
||||
# -------------------------------------------------------------------
|
||||
return {"text": text, "words": words}
|
||||
|
||||
|
||||
@app.function(
|
||||
@@ -155,21 +112,23 @@ class Transcriber:
|
||||
timeout=60,
|
||||
allow_concurrent_inputs=40,
|
||||
secrets=[
|
||||
Secret.from_name("reflector-gpu"),
|
||||
modal.Secret.from_name("reflector-gpu"),
|
||||
],
|
||||
volumes={MODELS_DIR: volume},
|
||||
)
|
||||
@asgi_app()
|
||||
@modal.asgi_app()
|
||||
def web():
|
||||
from fastapi import Body, Depends, FastAPI, HTTPException, UploadFile, status
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
from typing_extensions import Annotated
|
||||
|
||||
transcriberstub = Transcriber()
|
||||
transcriber = Transcriber()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
||||
supported_audio_file_types = ["wav", "mp3", "ogg", "flac"]
|
||||
|
||||
supported_file_types = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
|
||||
|
||||
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
|
||||
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
|
||||
@@ -182,21 +141,20 @@ def web():
|
||||
class TranscriptResponse(BaseModel):
|
||||
result: dict
|
||||
|
||||
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
|
||||
@app.post("/v1/audio/transcriptions", dependencies=[Depends(apikey_auth)])
|
||||
def transcribe(
|
||||
file: UploadFile,
|
||||
source_language: Annotated[str, Body(...)] = "en",
|
||||
timestamp: Annotated[float, Body()] = 0.0
|
||||
model: str = "whisper-1",
|
||||
language: Annotated[str, Body(...)] = "en",
|
||||
) -> TranscriptResponse:
|
||||
audio_data = file.file.read()
|
||||
audio_suffix = file.filename.split(".")[-1]
|
||||
assert audio_suffix in supported_audio_file_types
|
||||
assert audio_suffix in supported_file_types
|
||||
|
||||
func = transcriberstub.transcribe_segment.spawn(
|
||||
func = transcriber.transcribe_segment.spawn(
|
||||
audio_data=audio_data,
|
||||
audio_suffix=audio_suffix,
|
||||
source_language=source_language,
|
||||
timestamp=timestamp
|
||||
language=language,
|
||||
)
|
||||
result = func.get()
|
||||
return result
|
||||
|
||||
129
server/poetry.lock
generated
129
server/poetry.lock
generated
@@ -1068,6 +1068,17 @@ wrapt = ">=1.10,<2"
|
||||
[package.extras]
|
||||
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
|
||||
|
||||
[[package]]
|
||||
name = "distro"
|
||||
version = "1.9.0"
|
||||
description = "Distro - an OS platform information API"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
|
||||
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dnspython"
|
||||
version = "2.4.2"
|
||||
@@ -1696,6 +1707,91 @@ files = [
|
||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiter"
|
||||
version = "0.8.2"
|
||||
description = "Fast iterable JSON parser."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-win32.whl", hash = "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43"},
|
||||
{file = "jiter-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44"},
|
||||
{file = "jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e"},
|
||||
{file = "jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a"},
|
||||
{file = "jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865"},
|
||||
{file = "jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca"},
|
||||
{file = "jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0"},
|
||||
{file = "jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-win32.whl", hash = "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63"},
|
||||
{file = "jiter-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-win32.whl", hash = "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36"},
|
||||
{file = "jiter-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a"},
|
||||
{file = "jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiwer"
|
||||
version = "3.0.3"
|
||||
@@ -2267,6 +2363,31 @@ packaging = "*"
|
||||
protobuf = "*"
|
||||
sympy = "*"
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "1.59.7"
|
||||
description = "The official Python library for the openai API"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "openai-1.59.7-py3-none-any.whl", hash = "sha256:cfa806556226fa96df7380ab2e29814181d56fea44738c2b0e581b462c268692"},
|
||||
{file = "openai-1.59.7.tar.gz", hash = "sha256:043603def78c00befb857df9f0a16ee76a3af5984ba40cb7ee5e2f40db4646bf"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
anyio = ">=3.5.0,<5"
|
||||
distro = ">=1.7.0,<2"
|
||||
httpx = ">=0.23.0,<1"
|
||||
jiter = ">=0.4.0,<1"
|
||||
pydantic = ">=1.9.0,<3"
|
||||
sniffio = "*"
|
||||
tqdm = ">4"
|
||||
typing-extensions = ">=4.11,<5"
|
||||
|
||||
[package.extras]
|
||||
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
|
||||
realtime = ["websockets (>=13,<15)"]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "23.2"
|
||||
@@ -4041,13 +4162,13 @@ vision = ["Pillow (>=10.0.1,<=15.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.8.0"
|
||||
version = "4.12.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
|
||||
{file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
|
||||
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4483,4 +4604,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "3ba5402ab9fbec271f255c345c89c67385c722735b1d79a959e887c7c34a4047"
|
||||
content-hash = "6f5213b520e038f396de4a6b6e742a54eec1e6e9ea67cb1fa36e134c8a946cbe"
|
||||
|
||||
@@ -39,6 +39,7 @@ faster-whisper = "^0.10.0"
|
||||
transformers = "^4.36.2"
|
||||
black = "24.1.1"
|
||||
jsonschema = "^4.23.0"
|
||||
openai = "^1.59.7"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
@@ -12,51 +12,46 @@ API will be a POST request to TRANSCRIPT_URL:
|
||||
|
||||
"""
|
||||
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
||||
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
||||
from reflector.processors.types import AudioFile, Transcript, Word
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.retry import retry
|
||||
|
||||
|
||||
class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
||||
def __init__(self, modal_api_key: str):
|
||||
super().__init__()
|
||||
self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
|
||||
self.transcript_url = settings.TRANSCRIPT_URL + "/v1"
|
||||
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
||||
self.api_key = settings.TRANSCRIPT_MODAL_API_KEY
|
||||
self.headers = {"Authorization": f"Bearer {modal_api_key}"}
|
||||
|
||||
async def _transcript(self, data: AudioFile):
|
||||
async with httpx.AsyncClient() as client:
|
||||
self.logger.debug(f"Try to transcribe audio {data.name}")
|
||||
files = {
|
||||
"file": (data.name, data.fd),
|
||||
}
|
||||
source_language = self.get_pref("audio:source_language", "en")
|
||||
json_payload = {"source_language": source_language}
|
||||
response = await retry(client.post)(
|
||||
self.transcript_url,
|
||||
files=files,
|
||||
async with AsyncOpenAI(
|
||||
base_url=self.transcript_url,
|
||||
api_key=self.api_key,
|
||||
timeout=self.timeout,
|
||||
headers=self.headers,
|
||||
params=json_payload,
|
||||
follow_redirects=True,
|
||||
)
|
||||
) as client:
|
||||
self.logger.debug(f"Try to transcribe audio {data.name}")
|
||||
|
||||
self.logger.debug(
|
||||
f"Transcript response: {response.status_code} {response.content}"
|
||||
audio_file = open(data.path, "rb")
|
||||
transcription = await client.audio.transcriptions.create(
|
||||
file=audio_file,
|
||||
model="whisper-1",
|
||||
response_format="verbose_json",
|
||||
language=self.get_pref("audio:source_language", "en"),
|
||||
timestamp_granularities=["word"],
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
self.logger.debug(f"Transcription: {transcription}")
|
||||
transcript = Transcript(
|
||||
words=[
|
||||
Word(
|
||||
text=word["text"],
|
||||
start=word["start"],
|
||||
end=word["end"],
|
||||
text=word.word,
|
||||
start=word.start,
|
||||
end=word.end,
|
||||
)
|
||||
for word in result["words"]
|
||||
for word in transcription.words
|
||||
],
|
||||
)
|
||||
transcript.add_offset(data.timestamp)
|
||||
|
||||
Reference in New Issue
Block a user