mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-01 11:56:47 +00:00
* fix: local processing instead of http server for cpu * add fallback token if service worker doesn't work * chore: rename processors to keep processor pattern up to date and allow other processors to be created and used with env vars
40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
"""
|
|
Pyannote file diarization processor using pyannote.audio in-process.
|
|
|
|
Downloads audio from URL, runs pyannote diarization locally,
|
|
and returns speaker segments. No HTTP backend needed.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
|
|
from reflector.processors._audio_download import download_audio_to_temp
|
|
from reflector.processors._pyannote_diarization_service import diarization_service
|
|
from reflector.processors.file_diarization import (
|
|
FileDiarizationInput,
|
|
FileDiarizationOutput,
|
|
FileDiarizationProcessor,
|
|
)
|
|
from reflector.processors.file_diarization_auto import FileDiarizationAutoProcessor
|
|
|
|
|
|
class FileDiarizationPyannoteProcessor(FileDiarizationProcessor):
    """File diarization processor that runs pyannote in-process.

    The audio referenced by the input URL is downloaded to a temporary file,
    passed to the shared ``diarization_service`` through the event loop's
    default executor, and the temporary file is removed afterwards.
    """

    async def _diarize(self, data: FileDiarizationInput) -> FileDiarizationOutput:
        """Run pyannote diarization on file from URL.

        Args:
            data: Input whose ``audio_url`` points at the audio to diarize.

        Returns:
            A ``FileDiarizationOutput`` built from the ``"diarization"`` entry
            of the result returned by ``diarization_service.diarize_file``.
        """
        self.logger.info(f"Starting pyannote diarization from {data.audio_url}")
        tmp_path = await download_audio_to_temp(data.audio_url)
        try:
            # Inside a coroutine a loop is always running; get_running_loop()
            # is the recommended accessor and never creates a new loop.
            loop = asyncio.get_running_loop()
            # Passing None selects the loop's default executor, so the
            # (presumably blocking) diarize_file call is awaited rather than
            # executed inline in this coroutine.
            result = await loop.run_in_executor(
                None, diarization_service.diarize_file, str(tmp_path)
            )
            return FileDiarizationOutput(diarization=result["diarization"])
        finally:
            # Best-effort cleanup: never let a failed delete mask the result
            # or the original exception, but do not hide the failure either.
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as exc:
                self.logger.warning(
                    f"Could not remove temporary audio file {tmp_path}: {exc}"
                )
|
|
|
|
|
|
# Register this processor with the auto processor under the "pyannote" name.
# NOTE(review): presumably this is what lets the backend be selected by name
# (e.g. from configuration) — confirm in FileDiarizationAutoProcessor.
FileDiarizationAutoProcessor.register("pyannote", FileDiarizationPyannoteProcessor)
|