mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
Fix transcript reprocessing
This commit is contained in:
@@ -538,18 +538,29 @@ class TranscriptController:
|
|||||||
Move mp3 file to storage
|
Move mp3 file to storage
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# store the audio on external storage
|
if transcript.audio_location == "local":
|
||||||
await get_storage().put_file(
|
# store the audio on external storage if it's not already there
|
||||||
transcript.storage_audio_path,
|
await get_storage().put_file(
|
||||||
transcript.audio_mp3_filename.read_bytes(),
|
transcript.storage_audio_path,
|
||||||
)
|
transcript.audio_mp3_filename.read_bytes(),
|
||||||
|
)
|
||||||
|
|
||||||
# indicate on the transcript that the audio is now on storage
|
# indicate on the transcript that the audio is now on storage
|
||||||
await self.update(transcript, {"audio_location": "storage"})
|
await self.update(transcript, {"audio_location": "storage"})
|
||||||
|
|
||||||
# unlink the local file
|
# unlink the local file
|
||||||
transcript.audio_mp3_filename.unlink(missing_ok=True)
|
transcript.audio_mp3_filename.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
async def download_mp3_from_storage(self, transcript: Transcript):
|
||||||
|
"""
|
||||||
|
Download audio from storage
|
||||||
|
"""
|
||||||
|
transcript.audio_mp3_filename.write_bytes(
|
||||||
|
await get_storage().get_file(
|
||||||
|
transcript.storage_audio_path,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
async def upsert_participant(
|
async def upsert_participant(
|
||||||
self,
|
self,
|
||||||
transcript: Transcript,
|
transcript: Transcript,
|
||||||
|
|||||||
@@ -663,6 +663,9 @@ async def pipeline_process(transcript: Transcript, logger: Logger):
|
|||||||
import av
|
import av
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if transcript.audio_location == "storage":
|
||||||
|
await transcripts_controller.download_mp3_from_storage(transcript)
|
||||||
|
|
||||||
# open audio
|
# open audio
|
||||||
audio_filename = next(transcript.data_path.glob("upload.*"), None)
|
audio_filename = next(transcript.data_path.glob("upload.*"), None)
|
||||||
if audio_filename and transcript.status != "uploaded":
|
if audio_filename and transcript.status != "uploaded":
|
||||||
|
|||||||
@@ -52,3 +52,9 @@ class Storage:
|
|||||||
|
|
||||||
async def _get_file_url(self, filename: str) -> str:
|
async def _get_file_url(self, filename: str) -> str:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
async def get_file(self, filename: str):
|
||||||
|
return await self._get_file(filename)
|
||||||
|
|
||||||
|
async def _get_file(self, filename: str):
|
||||||
|
raise NotImplementedError
|
||||||
|
|||||||
@@ -65,5 +65,14 @@ class AwsStorage(Storage):
|
|||||||
async with self.session.client("s3") as client:
|
async with self.session.client("s3") as client:
|
||||||
await client.delete_object(Bucket=bucket, Key=s3filename)
|
await client.delete_object(Bucket=bucket, Key=s3filename)
|
||||||
|
|
||||||
|
async def _get_file(self, filename: str):
|
||||||
|
bucket = self.aws_bucket_name
|
||||||
|
folder = self.aws_folder
|
||||||
|
logger.info(f"Downloading {filename} from S3 {bucket}/{folder}")
|
||||||
|
s3filename = f"{folder}/{filename}" if folder else filename
|
||||||
|
async with self.session.client("s3") as client:
|
||||||
|
response = await client.get_object(Bucket=bucket, Key=s3filename)
|
||||||
|
return await response["Body"].read()
|
||||||
|
|
||||||
|
|
||||||
Storage.register("aws", AwsStorage)
|
Storage.register("aws", AwsStorage)
|
||||||
|
|||||||
Reference in New Issue
Block a user