From df5b735959d2cd6befcbc73f4d7f0d9adf14bcbe Mon Sep 17 00:00:00 2001 From: Sergey Mankovsky Date: Fri, 12 Jul 2024 22:57:54 +0200 Subject: [PATCH] Chunked file upload --- server/reflector/views/transcripts_upload.py | 45 ++++++++++++------ server/tests/test_transcripts_upload.py | 9 ++-- .../[domain]/transcripts/fileUploadButton.tsx | 46 ++++++++++++++----- www/app/api/schemas.gen.ts | 6 +-- www/app/api/services.gen.ts | 6 +++ www/app/api/types.gen.ts | 4 +- 6 files changed, 82 insertions(+), 34 deletions(-) diff --git a/server/reflector/views/transcripts_upload.py b/server/reflector/views/transcripts_upload.py index 96b82d78..4fa45e3e 100644 --- a/server/reflector/views/transcripts_upload.py +++ b/server/reflector/views/transcripts_upload.py @@ -17,7 +17,9 @@ class UploadStatus(BaseModel): @router.post("/transcripts/{transcript_id}/record/upload") async def transcript_record_upload( transcript_id: str, - file: UploadFile, + chunk_number: int, + total_chunks: int, + chunk: UploadFile, user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], ): user_id = user["sub"] if user else None @@ -34,22 +36,37 @@ async def transcript_record_upload( status_code=400, detail="There is already an upload in progress" ) - # save the file to the transcript folder - extension = file.filename.split(".")[-1] - upload_filename = transcript.data_path / f"upload.{extension}" - upload_filename.parent.mkdir(parents=True, exist_ok=True) + # save the chunk to the transcript folder + extension = chunk.filename.split(".")[-1] + chunk_filename = transcript.data_path / f"upload_{chunk_number}.{extension}" + chunk_filename.parent.mkdir(parents=True, exist_ok=True) - # ensure the file is back to the beginning - await file.seek(0) + # ensure the chunk is back to the beginning + await chunk.seek(0) - # save the file to the transcript folder + # save the chunk to the transcript folder try: - with open(upload_filename, "wb") as f: - while True: - chunk = await file.read(16384) 
- if not chunk: - break - f.write(chunk) + with open(chunk_filename, "wb") as f: + f.write(await chunk.read()) + except Exception: + chunk_filename.unlink() + raise + + # return if it's not the last chunk + if chunk_number < total_chunks - 1: + return UploadStatus(status="ok") + + # merge chunks to a single file + upload_filename = transcript.data_path / f"upload.{extension}" + try: + with open(upload_filename, "ab") as f: + for chunk_number in range(0, total_chunks): + chunk_filename = ( + transcript.data_path / f"upload_{chunk_number}.{extension}" + ) + with open(chunk_filename, "rb") as chunk: + f.write(chunk.read()) + chunk_filename.unlink() except Exception: upload_filename.unlink() raise diff --git a/server/tests/test_transcripts_upload.py b/server/tests/test_transcripts_upload.py index 3cb482c1..fab21321 100644 --- a/server/tests/test_transcripts_upload.py +++ b/server/tests/test_transcripts_upload.py @@ -1,5 +1,6 @@ -import pytest import asyncio + +import pytest from httpx import AsyncClient @@ -27,13 +28,13 @@ async def test_transcript_upload_file( # upload mp3 response = await ac.post( - f"/transcripts/{tid}/record/upload", + f"/transcripts/{tid}/record/upload?chunk_number=0&total_chunks=1", files={ - "file": ( + "chunk": ( "test_short.wav", open("tests/records/test_short.wav", "rb"), "audio/mpeg", - ) + ), }, ) assert response.status_code == 200 diff --git a/www/app/[domain]/transcripts/fileUploadButton.tsx b/www/app/[domain]/transcripts/fileUploadButton.tsx index 3faba127..66246bf7 100644 --- a/www/app/[domain]/transcripts/fileUploadButton.tsx +++ b/www/app/[domain]/transcripts/fileUploadButton.tsx @@ -18,24 +18,46 @@ export default function FileUploadButton(props: FileUploadButton) { const file = event.target.files?.[0]; if (file) { - console.log("Calling api.v1TranscriptRecordUpload()..."); - - // Create an object of the expected type - const uploadData = { - file: file, - // Add other properties if required by the type definition - }; + const 
maxChunkSize = 50 * 1024 * 1024; // 50 MB + const totalChunks = Math.ceil(file.size / maxChunkSize); + let chunkNumber = 0; + let start = 0; + let uploadedSize = 0; api?.httpRequest.config.interceptors.request.use((request) => { request.onUploadProgress = (progressEvent) => { - setProgress((progressEvent.progress || 0) * 100); + const currentProgress = Math.floor( + ((uploadedSize + progressEvent.loaded) / file.size) * 100, + ); + setProgress(currentProgress); }; return request; }); - api?.v1TranscriptRecordUpload({ - transcriptId: props.transcriptId, - formData: uploadData, - }); + + const uploadNextChunk = async () => { + if (chunkNumber == totalChunks) return; + + const chunkSize = Math.min(maxChunkSize, file.size - start); + const end = start + chunkSize; + const chunk = file.slice(start, end); + + await api?.v1TranscriptRecordUpload({ + transcriptId: props.transcriptId, + formData: { + chunk, + }, + chunkNumber, + totalChunks, + }); + + uploadedSize += chunkSize; + chunkNumber++; + start = end; + + uploadNextChunk(); + }; + + uploadNextChunk(); } }; diff --git a/www/app/api/schemas.gen.ts b/www/app/api/schemas.gen.ts index 01e3f6ba..afa34829 100644 --- a/www/app/api/schemas.gen.ts +++ b/www/app/api/schemas.gen.ts @@ -18,14 +18,14 @@ export const $AudioWaveform = { export const $Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post = { properties: { - file: { + chunk: { type: "string", format: "binary", - title: "File", + title: "Chunk", }, }, type: "object", - required: ["file"], + required: ["chunk"], title: "Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post", } as const; diff --git a/www/app/api/services.gen.ts b/www/app/api/services.gen.ts index b546e7b4..e5a18eb8 100644 --- a/www/app/api/services.gen.ts +++ b/www/app/api/services.gen.ts @@ -497,6 +497,8 @@ export class DefaultService { * Transcript Record Upload * @param data The data for the request. 
* @param data.transcriptId + * @param data.chunkNumber + * @param data.totalChunks * @param data.formData * @returns unknown Successful Response * @throws ApiError @@ -510,6 +512,10 @@ export class DefaultService { path: { transcript_id: data.transcriptId, }, + query: { + chunk_number: data.chunkNumber, + total_chunks: data.totalChunks, + }, formData: data.formData, mediaType: "multipart/form-data", errors: { diff --git a/www/app/api/types.gen.ts b/www/app/api/types.gen.ts index db50d253..bc06d3c1 100644 --- a/www/app/api/types.gen.ts +++ b/www/app/api/types.gen.ts @@ -6,7 +6,7 @@ export type AudioWaveform = { export type Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post = { - file: Blob | File; + chunk: Blob | File; }; export type CreateParticipant = { @@ -296,7 +296,9 @@ export type V1TranscriptMergeSpeakerData = { export type V1TranscriptMergeSpeakerResponse = SpeakerAssignmentStatus; export type V1TranscriptRecordUploadData = { + chunkNumber: number; formData: Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post; + totalChunks: number; transcriptId: string; };