Chunked file upload

This commit is contained in:
2024-07-12 22:57:54 +02:00
parent 74a1c69e1f
commit df5b735959
6 changed files with 82 additions and 34 deletions

View File

@@ -17,7 +17,9 @@ class UploadStatus(BaseModel):
@router.post("/transcripts/{transcript_id}/record/upload") @router.post("/transcripts/{transcript_id}/record/upload")
async def transcript_record_upload( async def transcript_record_upload(
transcript_id: str, transcript_id: str,
file: UploadFile, chunk_number: int,
total_chunks: int,
chunk: UploadFile,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
): ):
user_id = user["sub"] if user else None user_id = user["sub"] if user else None
@@ -34,22 +36,37 @@ async def transcript_record_upload(
status_code=400, detail="There is already an upload in progress" status_code=400, detail="There is already an upload in progress"
) )
# save the file to the transcript folder # save the chunk to the transcript folder
extension = file.filename.split(".")[-1] extension = chunk.filename.split(".")[-1]
upload_filename = transcript.data_path / f"upload.{extension}" chunk_filename = transcript.data_path / f"upload_{chunk_number}.{extension}"
upload_filename.parent.mkdir(parents=True, exist_ok=True) chunk_filename.parent.mkdir(parents=True, exist_ok=True)
# ensure the file is back to the beginning # ensure the chunk is back to the beginning
await file.seek(0) await chunk.seek(0)
# save the file to the transcript folder # save the chunk to the transcript folder
try: try:
with open(upload_filename, "wb") as f: with open(chunk_filename, "wb") as f:
while True: f.write(await chunk.read())
chunk = await file.read(16384) except Exception:
if not chunk: chunk_filename.unlink()
break raise
f.write(chunk)
# return if it's not the last chunk
if chunk_number < total_chunks - 1:
return UploadStatus(status="ok")
# merge chunks to a single file
upload_filename = transcript.data_path / f"upload.{extension}"
try:
with open(upload_filename, "ab") as f:
for chunk_number in range(0, total_chunks):
chunk_filename = (
transcript.data_path / f"upload_{chunk_number}.{extension}"
)
with open(chunk_filename, "rb") as chunk:
f.write(chunk.read())
chunk_filename.unlink()
except Exception: except Exception:
upload_filename.unlink() upload_filename.unlink()
raise raise

View File

@@ -1,5 +1,6 @@
import pytest
import asyncio import asyncio
import pytest
from httpx import AsyncClient from httpx import AsyncClient
@@ -27,13 +28,13 @@ async def test_transcript_upload_file(
# upload mp3 # upload mp3
response = await ac.post( response = await ac.post(
f"/transcripts/{tid}/record/upload", f"/transcripts/{tid}/record/upload?chunk_number=0&total_chunks=1",
files={ files={
"file": ( "chunk": (
"test_short.wav", "test_short.wav",
open("tests/records/test_short.wav", "rb"), open("tests/records/test_short.wav", "rb"),
"audio/mpeg", "audio/mpeg",
) ),
}, },
) )
assert response.status_code == 200 assert response.status_code == 200

View File

@@ -18,24 +18,46 @@ export default function FileUploadButton(props: FileUploadButton) {
const file = event.target.files?.[0]; const file = event.target.files?.[0];
if (file) { if (file) {
console.log("Calling api.v1TranscriptRecordUpload()..."); const maxChunkSize = 50 * 1024 * 1024; // 50 MB
const totalChunks = Math.ceil(file.size / maxChunkSize);
// Create an object of the expected type let chunkNumber = 0;
const uploadData = { let start = 0;
file: file, let uploadedSize = 0;
// Add other properties if required by the type definition
};
api?.httpRequest.config.interceptors.request.use((request) => { api?.httpRequest.config.interceptors.request.use((request) => {
request.onUploadProgress = (progressEvent) => { request.onUploadProgress = (progressEvent) => {
setProgress((progressEvent.progress || 0) * 100); const currentProgress = Math.floor(
((uploadedSize + progressEvent.loaded) / file.size) * 100,
);
setProgress(currentProgress);
}; };
return request; return request;
}); });
api?.v1TranscriptRecordUpload({
const uploadNextChunk = async () => {
if (chunkNumber == totalChunks) return;
const chunkSize = Math.min(maxChunkSize, file.size - start);
const end = start + chunkSize;
const chunk = file.slice(start, end);
await api?.v1TranscriptRecordUpload({
transcriptId: props.transcriptId, transcriptId: props.transcriptId,
formData: uploadData, formData: {
chunk,
},
chunkNumber,
totalChunks,
}); });
uploadedSize += chunkSize;
chunkNumber++;
start = end;
uploadNextChunk();
};
uploadNextChunk();
} }
}; };

View File

@@ -18,14 +18,14 @@ export const $AudioWaveform = {
export const $Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post = export const $Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post =
{ {
properties: { properties: {
file: { chunk: {
type: "string", type: "string",
format: "binary", format: "binary",
title: "File", title: "Chunk",
}, },
}, },
type: "object", type: "object",
required: ["file"], required: ["chunk"],
title: title:
"Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post", "Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post",
} as const; } as const;

View File

@@ -497,6 +497,8 @@ export class DefaultService {
* Transcript Record Upload * Transcript Record Upload
* @param data The data for the request. * @param data The data for the request.
* @param data.transcriptId * @param data.transcriptId
* @param data.chunkNumber
* @param data.totalChunks
* @param data.formData * @param data.formData
* @returns unknown Successful Response * @returns unknown Successful Response
* @throws ApiError * @throws ApiError
@@ -510,6 +512,10 @@ export class DefaultService {
path: { path: {
transcript_id: data.transcriptId, transcript_id: data.transcriptId,
}, },
query: {
chunk_number: data.chunkNumber,
total_chunks: data.totalChunks,
},
formData: data.formData, formData: data.formData,
mediaType: "multipart/form-data", mediaType: "multipart/form-data",
errors: { errors: {

View File

@@ -6,7 +6,7 @@ export type AudioWaveform = {
export type Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post = export type Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post =
{ {
file: Blob | File; chunk: Blob | File;
}; };
export type CreateParticipant = { export type CreateParticipant = {
@@ -296,7 +296,9 @@ export type V1TranscriptMergeSpeakerData = {
export type V1TranscriptMergeSpeakerResponse = SpeakerAssignmentStatus; export type V1TranscriptMergeSpeakerResponse = SpeakerAssignmentStatus;
export type V1TranscriptRecordUploadData = { export type V1TranscriptRecordUploadData = {
chunkNumber: number;
formData: Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post; formData: Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post;
totalChunks: number;
transcriptId: string; transcriptId: string;
}; };