Chunked filed upload

This commit is contained in:
2024-07-12 22:57:54 +02:00
parent 74a1c69e1f
commit df5b735959
6 changed files with 82 additions and 34 deletions

View File

@@ -17,7 +17,9 @@ class UploadStatus(BaseModel):
@router.post("/transcripts/{transcript_id}/record/upload")
async def transcript_record_upload(
transcript_id: str,
file: UploadFile,
chunk_number: int,
total_chunks: int,
chunk: UploadFile,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
user_id = user["sub"] if user else None
@@ -34,22 +36,37 @@ async def transcript_record_upload(
status_code=400, detail="There is already an upload in progress"
)
# save the file to the transcript folder
extension = file.filename.split(".")[-1]
upload_filename = transcript.data_path / f"upload.{extension}"
upload_filename.parent.mkdir(parents=True, exist_ok=True)
# save the chunk to the transcript folder
extension = chunk.filename.split(".")[-1]
chunk_filename = transcript.data_path / f"upload_{chunk_number}.{extension}"
chunk_filename.parent.mkdir(parents=True, exist_ok=True)
# ensure the file is back to the beginning
await file.seek(0)
# ensure the chunk is back to the beginning
await chunk.seek(0)
# save the file to the transcript folder
# save the chunk to the transcript folder
try:
with open(upload_filename, "wb") as f:
while True:
chunk = await file.read(16384)
if not chunk:
break
f.write(chunk)
with open(chunk_filename, "wb") as f:
f.write(await chunk.read())
except Exception:
chunk_filename.unlink()
raise
# return if it's not the last chunk
if chunk_number < total_chunks - 1:
return UploadStatus(status="ok")
# merge chunks to a single file
upload_filename = transcript.data_path / f"upload.{extension}"
try:
with open(upload_filename, "ab") as f:
for chunk_number in range(0, total_chunks):
chunk_filename = (
transcript.data_path / f"upload_{chunk_number}.{extension}"
)
with open(chunk_filename, "rb") as chunk:
f.write(chunk.read())
chunk_filename.unlink()
except Exception:
upload_filename.unlink()
raise

View File

@@ -1,5 +1,6 @@
import pytest
import asyncio
import pytest
from httpx import AsyncClient
@@ -27,13 +28,13 @@ async def test_transcript_upload_file(
# upload mp3
response = await ac.post(
f"/transcripts/{tid}/record/upload",
f"/transcripts/{tid}/record/upload?chunk_number=0&total_chunks=1",
files={
"file": (
"chunk": (
"test_short.wav",
open("tests/records/test_short.wav", "rb"),
"audio/mpeg",
)
),
},
)
assert response.status_code == 200