Merge pull request #373 from Monadical-SAS/upload-big-files
Upload big files
.github/workflows/test_server.yml (vendored, 12 changes)
@@ -3,10 +3,10 @@ name: Unittests
 on:
   pull_request:
     paths:
-      - 'server/**'
+      - "server/**"
   push:
     paths:
-      - 'server/**'
+      - "server/**"

 jobs:
   pytest:
@@ -23,9 +23,9 @@ jobs:
       - name: Set up Python 3.x
         uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
-          cache: 'poetry'
-          cache-dependency-path: 'server/poetry.lock'
+          python-version: "3.11"
+          cache: "poetry"
+          cache-dependency-path: "server/poetry.lock"
       - name: Install requirements
         run: |
           cd server
@@ -61,7 +61,7 @@ jobs:
         run: |
           pip install ruff
           cd server
-          ruff reflector tests
+          ruff check reflector tests

   docker:
     runs-on: ubuntu-latest
@@ -17,7 +17,9 @@ class UploadStatus(BaseModel):
 @router.post("/transcripts/{transcript_id}/record/upload")
 async def transcript_record_upload(
     transcript_id: str,
-    file: UploadFile,
+    chunk_number: int,
+    total_chunks: int,
+    chunk: UploadFile,
     user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
 ):
     user_id = user["sub"] if user else None
@@ -34,22 +36,37 @@ async def transcript_record_upload(
             status_code=400, detail="There is already an upload in progress"
         )

-    # save the file to the transcript folder
-    extension = file.filename.split(".")[-1]
-    upload_filename = transcript.data_path / f"upload.{extension}"
-    upload_filename.parent.mkdir(parents=True, exist_ok=True)
+    # save the chunk to the transcript folder
+    extension = chunk.filename.split(".")[-1]
+    chunk_filename = transcript.data_path / f"upload_{chunk_number}.{extension}"
+    chunk_filename.parent.mkdir(parents=True, exist_ok=True)

-    # ensure the file is back to the beginning
-    await file.seek(0)
+    # ensure the chunk is back to the beginning
+    await chunk.seek(0)

-    # save the file to the transcript folder
+    # save the chunk to the transcript folder
     try:
-        with open(upload_filename, "wb") as f:
-            while True:
-                chunk = await file.read(16384)
-                if not chunk:
-                    break
-                f.write(chunk)
+        with open(chunk_filename, "wb") as f:
+            f.write(await chunk.read())
+    except Exception:
+        chunk_filename.unlink()
+        raise
+
+    # return if it's not the last chunk
+    if chunk_number < total_chunks - 1:
+        return UploadStatus(status="ok")
+
+    # merge chunks to a single file
+    upload_filename = transcript.data_path / f"upload.{extension}"
+    try:
+        with open(upload_filename, "ab") as f:
+            for chunk_number in range(0, total_chunks):
+                chunk_filename = (
+                    transcript.data_path / f"upload_{chunk_number}.{extension}"
+                )
+                with open(chunk_filename, "rb") as chunk:
+                    f.write(chunk.read())
+                chunk_filename.unlink()
     except Exception:
         upload_filename.unlink()
         raise
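For reference, a minimal client-side sketch of the contract this endpoint now expects: each chunk is POSTed to the same route with chunk_number and total_chunks as query parameters and the bytes in a multipart field named "chunk"; the server buffers chunks as upload_{n}.{ext} and concatenates them when the last chunk arrives. The route, parameter names and field name come from the diff above; baseUrl, the error handling and the 50 MB default are illustrative assumptions (50 MB mirrors the frontend change further down).

// Sketch only: a plain fetch-based client for the chunked upload route,
// assuming a browser-like environment with File, FormData and fetch.
async function uploadInChunks(
  baseUrl: string, // assumed; the API origin including any version prefix
  transcriptId: string,
  file: File,
  maxChunkSize = 50 * 1024 * 1024, // 50 MB, same size the frontend uses
): Promise<void> {
  const totalChunks = Math.ceil(file.size / maxChunkSize);
  for (let chunkNumber = 0; chunkNumber < totalChunks; chunkNumber++) {
    const start = chunkNumber * maxChunkSize;
    const end = Math.min(start + maxChunkSize, file.size);

    // Keep the original filename: the server splits chunk.filename on "."
    // to pick the extension for upload_{chunk_number}.{extension}.
    const form = new FormData();
    form.append("chunk", file.slice(start, end), file.name);

    const url =
      `${baseUrl}/transcripts/${transcriptId}/record/upload` +
      `?chunk_number=${chunkNumber}&total_chunks=${totalChunks}`;
    const response = await fetch(url, { method: "POST", body: form });
    if (!response.ok) {
      throw new Error(`chunk ${chunkNumber} failed with ${response.status}`);
    }
  }
}

Note that the last chunk has to be sent last: its request is the one that triggers the merge into upload.{ext}.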
@@ -1,5 +1,6 @@
-import pytest
+import asyncio
 
+import pytest
 from httpx import AsyncClient
 
 
@@ -27,13 +28,13 @@ async def test_transcript_upload_file(
 
     # upload mp3
    response = await ac.post(
-        f"/transcripts/{tid}/record/upload",
+        f"/transcripts/{tid}/record/upload?chunk_number=0&total_chunks=1",
        files={
-            "file": (
+            "chunk": (
                "test_short.wav",
                open("tests/records/test_short.wav", "rb"),
                "audio/mpeg",
-            )
+            ),
        },
    )
    assert response.status_code == 200
@@ -18,24 +18,46 @@ export default function FileUploadButton(props: FileUploadButton) {
     const file = event.target.files?.[0];
 
     if (file) {
-      console.log("Calling api.v1TranscriptRecordUpload()...");
-
-      // Create an object of the expected type
-      const uploadData = {
-        file: file,
-        // Add other properties if required by the type definition
-      };
+      const maxChunkSize = 50 * 1024 * 1024; // 50 MB
+      const totalChunks = Math.ceil(file.size / maxChunkSize);
+      let chunkNumber = 0;
+      let start = 0;
+      let uploadedSize = 0;
 
       api?.httpRequest.config.interceptors.request.use((request) => {
         request.onUploadProgress = (progressEvent) => {
-          setProgress((progressEvent.progress || 0) * 100);
+          const currentProgress = Math.floor(
+            ((uploadedSize + progressEvent.loaded) / file.size) * 100,
+          );
+          setProgress(currentProgress);
         };
         return request;
       });
-      api?.v1TranscriptRecordUpload({
+
+      const uploadNextChunk = async () => {
+        if (chunkNumber == totalChunks) return;
+
+        const chunkSize = Math.min(maxChunkSize, file.size - start);
+        const end = start + chunkSize;
+        const chunk = file.slice(start, end);
+
+        await api?.v1TranscriptRecordUpload({
          transcriptId: props.transcriptId,
-        formData: uploadData,
+          formData: {
+            chunk,
+          },
+          chunkNumber,
+          totalChunks,
        });
+
+        uploadedSize += chunkSize;
+        chunkNumber++;
+        start = end;
+
+        uploadNextChunk();
+      };
+
+      uploadNextChunk();
     }
   };
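The FileUploadButton change above slices the file into 50 MB pieces and sends them strictly one after another: uploadNextChunk awaits each v1TranscriptRecordUpload call and then re-invokes itself, while the request interceptor reports progress as uploadedSize plus the bytes of the in-flight chunk. Below is the same flow written as a plain loop, for comparison; the v1TranscriptRecordUpload argument shape (transcriptId, chunkNumber, totalChunks, formData.chunk) matches the generated client in this PR, whereas the helper name, the ApiClient type and the onProgress callback are illustrative.

// Sketch of the same sequential chunk upload as a loop rather than a
// self-calling async function. ApiClient only models the one method used here.
type ApiClient = {
  v1TranscriptRecordUpload(data: {
    transcriptId: string;
    chunkNumber: number;
    totalChunks: number;
    formData: { chunk: Blob | File };
  }): Promise<unknown>;
};

async function uploadFileInChunks(
  api: ApiClient,
  transcriptId: string,
  file: File,
  onProgress?: (percent: number) => void, // assumed callback, stands in for setProgress
): Promise<void> {
  const maxChunkSize = 50 * 1024 * 1024; // 50 MB, as in FileUploadButton
  const totalChunks = Math.ceil(file.size / maxChunkSize);
  let uploadedSize = 0;

  for (let chunkNumber = 0; chunkNumber < totalChunks; chunkNumber++) {
    const start = chunkNumber * maxChunkSize;
    const end = Math.min(start + maxChunkSize, file.size);
    const chunk = file.slice(start, end);

    // Chunks are awaited one by one; the server only merges them when the
    // request carrying the last chunk_number completes.
    await api.v1TranscriptRecordUpload({
      transcriptId,
      chunkNumber,
      totalChunks,
      formData: { chunk },
    });

    uploadedSize += end - start;
    onProgress?.(Math.floor((uploadedSize / file.size) * 100));
  }
}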
@@ -18,14 +18,14 @@ export const $AudioWaveform = {
 export const $Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post =
   {
     properties: {
-      file: {
+      chunk: {
         type: "string",
         format: "binary",
-        title: "File",
+        title: "Chunk",
       },
     },
     type: "object",
-    required: ["file"],
+    required: ["chunk"],
     title:
       "Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post",
   } as const;
@@ -497,6 +497,8 @@ export class DefaultService {
    * Transcript Record Upload
    * @param data The data for the request.
    * @param data.transcriptId
+   * @param data.chunkNumber
+   * @param data.totalChunks
    * @param data.formData
    * @returns unknown Successful Response
    * @throws ApiError
@@ -510,6 +512,10 @@ export class DefaultService {
       path: {
         transcript_id: data.transcriptId,
       },
+      query: {
+        chunk_number: data.chunkNumber,
+        total_chunks: data.totalChunks,
+      },
       formData: data.formData,
       mediaType: "multipart/form-data",
       errors: {
@@ -6,7 +6,7 @@ export type AudioWaveform = {
 
 export type Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post =
   {
-    file: Blob | File;
+    chunk: Blob | File;
   };
 
 export type CreateParticipant = {
@@ -296,7 +296,9 @@ export type V1TranscriptMergeSpeakerData = {
 export type V1TranscriptMergeSpeakerResponse = SpeakerAssignmentStatus;
 
 export type V1TranscriptRecordUploadData = {
+  chunkNumber: number;
   formData: Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post;
+  totalChunks: number;
   transcriptId: string;
 };
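Taken together, the regenerated client now requires chunkNumber and totalChunks next to formData, and the multipart field is chunk rather than file. A short sketch of building the new request shape; only the type names and fields come from the generated code above, while the import path and helper function are hypothetical.

// Hypothetical helper showing the new V1TranscriptRecordUploadData shape.
// "./types.gen" is an assumed path for the generated type definitions.
import type {
  Body_transcript_record_upload_v1_transcripts__transcript_id__record_upload_post as UploadBody,
  V1TranscriptRecordUploadData,
} from "./types.gen";

function buildChunkRequest(
  transcriptId: string,
  chunk: Blob,
  chunkNumber: number,
  totalChunks: number,
): V1TranscriptRecordUploadData {
  const formData: UploadBody = { chunk }; // the "chunk" field replaces the old "file"
  return { transcriptId, chunkNumber, totalChunks, formData };
}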