docs: docs website + installation (#778)

* feat: WIP doc (vibe started and iterated) * install from scratch docs * caddyfile.example * gitignore * authentik script * authentik script * authentik script * llm doc * authentik ongoing * more daily setup logs * doc website * gpu self hosted setup guide (no-mistakes) * doc review round * doc review round * doc review round * update doc site sidebars * feat(docs): add mermaid diagram support * docs polishing * live pipeline doc * move pipeline dev docs to dev docs location * doc pr review iteration * dockerfile healthcheck * docs/pr-comments * remove jwt comment * llm suggestion * pr comments * pr comments * document auto migrations * cleanup docs --------- Co-authored-by: Mathieu Virbel <mat@meltingrocks.com> Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-04-25 14:45:19 +00:00 · 2026-01-06 17:25:02 -05:00
parent e644d6497b
commit 407c15299f
61 changed files with 32653 additions and 26 deletions
--- a/gpu/modal_deployments/reflector_transcriber.py
+++ b/gpu/modal_deployments/reflector_transcriber.py
@@ -89,6 +89,7 @@ image = (
        "torch==2.5.1",
        "faster-whisper==1.1.1",
        "fastapi==0.115.12",
+        "python-multipart",
        "requests",
        "librosa==0.10.1",
        "numpy<2",
@@ -98,6 +99,12 @@ image = (
 )


+# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
+# If you modify the audio format detection logic, you MUST update all copies:
+#   - gpu/self_hosted/app/utils.py
+#   - gpu/modal_deployments/reflector_transcriber.py (this file - 2 copies!)
+#   - gpu/modal_deployments/reflector_transcriber_parakeet.py
+#   - gpu/modal_deployments/reflector_diarizer.py
 def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
    parsed_url = urlparse(url)
    url_path = parsed_url.path
@@ -113,6 +120,8 @@ def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtens
        return AudioFileExtension("wav")
    if "audio/mp4" in content_type:
        return AudioFileExtension("mp4")
+    if "audio/webm" in content_type or "video/webm" in content_type:
+        return AudioFileExtension("webm")

    raise ValueError(
        f"Unsupported audio format for URL: {url}. "
@@ -315,6 +324,11 @@ class TranscriberWhisperFile:
        import numpy as np
        from silero_vad import VADIterator

+        # IMPORTANT: This VAD segment logic is duplicated in multiple files for deployment isolation.
+        # If you modify this function, you MUST update all copies:
+        #   - gpu/modal_deployments/reflector_transcriber.py (this file)
+        #   - gpu/modal_deployments/reflector_transcriber_parakeet.py
+        #   - gpu/self_hosted/app/services/transcriber.py
        def vad_segments(
            audio_array,
            sample_rate: int = SAMPLERATE,
@@ -322,6 +336,7 @@ class TranscriberWhisperFile:
        ) -> Generator[TimeSegment, None, None]:
            """Generate speech segments as TimeSegment using Silero VAD."""
            iterator = VADIterator(self.vad_model, sampling_rate=sample_rate)
+            audio_duration = len(audio_array) / float(SAMPLERATE)
            start = None
            for i in range(0, len(audio_array), window_size):
                chunk = audio_array[i : i + window_size]
@@ -341,6 +356,9 @@ class TranscriberWhisperFile:
                        start / float(SAMPLERATE), end / float(SAMPLERATE)
                    )
                    start = None
+            # Handle case where audio ends while speech is still active
+            if start is not None:
+                yield TimeSegment(start / float(SAMPLERATE), audio_duration)
            iterator.reset_states()

        upload_volume.reload()
@@ -406,6 +424,12 @@ class TranscriberWhisperFile:
        return {"text": " ".join(all_text), "words": all_words}


+# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
+# If you modify the audio format detection logic, you MUST update all copies:
+#   - gpu/self_hosted/app/utils.py
+#   - gpu/modal_deployments/reflector_transcriber.py (this file - 2 copies!)
+#   - gpu/modal_deployments/reflector_transcriber_parakeet.py
+#   - gpu/modal_deployments/reflector_diarizer.py
 def detect_audio_format(url: str, headers: dict) -> str:
    from urllib.parse import urlparse

@@ -423,6 +447,8 @@ def detect_audio_format(url: str, headers: dict) -> str:
        return "wav"
    if "audio/mp4" in content_type:
        return "mp4"
+    if "audio/webm" in content_type or "video/webm" in content_type:
+        return "webm"

    raise HTTPException(
        status_code=400,