Mirror of https://github.com/Monadical-SAS/reflector.git
Merge branch 'main' into UI-Meeting
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 
 PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
 MODEL_DIR = "/root/diarization_models"
-
+HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
 stub = Stub(name="reflector-diarizer")
 
 
@@ -34,7 +34,7 @@ def download_pyannote_audio():
     Pipeline.from_pretrained(
         "pyannote/speaker-diarization-3.0",
         cache_dir=MODEL_DIR,
-        use_auth_token="***REMOVED***"
+        use_auth_token=HUGGINGFACE_TOKEN
     )
 
 
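Note: the two hunks above swap a hard-coded Hugging Face token for a Modal secret object. A minimal sketch of how Modal secrets are usually consumed, assuming the secret "my-huggingface-secret" stores a key named HF_TOKEN (the real key name is not shown in this diff) and that the function opts in via the secrets= argument; the hunk itself passes the Secret object straight into the pipeline setup instead:

    import os
    import modal

    stub = modal.Stub(name="example-app")
    HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")

    @stub.function(secrets=[HUGGINGFACE_TOKEN])
    def download_model():
        # Secret key/value pairs are injected as environment variables at runtime.
        token = os.environ["HF_TOKEN"]  # assumed key name
        return bool(token)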
@@ -55,15 +55,15 @@ def configure_seamless_m4t():
 
     import yaml
 
-    ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
+    CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
         model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
         vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
         unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
         unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
 
     model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
         os.getcwd(), model_dir, latest_model_version, tokenizer_name
     )
 
-    model_yaml_data["checkpoint"] = f"file:/{model_path}"
-    vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
-    unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
-    unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
+    model_yaml_data["checkpoint"] = f"file://{model_path}"
+    vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
+    unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
+    unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
         yaml.dump(model_yaml_data, file)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
         yaml.dump(vocoder_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
         yaml.dump(unity_100_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
         yaml.dump(unity_200_yaml_data, file)
 
 
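The file:/ to file:// change above matters because the checkpoint and tokenizer entries are URIs: with an absolute path, only the double slash produces a well-formed file URI. A quick illustration, using a hypothetical path:

    model_path = "/root/models/checkpoint.pt"  # hypothetical absolute path
    print(f"file:/{model_path}")   # file://root/models/checkpoint.pt  ("root" would parse as a host)
    print(f"file://{model_path}")  # file:///root/models/checkpoint.pt (standard three-slash file URI)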
@@ -144,7 +144,7 @@ transcriber_image = (
 class Translator:
     def __enter__(self):
         import torch
-        from seamless_communication.models.inference.translator import Translator
+        from seamless_communication.inference.translator import Translator
 
         self.lock = threading.Lock()
         self.use_gpu = torch.cuda.is_available()
@@ -363,14 +363,15 @@ class Translator:
     @method()
     def translate_text(self, text: str, source_language: str, target_language: str):
         with self.lock:
-            translated_text, _, _ = self.translator.predict(
+            translation_result, _ = self.translator.predict(
                 text,
                 "t2tt",
                 src_lang=self.get_seamless_lang_code(source_language),
                 tgt_lang=self.get_seamless_lang_code(target_language),
-                ngram_filtering=True,
+                unit_generation_ngram_filtering=True,
            )
-        return {"text": {source_language: text, target_language: str(translated_text)}}
+            translated_text = str(translation_result[0])
+        return {"text": {source_language: text, target_language: translated_text}}
 
 
 # -------------------------------------------------------------------
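The translator hunk tracks an API change in seamless_communication: the import path drops the models.inference segment, predict now returns a pair (text output, speech output) instead of a 3-tuple, and the n-gram filtering flag is renamed. A minimal sketch of the new call shape, assuming an already-constructed Translator and seamless language codes such as "eng" and "fra":

    # translator: seamless_communication.inference.translator.Translator, built elsewhere
    text_output, speech_output = translator.predict(
        "hello world",
        "t2tt",                      # text-to-text translation task
        src_lang="eng",
        tgt_lang="fra",
        unit_generation_ngram_filtering=True,
    )
    translated = str(text_output[0])   # first translated string; speech_output is not used for t2tt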
@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
         logger.error("Sentry is not installed, avoided")
     else:
         logger.info("Sentry enabled")
-        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
+        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
 else:
     logger.info("Sentry disabled")
 
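For context, traces_sample_rate is the fraction of transactions sent to Sentry for performance tracing, so this hunk drops tracing from every request (1.0) to roughly 1 in 100 (0.01); error events are unaffected. A minimal sketch with a placeholder DSN:

    import sentry_sdk

    sentry_sdk.init(
        dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
        traces_sample_rate=0.01,  # ~1% of transactions traced; errors still always reported
    )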
@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
 import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
 import { MdError } from "react-icons/md";
 import useTranscriptList from "../transcripts/useTranscriptList";
-import { formatTime } from "../../lib/time";
+import { formatTimeMs } from "../../lib/time";
 import useApi from "../../lib/useApi";
 import { useError } from "../../(errors)/errorContext";
 import { FaEllipsisVertical } from "react-icons/fa6";
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
             <Text fontSize="small">
               {new Date(item.created_at).toLocaleString("en-US")}
               {"\u00A0"}-{"\u00A0"}
-              {formatTime(Math.floor(item.duration / 1000))}
+              {formatTimeMs(item.duration)}
             </Text>
             <ExpandableText noOfLines={5}>
               {item.short_summary}
@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
     const _wavesurfer = WaveSurfer.create({
       container: waveformRef.current,
       peaks: [props.waveform.data],
-
       height: "auto",
       duration: Math.floor(props.mediaDuration / 1000),
       media: props.media,
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
   const timeLabel = () => {
     if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
       return `${formatTime(currentTime)}/${formatTime(
-        Math.floor(props.mediaDuration / 1000),
+        Math.floor(props.mediaDuration / 1000)
       )}`;
     return "";
   };
@@ -1,4 +1,6 @@
-// TODO format duraction in be ?
+export const formatTimeMs = (milliseconds: number): string => {
+  return formatTime(Math.floor(milliseconds / 1000));
+};
 
 export const formatTime = (seconds: number): string => {
   let hours = Math.floor(seconds / 3600);
@@ -1,11 +1,11 @@
 import { GetTranscript, GetTranscriptTopic } from "../api";
-import { formatTime } from "./time";
+import { formatTime, formatTimeMs } from "./time";
 import { extractDomain } from "./utils";
 
 export async function sendZulipMessage(
   stream: string,
   topic: string,
-  message: string,
+  message: string
 ) {
   console.log("Sendiing zulip message", stream, topic);
   try {
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
 export function getZulipMessage(
   transcript: GetTranscript,
   topics: GetTranscriptTopic[] | null,
-  includeTopics: boolean,
+  includeTopics: boolean
 ) {
   const date = new Date(transcript.created_at);
 
   // Get the timezone offset in minutes and convert it to hours and minutes
   const timezoneOffset = -date.getTimezoneOffset();
   const offsetHours = String(
-    Math.floor(Math.abs(timezoneOffset) / 60),
+    Math.floor(Math.abs(timezoneOffset) / 60)
   ).padStart(2, "0");
   const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
   const offsetSign = timezoneOffset >= 0 ? "+" : "-";
@@ -58,7 +58,7 @@ export function getZulipMessage(
 
 **Date**: <time:${dateTimeString}>
 **Link**: [${extractDomain(link)}](${link})
-**Duration**: ${formatTime(transcript.duration)}
+**Duration**: ${formatTimeMs(transcript.duration)}
 
 `;
   let topicText = "";
@@ -4,18 +4,22 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-
-  replaysOnErrorSampleRate: 0.0,
-
-  // This sets the sample rate to be 10%. You may want this to be 100% while
-  // in development and sample at a lower rate in production
-  replaysSessionSampleRate: 0.0,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+
+    replaysOnErrorSampleRate: 0.0,
+
+    // This sets the sample rate to be 10%. You may want this to be 100% while
+    // in development and sample at a lower rate in production
+    replaysSessionSampleRate: 0.0,
+  });
+}
@@ -5,12 +5,16 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}
@@ -4,12 +4,16 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}