diff --git a/server/gpu/modal/reflector_diarizer.py b/server/gpu/modal/reflector_diarizer.py
index b1989a11..1a0e9f0a 100644
--- a/server/gpu/modal/reflector_diarizer.py
+++ b/server/gpu/modal/reflector_diarizer.py
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 
 PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
 MODEL_DIR = "/root/diarization_models"
-
+HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
 
 stub = Stub(name="reflector-diarizer")
 
@@ -34,7 +34,7 @@ def download_pyannote_audio():
     Pipeline.from_pretrained(
         "pyannote/speaker-diarization-3.0",
         cache_dir=MODEL_DIR,
-        use_auth_token="***REMOVED***"
+        use_auth_token=HUGGINGFACE_TOKEN
     )
diff --git a/server/gpu/modal/reflector_translator.py b/server/gpu/modal/reflector_translator.py
index cc7822fc..8e920a5a 100644
--- a/server/gpu/modal/reflector_translator.py
+++ b/server/gpu/modal/reflector_translator.py
@@ -55,15 +55,15 @@ def configure_seamless_m4t():
     import yaml
 
-    ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
+    CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
         model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
         vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
         unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
         unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
 
     model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
         os.getcwd(), model_dir, latest_model_version, tokenizer_name
     )
 
-    model_yaml_data["checkpoint"] = f"file:/{model_path}"
-    vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
-    unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
-    unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
+    model_yaml_data["checkpoint"] = f"file://{model_path}"
+    vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
+    unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
+    unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
         yaml.dump(model_yaml_data, file)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
         yaml.dump(vocoder_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
         yaml.dump(unity_100_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
         yaml.dump(unity_200_yaml_data, file)
@@ -144,7 +144,7 @@ transcriber_image = (
 class Translator:
     def __enter__(self):
         import torch
-        from seamless_communication.models.inference.translator import Translator
+        from seamless_communication.inference.translator import Translator
 
         self.lock = threading.Lock()
         self.use_gpu = torch.cuda.is_available()
@@ -363,14 +363,15 @@ class Translator:
     @method()
     def translate_text(self, text: str, source_language: str, target_language: str):
         with self.lock:
-            translated_text, _, _ = self.translator.predict(
+            translation_result, _ = self.translator.predict(
                 text,
                 "t2tt",
                 src_lang=self.get_seamless_lang_code(source_language),
                 tgt_lang=self.get_seamless_lang_code(target_language),
-                ngram_filtering=True,
+                unit_generation_ngram_filtering=True,
             )
-        return {"text": {source_language: text, target_language: str(translated_text)}}
+        translated_text = str(translation_result[0])
+        return {"text": {source_language: text, target_language: translated_text}}
 
 
 # -------------------------------------------------------------------
diff --git a/server/reflector/app.py b/server/reflector/app.py
index 2a72ff44..1be71210 100644
--- a/server/reflector/app.py
+++ b/server/reflector/app.py
@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
         logger.error("Sentry is not installed, avoided")
     else:
         logger.info("Sentry enabled")
-        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
+        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
 else:
     logger.info("Sentry disabled")
diff --git a/server/tests/test_transcripts_rtc_ws.py b/server/tests/test_transcripts_rtc_ws.py
index c607fe06..e95839f0 100644
--- a/server/tests/test_transcripts_rtc_ws.py
+++ b/server/tests/test_transcripts_rtc_ws.py
@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
diff --git a/www/app/[domain]/browse/page.tsx b/www/app/[domain]/browse/page.tsx
index 16321f9b..4a59c6dc 100644
--- a/www/app/[domain]/browse/page.tsx
+++ b/www/app/[domain]/browse/page.tsx
@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
 import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
 import { MdError } from "react-icons/md";
 import useTranscriptList from "../transcripts/useTranscriptList";
-import { formatTime } from "../../lib/time";
+import { formatTimeMs } from "../../lib/time";
 import useApi from "../../lib/useApi";
 import { useError } from "../../(errors)/errorContext";
 import { FaEllipsisVertical } from "react-icons/fa6";
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
                 {new Date(item.created_at).toLocaleString("en-US")}
                 {"\u00A0"}-{"\u00A0"}
-                {formatTime(Math.floor(item.duration / 1000))}
+                {formatTimeMs(item.duration)}
                 {item.short_summary}
diff --git a/www/app/[domain]/transcripts/player.tsx b/www/app/[domain]/transcripts/player.tsx
index 65cc72d8..cd3703f4 100644
--- a/www/app/[domain]/transcripts/player.tsx
+++ b/www/app/[domain]/transcripts/player.tsx
@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
     const _wavesurfer = WaveSurfer.create({
       container: waveformRef.current,
      peaks: [props.waveform.data],
-      height: "auto",
      duration: Math.floor(props.mediaDuration / 1000),
      media: props.media,
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
   const timeLabel = () => {
     if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
       return `${formatTime(currentTime)}/${formatTime(
-        Math.floor(props.mediaDuration / 1000),
+        Math.floor(props.mediaDuration / 1000)
       )}`;
     return "";
   };
diff --git a/www/app/lib/time.ts b/www/app/lib/time.ts
index de383f37..a632c979 100644
--- a/www/app/lib/time.ts
+++ b/www/app/lib/time.ts
@@ -1,4 +1,6 @@
-// TODO format duraction in be ?
+export const formatTimeMs = (milliseconds: number): string => {
+  return formatTime(Math.floor(milliseconds / 1000));
+};
 
 export const formatTime = (seconds: number): string => {
   let hours = Math.floor(seconds / 3600);
diff --git a/www/app/lib/zulip.ts b/www/app/lib/zulip.ts
index 48ec94fb..12e761a4 100644
--- a/www/app/lib/zulip.ts
+++ b/www/app/lib/zulip.ts
@@ -1,11 +1,11 @@
 import { GetTranscript, GetTranscriptTopic } from "../api";
-import { formatTime } from "./time";
+import { formatTime, formatTimeMs } from "./time";
 import { extractDomain } from "./utils";
 
 export async function sendZulipMessage(
   stream: string,
   topic: string,
-  message: string,
+  message: string
 ) {
   console.log("Sendiing zulip message", stream, topic);
   try {
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
 export function getZulipMessage(
   transcript: GetTranscript,
   topics: GetTranscriptTopic[] | null,
-  includeTopics: boolean,
+  includeTopics: boolean
 ) {
   const date = new Date(transcript.created_at);
 
   // Get the timezone offset in minutes and convert it to hours and minutes
   const timezoneOffset = -date.getTimezoneOffset();
   const offsetHours = String(
-    Math.floor(Math.abs(timezoneOffset) / 60),
+    Math.floor(Math.abs(timezoneOffset) / 60)
   ).padStart(2, "0");
   const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
   const offsetSign = timezoneOffset >= 0 ? "+" : "-";
@@ -58,7 +58,7 @@ export function getZulipMessage(
 **Date**: 
 **Link**: [${extractDomain(link)}](${link})
-**Duration**: ${formatTime(transcript.duration)}
+**Duration**: ${formatTimeMs(transcript.duration)}
 `;
 
   let topicText = "";
diff --git a/www/sentry.client.config.ts b/www/sentry.client.config.ts
index 2c8fb7ce..aff65bbd 100644
--- a/www/sentry.client.config.ts
+++ b/www/sentry.client.config.ts
@@ -4,18 +4,22 @@ import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
 
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
 
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
 
-  replaysOnErrorSampleRate: 0.0,
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
 
-  // This sets the sample rate to be 10%. You may want this to be 100% while
-  // in development and sample at a lower rate in production
-  replaysSessionSampleRate: 0.0,
-});
+    replaysOnErrorSampleRate: 0.0,
+
+    // This sets the sample rate to be 10%. You may want this to be 100% while
+    // in development and sample at a lower rate in production
+    replaysSessionSampleRate: 0.0,
+  });
+}
diff --git a/www/sentry.edge.config.ts b/www/sentry.edge.config.ts
index 92c2b174..c0127472 100644
--- a/www/sentry.edge.config.ts
+++ b/www/sentry.edge.config.ts
@@ -5,12 +5,16 @@ import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
 
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
 
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}
diff --git a/www/sentry.server.config.ts b/www/sentry.server.config.ts
index 7d24e518..be9622fd 100644
--- a/www/sentry.server.config.ts
+++ b/www/sentry.server.config.ts
@@ -4,12 +4,16 @@ import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
 
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
 
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}