From 5b3b6a80df06f6ecdb1133c91dd0050d77c21a99 Mon Sep 17 00:00:00 2001 From: Sara Date: Fri, 12 Jan 2024 14:57:41 +0100 Subject: [PATCH 1/8] fix regions --- www/app/[domain]/transcripts/player.tsx | 15 ++++++--------- www/app/[domain]/transcripts/recorder.tsx | 7 +------ www/app/lib/custom-plugins/regions.ts | 18 ------------------ www/app/styles/recorder.js | 1 - 4 files changed, 7 insertions(+), 34 deletions(-) delete mode 100644 www/app/lib/custom-plugins/regions.ts diff --git a/www/app/[domain]/transcripts/player.tsx b/www/app/[domain]/transcripts/player.tsx index 632dfd8a..c8ffd5ea 100644 --- a/www/app/[domain]/transcripts/player.tsx +++ b/www/app/[domain]/transcripts/player.tsx @@ -1,7 +1,7 @@ import React, { useRef, useEffect, useState } from "react"; import WaveSurfer from "wavesurfer.js"; -import CustomRegionsPlugin from "../../lib/custom-plugins/regions"; +import RegionsPlugin from "wavesurfer.js/dist/plugins/regions.esm.js"; import { formatTime } from "../../lib/time"; import { Topic } from "./webSocketTypes"; @@ -24,9 +24,7 @@ export default function Player(props: PlayerProps) { const [wavesurfer, setWavesurfer] = useState(null); const [isPlaying, setIsPlaying] = useState(false); const [currentTime, setCurrentTime] = useState(0); - const [waveRegions, setWaveRegions] = useState( - null, - ); + const [waveRegions, setWaveRegions] = useState(null); const [activeTopic, setActiveTopic] = props.useActiveTopic; const topicsRef = useRef(props.topics); // Waveform setup @@ -39,12 +37,13 @@ export default function Player(props: PlayerProps) { // This is not ideal, but it works for now. const _wavesurfer = WaveSurfer.create({ container: waveformRef.current, - peaks: props.waveform, + peaks: props.waveform.data, hideScrollbar: true, autoCenter: true, barWidth: 2, height: "auto", - duration: props.mediaDuration, + duration: Math.floor(props.mediaDuration / 1000), + media: props.media, ...waveSurferStyles.player, }); @@ -64,12 +63,10 @@ export default function Player(props: PlayerProps) { }); _wavesurfer.on("timeupdate", setCurrentTime); - setWaveRegions(_wavesurfer.registerPlugin(CustomRegionsPlugin.create())); + setWaveRegions(_wavesurfer.registerPlugin(RegionsPlugin.create())); _wavesurfer.toggleInteraction(true); - _wavesurfer.setMediaElement(props.media); - setWavesurfer(_wavesurfer); return () => { diff --git a/www/app/[domain]/transcripts/recorder.tsx b/www/app/[domain]/transcripts/recorder.tsx index 562f6a76..8ab6d9c1 100644 --- a/www/app/[domain]/transcripts/recorder.tsx +++ b/www/app/[domain]/transcripts/recorder.tsx @@ -1,8 +1,7 @@ import React, { useRef, useEffect, useState } from "react"; import WaveSurfer from "wavesurfer.js"; -import RecordPlugin from "../../lib/custom-plugins/record"; -import CustomRegionsPlugin from "../../lib/custom-plugins/regions"; +import RecordPlugin from "wavesurfer.js/dist/plugins/record.esm.js"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faMicrophone } from "@fortawesome/free-solid-svg-icons"; @@ -33,9 +32,6 @@ export default function Recorder(props: RecorderProps) { const [currentTime, setCurrentTime] = useState(0); const [timeInterval, setTimeInterval] = useState(null); const [duration, setDuration] = useState(0); - const [waveRegions, setWaveRegions] = useState( - null, - ); const [deviceId, setDeviceId] = useState(null); const [recordStarted, setRecordStarted] = useState(false); const [showDevices, setShowDevices] = useState(false); @@ -119,7 +115,6 @@ export default function Recorder(props: RecorderProps) { _wavesurfer.on("timeupdate", setCurrentTime); setRecord(_wavesurfer.registerPlugin(RecordPlugin.create())); - setWaveRegions(_wavesurfer.registerPlugin(CustomRegionsPlugin.create())); setWavesurfer(_wavesurfer); diff --git a/www/app/lib/custom-plugins/regions.ts b/www/app/lib/custom-plugins/regions.ts deleted file mode 100644 index dff05f3b..00000000 --- a/www/app/lib/custom-plugins/regions.ts +++ /dev/null @@ -1,18 +0,0 @@ -// Source code: https://github.com/katspaugh/wavesurfer.js/blob/fa2bcfe/src/plugins/regions.ts - -import RegionsPlugin, { - RegionsPluginOptions, -} from "wavesurfer.js/dist/plugins/regions"; - -class CustomRegionsPlugin extends RegionsPlugin { - public static create(options?: RegionsPluginOptions) { - return new CustomRegionsPlugin(options); - } - - constructor(options?: RegionsPluginOptions) { - super(options); - this["avoidOverlapping"] = () => {}; - } -} - -export default CustomRegionsPlugin; diff --git a/www/app/styles/recorder.js b/www/app/styles/recorder.js index dc9ace60..b31b9a61 100644 --- a/www/app/styles/recorder.js +++ b/www/app/styles/recorder.js @@ -15,7 +15,6 @@ export const waveSurferStyles = { font-size: 0.7rem; border-radius: 0 3px 3px 0; - position: absolute; width: 100px; max-width: fit-content; cursor: pointer; From 7de1e1ebbd0a680d34220baf05988b3890f2a187 Mon Sep 17 00:00:00 2001 From: Sara Date: Mon, 15 Jan 2024 17:49:27 +0100 Subject: [PATCH 2/8] fix recording --- www/app/[domain]/transcripts/recorder.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/app/[domain]/transcripts/recorder.tsx b/www/app/[domain]/transcripts/recorder.tsx index 8ab6d9c1..3ef96d62 100644 --- a/www/app/[domain]/transcripts/recorder.tsx +++ b/www/app/[domain]/transcripts/recorder.tsx @@ -1,7 +1,7 @@ import React, { useRef, useEffect, useState } from "react"; import WaveSurfer from "wavesurfer.js"; -import RecordPlugin from "wavesurfer.js/dist/plugins/record.esm.js"; +import RecordPlugin from "../../lib/custom-plugins/record"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faMicrophone } from "@fortawesome/free-solid-svg-icons"; From 32bb4116529597457df60904900dc9eef9272c0c Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Fri, 1 Mar 2024 20:35:37 +0100 Subject: [PATCH 3/8] Remove traces_sample_rate (#352) --- server/reflector/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/reflector/app.py b/server/reflector/app.py index 2a72ff44..8fbf6906 100644 --- a/server/reflector/app.py +++ b/server/reflector/app.py @@ -45,7 +45,7 @@ if settings.SENTRY_DSN: logger.error("Sentry is not installed, avoided") else: logger.info("Sentry enabled") - sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0) + sentry_sdk.init(dsn=settings.SENTRY_DSN) else: logger.info("Sentry disabled") From 72b22d1005b0201099307804f96bbc513a8194ad Mon Sep 17 00:00:00 2001 From: projects-g <63178974+projects-g@users.noreply.github.com> Date: Tue, 16 Apr 2024 21:12:24 +0530 Subject: [PATCH 4/8] Update all modal deployments and change seamless configuration due to changes in src repo (#353) * update all modal deployments and change seamless configuration due to change in src repo * add fixture --- server/gpu/modal/reflector_translator.py | 35 ++++++++++++------------ server/tests/test_transcripts_rtc_ws.py | 2 ++ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/server/gpu/modal/reflector_translator.py b/server/gpu/modal/reflector_translator.py index cc7822fc..8e920a5a 100644 --- a/server/gpu/modal/reflector_translator.py +++ b/server/gpu/modal/reflector_translator.py @@ -55,15 +55,15 @@ def configure_seamless_m4t(): import yaml - ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards" + CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards" - with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file: + with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file: model_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file: + with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file: vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file: + with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file: unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file: + with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file: unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader) model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots" @@ -87,18 +87,18 @@ def configure_seamless_m4t(): os.getcwd(), model_dir, latest_model_version, tokenizer_name ) - model_yaml_data["checkpoint"] = f"file:/{model_path}" - vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}" - unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}" - unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}" + model_yaml_data["checkpoint"] = f"file://{model_path}" + vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}" + unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}" + unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}" - with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file: + with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file: yaml.dump(model_yaml_data, file) - with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file: + with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file: yaml.dump(vocoder_yaml_data, file) - with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file: + with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file: yaml.dump(unity_100_yaml_data, file) - with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file: + with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file: yaml.dump(unity_200_yaml_data, file) @@ -144,7 +144,7 @@ transcriber_image = ( class Translator: def __enter__(self): import torch - from seamless_communication.models.inference.translator import Translator + from seamless_communication.inference.translator import Translator self.lock = threading.Lock() self.use_gpu = torch.cuda.is_available() @@ -363,14 +363,15 @@ class Translator: @method() def translate_text(self, text: str, source_language: str, target_language: str): with self.lock: - translated_text, _, _ = self.translator.predict( + translation_result, _ = self.translator.predict( text, "t2tt", src_lang=self.get_seamless_lang_code(source_language), tgt_lang=self.get_seamless_lang_code(target_language), - ngram_filtering=True, + unit_generation_ngram_filtering=True, ) - return {"text": {source_language: text, target_language: str(translated_text)}} + translated_text = str(translation_result[0]) + return {"text": {source_language: text, target_language: translated_text}} # ------------------------------------------------------------------- diff --git a/server/tests/test_transcripts_rtc_ws.py b/server/tests/test_transcripts_rtc_ws.py index c607fe06..e95839f0 100644 --- a/server/tests/test_transcripts_rtc_ws.py +++ b/server/tests/test_transcripts_rtc_ws.py @@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket( dummy_storage, fake_mp3_upload, ensure_casing, + nltk, appserver, sentence_tokenize, ): @@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr( dummy_storage, fake_mp3_upload, ensure_casing, + nltk, appserver, sentence_tokenize, ): From 63502becd6cfc59ceb2c15ad53451f9841cc0229 Mon Sep 17 00:00:00 2001 From: projects-g <63178974+projects-g@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:30:45 +0530 Subject: [PATCH 5/8] Move HF_token to modal secret (#354) * update all modal deployments and change seamless configuration due to change in src repo * add fixture * move token to secret --- server/gpu/modal/reflector_diarizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/gpu/modal/reflector_diarizer.py b/server/gpu/modal/reflector_diarizer.py index b1989a11..1a0e9f0a 100644 --- a/server/gpu/modal/reflector_diarizer.py +++ b/server/gpu/modal/reflector_diarizer.py @@ -11,7 +11,7 @@ from pydantic import BaseModel PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0" MODEL_DIR = "/root/diarization_models" - +HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret") stub = Stub(name="reflector-diarizer") @@ -34,7 +34,7 @@ def download_pyannote_audio(): Pipeline.from_pretrained( "pyannote/speaker-diarization-3.0", cache_dir=MODEL_DIR, - use_auth_token="***REMOVED***" + use_auth_token=HUGGINGFACE_TOKEN ) From c21a5945fd9436af01087861668edcb2f57d00f6 Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Wed, 24 Apr 2024 16:00:41 +0200 Subject: [PATCH 6/8] Move sentry DSN as environment variable (#356) --- www/sentry.client.config.ts | 26 +++++++++++++++----------- www/sentry.edge.config.ts | 18 +++++++++++------- www/sentry.server.config.ts | 18 +++++++++++------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/www/sentry.client.config.ts b/www/sentry.client.config.ts index 2c8fb7ce..aff65bbd 100644 --- a/www/sentry.client.config.ts +++ b/www/sentry.client.config.ts @@ -4,18 +4,22 @@ import * as Sentry from "@sentry/nextjs"; -Sentry.init({ - dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920", +const SENTRY_DSN = process.env.SENTRY_DSN; - // Adjust this value in production, or use tracesSampler for greater control - tracesSampleRate: 0, +if (SENTRY_DSN) { + Sentry.init({ + dsn: SENTRY_DSN, - // Setting this option to true will print useful information to the console while you're setting up Sentry. - debug: false, + // Adjust this value in production, or use tracesSampler for greater control + tracesSampleRate: 0, - replaysOnErrorSampleRate: 0.0, + // Setting this option to true will print useful information to the console while you're setting up Sentry. + debug: false, - // This sets the sample rate to be 10%. You may want this to be 100% while - // in development and sample at a lower rate in production - replaysSessionSampleRate: 0.0, -}); + replaysOnErrorSampleRate: 0.0, + + // This sets the sample rate to be 10%. You may want this to be 100% while + // in development and sample at a lower rate in production + replaysSessionSampleRate: 0.0, + }); +} diff --git a/www/sentry.edge.config.ts b/www/sentry.edge.config.ts index 92c2b174..c0127472 100644 --- a/www/sentry.edge.config.ts +++ b/www/sentry.edge.config.ts @@ -5,12 +5,16 @@ import * as Sentry from "@sentry/nextjs"; -Sentry.init({ - dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920", +const SENTRY_DSN = process.env.SENTRY_DSN; - // Adjust this value in production, or use tracesSampler for greater control - tracesSampleRate: 0, +if (SENTRY_DSN) { + Sentry.init({ + dsn: SENTRY_DSN, - // Setting this option to true will print useful information to the console while you're setting up Sentry. - debug: false, -}); + // Adjust this value in production, or use tracesSampler for greater control + tracesSampleRate: 0, + + // Setting this option to true will print useful information to the console while you're setting up Sentry. + debug: false, + }); +} diff --git a/www/sentry.server.config.ts b/www/sentry.server.config.ts index 7d24e518..be9622fd 100644 --- a/www/sentry.server.config.ts +++ b/www/sentry.server.config.ts @@ -4,12 +4,16 @@ import * as Sentry from "@sentry/nextjs"; -Sentry.init({ - dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920", +const SENTRY_DSN = process.env.SENTRY_DSN; - // Adjust this value in production, or use tracesSampler for greater control - tracesSampleRate: 0, +if (SENTRY_DSN) { + Sentry.init({ + dsn: SENTRY_DSN, - // Setting this option to true will print useful information to the console while you're setting up Sentry. - debug: false, -}); + // Adjust this value in production, or use tracesSampler for greater control + tracesSampleRate: 0, + + // Setting this option to true will print useful information to the console while you're setting up Sentry. + debug: false, + }); +} From 3e6813e7c69205f98620907dd6318e866edb8e63 Mon Sep 17 00:00:00 2001 From: Sergey Mankovsky Date: Mon, 3 Jun 2024 13:43:14 +0200 Subject: [PATCH 7/8] Reduce transaction sample rate --- server/reflector/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/reflector/app.py b/server/reflector/app.py index 8fbf6906..1be71210 100644 --- a/server/reflector/app.py +++ b/server/reflector/app.py @@ -45,7 +45,7 @@ if settings.SENTRY_DSN: logger.error("Sentry is not installed, avoided") else: logger.info("Sentry enabled") - sentry_sdk.init(dsn=settings.SENTRY_DSN) + sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01) else: logger.info("Sentry disabled") From 10045ed9de192936311184f9d6366a879ed758c3 Mon Sep 17 00:00:00 2001 From: Sergey Mankovsky Date: Thu, 6 Jun 2024 15:11:03 +0200 Subject: [PATCH 8/8] Fix send to zulip duration --- www/app/[domain]/browse/page.tsx | 4 ++-- www/app/lib/time.ts | 4 ++++ www/app/lib/zulip.ts | 10 +++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/www/app/[domain]/browse/page.tsx b/www/app/[domain]/browse/page.tsx index 211c927b..877d19b5 100644 --- a/www/app/[domain]/browse/page.tsx +++ b/www/app/[domain]/browse/page.tsx @@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6"; import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa"; import { MdError } from "react-icons/md"; import useTranscriptList from "../transcripts/useTranscriptList"; -import { formatTime } from "../../lib/time"; +import { formatTimeMs } from "../../lib/time"; import useApi from "../../lib/useApi"; import { useError } from "../../(errors)/errorContext"; import { FaEllipsisVertical } from "react-icons/fa6"; @@ -273,7 +273,7 @@ export default function TranscriptBrowser() { {new Date(item.created_at).toLocaleString("en-US")} {"\u00A0"}-{"\u00A0"} - {formatTime(Math.floor(item.duration / 1000))} + {formatTimeMs(item.duration)} {item.short_summary} diff --git a/www/app/lib/time.ts b/www/app/lib/time.ts index 28d5d330..a632c979 100644 --- a/www/app/lib/time.ts +++ b/www/app/lib/time.ts @@ -1,3 +1,7 @@ +export const formatTimeMs = (milliseconds: number): string => { + return formatTime(Math.floor(milliseconds / 1000)); +}; + export const formatTime = (seconds: number): string => { let hours = Math.floor(seconds / 3600); let minutes = Math.floor((seconds % 3600) / 60); diff --git a/www/app/lib/zulip.ts b/www/app/lib/zulip.ts index 48ec94fb..12e761a4 100644 --- a/www/app/lib/zulip.ts +++ b/www/app/lib/zulip.ts @@ -1,11 +1,11 @@ import { GetTranscript, GetTranscriptTopic } from "../api"; -import { formatTime } from "./time"; +import { formatTime, formatTimeMs } from "./time"; import { extractDomain } from "./utils"; export async function sendZulipMessage( stream: string, topic: string, - message: string, + message: string ) { console.log("Sendiing zulip message", stream, topic); try { @@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000; export function getZulipMessage( transcript: GetTranscript, topics: GetTranscriptTopic[] | null, - includeTopics: boolean, + includeTopics: boolean ) { const date = new Date(transcript.created_at); // Get the timezone offset in minutes and convert it to hours and minutes const timezoneOffset = -date.getTimezoneOffset(); const offsetHours = String( - Math.floor(Math.abs(timezoneOffset) / 60), + Math.floor(Math.abs(timezoneOffset) / 60) ).padStart(2, "0"); const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0"); const offsetSign = timezoneOffset >= 0 ? "+" : "-"; @@ -58,7 +58,7 @@ export function getZulipMessage( **Date**: **Link**: [${extractDomain(link)}](${link}) -**Duration**: ${formatTime(transcript.duration)} +**Duration**: ${formatTimeMs(transcript.duration)} `; let topicText = "";