diff --git a/server/gpu/modal/reflector_diarizer.py b/server/gpu/modal/reflector_diarizer.py
index b1989a11..1a0e9f0a 100644
--- a/server/gpu/modal/reflector_diarizer.py
+++ b/server/gpu/modal/reflector_diarizer.py
@@ -11,7 +11,7 @@ from pydantic import BaseModel
PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
MODEL_DIR = "/root/diarization_models"
-
+HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
stub = Stub(name="reflector-diarizer")
@@ -34,7 +34,7 @@ def download_pyannote_audio():
Pipeline.from_pretrained(
"pyannote/speaker-diarization-3.0",
cache_dir=MODEL_DIR,
- use_auth_token="***REMOVED***"
+ use_auth_token=HUGGINGFACE_TOKEN
)
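Note on the hunk above: `modal.Secret.from_name()` returns a Secret object, not the token string itself, so `pyannote`'s `use_auth_token=` most likely needs the value read from the environment after the secret has been attached to the Modal function. A minimal sketch of that pattern, assuming the secret exposes the token under an env var named `HF_TOKEN` (hypothetical key) and the `Stub`-based Modal API used elsewhere in this file:

```python
import os

import modal
from modal import Stub

MODEL_DIR = "/root/diarization_models"
stub = Stub(name="reflector-diarizer")

# Attaching the secret makes Modal inject its keys as environment variables
# inside the container (newer Modal releases take a `secrets=[...]` list).
@stub.function(secret=modal.Secret.from_name("my-huggingface-secret"))
def download_pyannote_audio():
    from pyannote.audio import Pipeline

    Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.0",
        cache_dir=MODEL_DIR,
        use_auth_token=os.environ["HF_TOKEN"],  # assumed key name inside the secret
    )
```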
diff --git a/server/gpu/modal/reflector_translator.py b/server/gpu/modal/reflector_translator.py
index cc7822fc..8e920a5a 100644
--- a/server/gpu/modal/reflector_translator.py
+++ b/server/gpu/modal/reflector_translator.py
@@ -55,15 +55,15 @@ def configure_seamless_m4t():
import yaml
- ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
+ CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
- with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
+ with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
- with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
+ with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
- with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
+ with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
- with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
+ with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
os.getcwd(), model_dir, latest_model_version, tokenizer_name
)
- model_yaml_data["checkpoint"] = f"file:/{model_path}"
- vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
- unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
- unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
+ model_yaml_data["checkpoint"] = f"file://{model_path}"
+ vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
+ unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
+ unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
- with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
+ with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
yaml.dump(model_yaml_data, file)
- with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
+ with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
yaml.dump(vocoder_yaml_data, file)
- with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
+ with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
yaml.dump(unity_100_yaml_data, file)
- with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
+ with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
yaml.dump(unity_200_yaml_data, file)
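The `file:/{path}` → `file://{path}` change above matters because the rewritten asset cards are read back as URIs; since the paths are built from `os.getcwd()` they are absolute, and the new form yields a well-formed `file:///...` value. If any of those paths could ever be relative, `pathlib.Path.as_uri()` builds the same URI and fails loudly instead of silently writing a malformed one (a sketch, not part of the patch):

```python
from pathlib import Path

# Illustrative absolute path; the real values come from os.path.join(os.getcwd(), ...)
model_path = "/root/seamless_models/checkpoint.pt"

checkpoint_uri = Path(model_path).as_uri()
# "file:///root/seamless_models/checkpoint.pt" -- identical to f"file://{model_path}"
# for absolute paths; as_uri() raises ValueError if the path is relative.
```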
@@ -144,7 +144,7 @@ transcriber_image = (
class Translator:
def __enter__(self):
import torch
- from seamless_communication.models.inference.translator import Translator
+ from seamless_communication.inference.translator import Translator
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
@@ -363,14 +363,15 @@ class Translator:
@method()
def translate_text(self, text: str, source_language: str, target_language: str):
with self.lock:
- translated_text, _, _ = self.translator.predict(
+ translation_result, _ = self.translator.predict(
text,
"t2tt",
src_lang=self.get_seamless_lang_code(source_language),
tgt_lang=self.get_seamless_lang_code(target_language),
- ngram_filtering=True,
+ unit_generation_ngram_filtering=True,
)
- return {"text": {source_language: text, target_language: str(translated_text)}}
+ translated_text = str(translation_result[0])
+ return {"text": {source_language: text, target_language: translated_text}}
# -------------------------------------------------------------------
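For context on the `predict()` changes above: recent `seamless_communication` releases moved `Translator` to `seamless_communication.inference` and changed `predict()` to return a `(text_output, speech_output)` pair instead of a 3-tuple, with n-gram filtering controlled by `unit_generation_ngram_filtering`. A minimal sketch of the call shape the new code relies on (input text and language codes are illustrative):

```python
# `translator` is a seamless_communication.inference.Translator instance,
# constructed as in __enter__ above.
text_output, speech_output = translator.predict(
    "hello world",              # illustrative input
    "t2tt",                     # text-to-text translation task
    src_lang="eng",
    tgt_lang="fra",
    unit_generation_ngram_filtering=True,
)
translated = str(text_output[0])  # first translation for the single input
# speech_output is None for text-to-text tasks
```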
diff --git a/server/reflector/app.py b/server/reflector/app.py
index 2a72ff44..1be71210 100644
--- a/server/reflector/app.py
+++ b/server/reflector/app.py
@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
logger.error("Sentry is not installed, avoided")
else:
logger.info("Sentry enabled")
- sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
+ sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
else:
logger.info("Sentry disabled")
diff --git a/server/tests/test_transcripts_rtc_ws.py b/server/tests/test_transcripts_rtc_ws.py
index c607fe06..e95839f0 100644
--- a/server/tests/test_transcripts_rtc_ws.py
+++ b/server/tests/test_transcripts_rtc_ws.py
@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
dummy_storage,
fake_mp3_upload,
ensure_casing,
+ nltk,
appserver,
sentence_tokenize,
):
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
dummy_storage,
fake_mp3_upload,
ensure_casing,
+ nltk,
appserver,
sentence_tokenize,
):
diff --git a/www/app/[domain]/browse/page.tsx b/www/app/[domain]/browse/page.tsx
index 16321f9b..4a59c6dc 100644
--- a/www/app/[domain]/browse/page.tsx
+++ b/www/app/[domain]/browse/page.tsx
@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
import { MdError } from "react-icons/md";
import useTranscriptList from "../transcripts/useTranscriptList";
-import { formatTime } from "../../lib/time";
+import { formatTimeMs } from "../../lib/time";
import useApi from "../../lib/useApi";
import { useError } from "../../(errors)/errorContext";
import { FaEllipsisVertical } from "react-icons/fa6";
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
{new Date(item.created_at).toLocaleString("en-US")}
{"\u00A0"}-{"\u00A0"}
- {formatTime(Math.floor(item.duration / 1000))}
+ {formatTimeMs(item.duration)}
{item.short_summary}
diff --git a/www/app/[domain]/transcripts/player.tsx b/www/app/[domain]/transcripts/player.tsx
index 65cc72d8..cd3703f4 100644
--- a/www/app/[domain]/transcripts/player.tsx
+++ b/www/app/[domain]/transcripts/player.tsx
@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
const _wavesurfer = WaveSurfer.create({
container: waveformRef.current,
peaks: [props.waveform.data],
-
height: "auto",
duration: Math.floor(props.mediaDuration / 1000),
media: props.media,
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
const timeLabel = () => {
if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
return `${formatTime(currentTime)}/${formatTime(
- Math.floor(props.mediaDuration / 1000),
+ Math.floor(props.mediaDuration / 1000)
)}`;
return "";
};
diff --git a/www/app/lib/time.ts b/www/app/lib/time.ts
index de383f37..a632c979 100644
--- a/www/app/lib/time.ts
+++ b/www/app/lib/time.ts
@@ -1,4 +1,6 @@
-// TODO format duraction in be ?
+export const formatTimeMs = (milliseconds: number): string => {
+ return formatTime(Math.floor(milliseconds / 1000));
+};
export const formatTime = (seconds: number): string => {
let hours = Math.floor(seconds / 3600);
diff --git a/www/app/lib/zulip.ts b/www/app/lib/zulip.ts
index 48ec94fb..12e761a4 100644
--- a/www/app/lib/zulip.ts
+++ b/www/app/lib/zulip.ts
@@ -1,11 +1,11 @@
import { GetTranscript, GetTranscriptTopic } from "../api";
-import { formatTime } from "./time";
+import { formatTime, formatTimeMs } from "./time";
import { extractDomain } from "./utils";
export async function sendZulipMessage(
stream: string,
topic: string,
- message: string,
+ message: string
) {
console.log("Sendiing zulip message", stream, topic);
try {
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
export function getZulipMessage(
transcript: GetTranscript,
topics: GetTranscriptTopic[] | null,
- includeTopics: boolean,
+ includeTopics: boolean
) {
const date = new Date(transcript.created_at);
// Get the timezone offset in minutes and convert it to hours and minutes
const timezoneOffset = -date.getTimezoneOffset();
const offsetHours = String(
- Math.floor(Math.abs(timezoneOffset) / 60),
+ Math.floor(Math.abs(timezoneOffset) / 60)
).padStart(2, "0");
const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
const offsetSign = timezoneOffset >= 0 ? "+" : "-";
@@ -58,7 +58,7 @@ export function getZulipMessage(
**Date**:
**Link**: [${extractDomain(link)}](${link})
-**Duration**: ${formatTime(transcript.duration)}
+**Duration**: ${formatTimeMs(transcript.duration)}
`;
let topicText = "";
diff --git a/www/sentry.client.config.ts b/www/sentry.client.config.ts
index 2c8fb7ce..aff65bbd 100644
--- a/www/sentry.client.config.ts
+++ b/www/sentry.client.config.ts
@@ -4,18 +4,22 @@
import * as Sentry from "@sentry/nextjs";
-Sentry.init({
- dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
- // Adjust this value in production, or use tracesSampler for greater control
- tracesSampleRate: 0,
+if (SENTRY_DSN) {
+ Sentry.init({
+ dsn: SENTRY_DSN,
- // Setting this option to true will print useful information to the console while you're setting up Sentry.
- debug: false,
+ // Adjust this value in production, or use tracesSampler for greater control
+ tracesSampleRate: 0,
- replaysOnErrorSampleRate: 0.0,
+ // Setting this option to true will print useful information to the console while you're setting up Sentry.
+ debug: false,
- // This sets the sample rate to be 10%. You may want this to be 100% while
- // in development and sample at a lower rate in production
- replaysSessionSampleRate: 0.0,
-});
+ replaysOnErrorSampleRate: 0.0,
+
+    // Session replay is disabled here; raise this (e.g. 0.1 samples 10% of sessions)
+    // if replays are ever needed, typically higher in development than in production
+ replaysSessionSampleRate: 0.0,
+ });
+}
diff --git a/www/sentry.edge.config.ts b/www/sentry.edge.config.ts
index 92c2b174..c0127472 100644
--- a/www/sentry.edge.config.ts
+++ b/www/sentry.edge.config.ts
@@ -5,12 +5,16 @@
import * as Sentry from "@sentry/nextjs";
-Sentry.init({
- dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
- // Adjust this value in production, or use tracesSampler for greater control
- tracesSampleRate: 0,
+if (SENTRY_DSN) {
+ Sentry.init({
+ dsn: SENTRY_DSN,
- // Setting this option to true will print useful information to the console while you're setting up Sentry.
- debug: false,
-});
+ // Adjust this value in production, or use tracesSampler for greater control
+ tracesSampleRate: 0,
+
+ // Setting this option to true will print useful information to the console while you're setting up Sentry.
+ debug: false,
+ });
+}
diff --git a/www/sentry.server.config.ts b/www/sentry.server.config.ts
index 7d24e518..be9622fd 100644
--- a/www/sentry.server.config.ts
+++ b/www/sentry.server.config.ts
@@ -4,12 +4,16 @@
import * as Sentry from "@sentry/nextjs";
-Sentry.init({
- dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
+const SENTRY_DSN = process.env.SENTRY_DSN;
- // Adjust this value in production, or use tracesSampler for greater control
- tracesSampleRate: 0,
+if (SENTRY_DSN) {
+ Sentry.init({
+ dsn: SENTRY_DSN,
- // Setting this option to true will print useful information to the console while you're setting up Sentry.
- debug: false,
-});
+ // Adjust this value in production, or use tracesSampler for greater control
+ tracesSampleRate: 0,
+
+ // Setting this option to true will print useful information to the console while you're setting up Sentry.
+ debug: false,
+ });
+}