Merge branch 'main' into UI-Meeting

2024-06-07 17:37:19 +02:00
11 changed files with 71 additions and 55 deletions

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel
 PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
 MODEL_DIR = "/root/diarization_models"
+HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
 stub = Stub(name="reflector-diarizer")
@@ -34,7 +34,7 @@ def download_pyannote_audio():
     Pipeline.from_pretrained(
         "pyannote/speaker-diarization-3.0",
         cache_dir=MODEL_DIR,
-        use_auth_token="***REMOVED***"
+        use_auth_token=HUGGINGFACE_TOKEN
     )
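For context on the new secret wiring, here is a minimal sketch of the usual Modal pattern rather than the exact reflector code: the `modal.Secret` handle is attached to the function, and its key/value pairs are injected as environment variables inside the container. The env var name `HUGGINGFACE_TOKEN` and the body of the download function are assumptions for illustration.

```python
# Hedged sketch of the typical Modal secret pattern (assumed: the secret
# exposes a HUGGINGFACE_TOKEN environment variable).
import os

import modal
from modal import Stub

stub = Stub(name="reflector-diarizer")
HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")


@stub.function(secrets=[HUGGINGFACE_TOKEN])
def download_pyannote_audio():
    from pyannote.audio import Pipeline

    # The secret's values are available as environment variables at run
    # time, so the raw token string is read from os.environ here.
    Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.0",
        cache_dir="/root/diarization_models",
        use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
    )
```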

View File

@@ -55,15 +55,15 @@ def configure_seamless_m4t():
     import yaml
-    ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
+    CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
         model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
         vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
         unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
         unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
     model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
         os.getcwd(), model_dir, latest_model_version, tokenizer_name
     )
-    model_yaml_data["checkpoint"] = f"file:/{model_path}"
-    vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
-    unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
-    unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
+    model_yaml_data["checkpoint"] = f"file://{model_path}"
+    vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
+    unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
+    unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
         yaml.dump(model_yaml_data, file)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
         yaml.dump(vocoder_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
         yaml.dump(unity_100_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
         yaml.dump(unity_200_yaml_data, file)
@@ -144,7 +144,7 @@ transcriber_image = (
 class Translator:
     def __enter__(self):
         import torch
-        from seamless_communication.models.inference.translator import Translator
+        from seamless_communication.inference.translator import Translator
         self.lock = threading.Lock()
         self.use_gpu = torch.cuda.is_available()
@@ -363,14 +363,15 @@ class Translator:
     @method()
     def translate_text(self, text: str, source_language: str, target_language: str):
         with self.lock:
-            translated_text, _, _ = self.translator.predict(
+            translation_result, _ = self.translator.predict(
                 text,
                 "t2tt",
                 src_lang=self.get_seamless_lang_code(source_language),
                 tgt_lang=self.get_seamless_lang_code(target_language),
-                ngram_filtering=True,
+                unit_generation_ngram_filtering=True,
             )
-        return {"text": {source_language: text, target_language: str(translated_text)}}
+            translated_text = str(translation_result[0])
+        return {"text": {source_language: text, target_language: translated_text}}
 # -------------------------------------------------------------------
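The two hunks above track the seamless_communication API move: the Translator import drops the `models` package, `predict()` returns a pair of text outputs and speech outputs instead of a three-tuple, and the n-gram filtering flag is renamed. A minimal sketch of the new text-to-text call shape; the helper name and the idea of passing a ready-made `Translator` instance are illustrative, not code from this repository.

```python
# Hedged sketch of the newer seamless_communication t2tt call shape.
from seamless_communication.inference.translator import Translator


def translate_t2tt(translator: Translator, text: str, src_lang: str, tgt_lang: str) -> str:
    # predict() returns (text_outputs, speech_outputs); for a text-to-text
    # task the speech part is unused and the first text output holds the
    # translation.
    text_outputs, _ = translator.predict(
        text,
        "t2tt",
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        unit_generation_ngram_filtering=True,
    )
    return str(text_outputs[0])
```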

View File

@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
         logger.error("Sentry is not installed, avoided")
     else:
         logger.info("Sentry enabled")
-        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
+        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
 else:
     logger.info("Sentry disabled")

View File

@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):

View File

@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
 import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
 import { MdError } from "react-icons/md";
 import useTranscriptList from "../transcripts/useTranscriptList";
-import { formatTime } from "../../lib/time";
+import { formatTimeMs } from "../../lib/time";
 import useApi from "../../lib/useApi";
 import { useError } from "../../(errors)/errorContext";
 import { FaEllipsisVertical } from "react-icons/fa6";
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
         <Text fontSize="small">
           {new Date(item.created_at).toLocaleString("en-US")}
           {"\u00A0"}-{"\u00A0"}
-          {formatTime(Math.floor(item.duration / 1000))}
+          {formatTimeMs(item.duration)}
         </Text>
         <ExpandableText noOfLines={5}>
           {item.short_summary}

View File

@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
     const _wavesurfer = WaveSurfer.create({
       container: waveformRef.current,
       peaks: [props.waveform.data],
       height: "auto",
       duration: Math.floor(props.mediaDuration / 1000),
       media: props.media,
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
   const timeLabel = () => {
     if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
       return `${formatTime(currentTime)}/${formatTime(
-        Math.floor(props.mediaDuration / 1000),
+        Math.floor(props.mediaDuration / 1000)
       )}`;
     return "";
   };

View File

@@ -1,4 +1,6 @@
-// TODO format duraction in be ?
+export const formatTimeMs = (milliseconds: number): string => {
+  return formatTime(Math.floor(milliseconds / 1000));
+};
 export const formatTime = (seconds: number): string => {
   let hours = Math.floor(seconds / 3600);

View File

@@ -1,11 +1,11 @@
 import { GetTranscript, GetTranscriptTopic } from "../api";
-import { formatTime } from "./time";
+import { formatTime, formatTimeMs } from "./time";
 import { extractDomain } from "./utils";

 export async function sendZulipMessage(
   stream: string,
   topic: string,
-  message: string,
+  message: string
 ) {
   console.log("Sendiing zulip message", stream, topic);
   try {
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
 export function getZulipMessage(
   transcript: GetTranscript,
   topics: GetTranscriptTopic[] | null,
-  includeTopics: boolean,
+  includeTopics: boolean
 ) {
   const date = new Date(transcript.created_at);
   // Get the timezone offset in minutes and convert it to hours and minutes
   const timezoneOffset = -date.getTimezoneOffset();
   const offsetHours = String(
-    Math.floor(Math.abs(timezoneOffset) / 60),
+    Math.floor(Math.abs(timezoneOffset) / 60)
   ).padStart(2, "0");
   const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
   const offsetSign = timezoneOffset >= 0 ? "+" : "-";
@@ -58,7 +58,7 @@ export function getZulipMessage(
 **Date**: <time:${dateTimeString}>
 **Link**: [${extractDomain(link)}](${link})
-**Duration**: ${formatTime(transcript.duration)}
+**Duration**: ${formatTimeMs(transcript.duration)}
 `;
   let topicText = "";

View File

@@ -4,18 +4,22 @@
 import * as Sentry from "@sentry/nextjs";
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-  replaysOnErrorSampleRate: 0.0,
-  // This sets the sample rate to be 10%. You may want this to be 100% while
-  // in development and sample at a lower rate in production
-  replaysSessionSampleRate: 0.0,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+    replaysOnErrorSampleRate: 0.0,
+    // This sets the sample rate to be 10%. You may want this to be 100% while
+    // in development and sample at a lower rate in production
+    replaysSessionSampleRate: 0.0,
+  });
+}

View File

@@ -5,12 +5,16 @@
 import * as Sentry from "@sentry/nextjs";
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}

View File

@@ -4,12 +4,16 @@
 import * as Sentry from "@sentry/nextjs";
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}