mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
Merge branch 'main' into UI-Meeting
This commit is contained in:
@@ -11,7 +11,7 @@ from pydantic import BaseModel
|
||||
|
||||
PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
|
||||
MODEL_DIR = "/root/diarization_models"
|
||||
|
||||
HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
|
||||
stub = Stub(name="reflector-diarizer")
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ def download_pyannote_audio():
|
||||
Pipeline.from_pretrained(
|
||||
"pyannote/speaker-diarization-3.0",
|
||||
cache_dir=MODEL_DIR,
|
||||
use_auth_token="***REMOVED***"
|
||||
use_auth_token=HUGGINGFACE_TOKEN
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -55,15 +55,15 @@ def configure_seamless_m4t():
|
||||
|
||||
import yaml
|
||||
|
||||
ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
|
||||
CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
|
||||
|
||||
with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
|
||||
with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
|
||||
model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
|
||||
with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
|
||||
with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
|
||||
vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
|
||||
with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
|
||||
with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
|
||||
unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
|
||||
with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
|
||||
with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
|
||||
unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
|
||||
|
||||
model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
|
||||
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
|
||||
os.getcwd(), model_dir, latest_model_version, tokenizer_name
|
||||
)
|
||||
|
||||
model_yaml_data["checkpoint"] = f"file:/{model_path}"
|
||||
vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
|
||||
unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
|
||||
unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
|
||||
model_yaml_data["checkpoint"] = f"file://{model_path}"
|
||||
vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
|
||||
unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
|
||||
unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
|
||||
|
||||
with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
|
||||
with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
|
||||
yaml.dump(model_yaml_data, file)
|
||||
with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
|
||||
with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
|
||||
yaml.dump(vocoder_yaml_data, file)
|
||||
with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
|
||||
with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
|
||||
yaml.dump(unity_100_yaml_data, file)
|
||||
with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
|
||||
with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
|
||||
yaml.dump(unity_200_yaml_data, file)
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ transcriber_image = (
|
||||
class Translator:
|
||||
def __enter__(self):
|
||||
import torch
|
||||
from seamless_communication.models.inference.translator import Translator
|
||||
from seamless_communication.inference.translator import Translator
|
||||
|
||||
self.lock = threading.Lock()
|
||||
self.use_gpu = torch.cuda.is_available()
|
||||
@@ -363,14 +363,15 @@ class Translator:
|
||||
@method()
|
||||
def translate_text(self, text: str, source_language: str, target_language: str):
|
||||
with self.lock:
|
||||
translated_text, _, _ = self.translator.predict(
|
||||
translation_result, _ = self.translator.predict(
|
||||
text,
|
||||
"t2tt",
|
||||
src_lang=self.get_seamless_lang_code(source_language),
|
||||
tgt_lang=self.get_seamless_lang_code(target_language),
|
||||
ngram_filtering=True,
|
||||
unit_generation_ngram_filtering=True,
|
||||
)
|
||||
return {"text": {source_language: text, target_language: str(translated_text)}}
|
||||
translated_text = str(translation_result[0])
|
||||
return {"text": {source_language: text, target_language: translated_text}}
|
||||
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
|
||||
logger.error("Sentry is not installed, avoided")
|
||||
else:
|
||||
logger.info("Sentry enabled")
|
||||
sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
|
||||
sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
|
||||
else:
|
||||
logger.info("Sentry disabled")
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
|
||||
dummy_storage,
|
||||
fake_mp3_upload,
|
||||
ensure_casing,
|
||||
nltk,
|
||||
appserver,
|
||||
sentence_tokenize,
|
||||
):
|
||||
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
|
||||
dummy_storage,
|
||||
fake_mp3_upload,
|
||||
ensure_casing,
|
||||
nltk,
|
||||
appserver,
|
||||
sentence_tokenize,
|
||||
):
|
||||
|
||||
@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
|
||||
import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
|
||||
import { MdError } from "react-icons/md";
|
||||
import useTranscriptList from "../transcripts/useTranscriptList";
|
||||
import { formatTime } from "../../lib/time";
|
||||
import { formatTimeMs } from "../../lib/time";
|
||||
import useApi from "../../lib/useApi";
|
||||
import { useError } from "../../(errors)/errorContext";
|
||||
import { FaEllipsisVertical } from "react-icons/fa6";
|
||||
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
|
||||
<Text fontSize="small">
|
||||
{new Date(item.created_at).toLocaleString("en-US")}
|
||||
{"\u00A0"}-{"\u00A0"}
|
||||
{formatTime(Math.floor(item.duration / 1000))}
|
||||
{formatTimeMs(item.duration)}
|
||||
</Text>
|
||||
<ExpandableText noOfLines={5}>
|
||||
{item.short_summary}
|
||||
|
||||
@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
|
||||
const _wavesurfer = WaveSurfer.create({
|
||||
container: waveformRef.current,
|
||||
peaks: [props.waveform.data],
|
||||
|
||||
height: "auto",
|
||||
duration: Math.floor(props.mediaDuration / 1000),
|
||||
media: props.media,
|
||||
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
|
||||
const timeLabel = () => {
|
||||
if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
|
||||
return `${formatTime(currentTime)}/${formatTime(
|
||||
Math.floor(props.mediaDuration / 1000),
|
||||
Math.floor(props.mediaDuration / 1000)
|
||||
)}`;
|
||||
return "";
|
||||
};
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
// TODO: format duration in the backend (BE)?
|
||||
/**
 * Format a duration expressed in milliseconds.
 *
 * The value is truncated to whole seconds (Math.floor of milliseconds / 1000)
 * and then handed to formatTime, which takes seconds and returns the string.
 *
 * @param milliseconds - duration in milliseconds
 * @returns the string produced by formatTime for the truncated second count
 */
export const formatTimeMs = (milliseconds: number): string => {
|
||||
return formatTime(Math.floor(milliseconds / 1000));
|
||||
};
|
||||
|
||||
export const formatTime = (seconds: number): string => {
|
||||
let hours = Math.floor(seconds / 3600);
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { GetTranscript, GetTranscriptTopic } from "../api";
|
||||
import { formatTime } from "./time";
|
||||
import { formatTime, formatTimeMs } from "./time";
|
||||
import { extractDomain } from "./utils";
|
||||
|
||||
export async function sendZulipMessage(
|
||||
stream: string,
|
||||
topic: string,
|
||||
message: string,
|
||||
message: string
|
||||
) {
|
||||
console.log("Sendiing zulip message", stream, topic);
|
||||
try {
|
||||
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
|
||||
export function getZulipMessage(
|
||||
transcript: GetTranscript,
|
||||
topics: GetTranscriptTopic[] | null,
|
||||
includeTopics: boolean,
|
||||
includeTopics: boolean
|
||||
) {
|
||||
const date = new Date(transcript.created_at);
|
||||
|
||||
// Get the timezone offset in minutes and convert it to hours and minutes
|
||||
const timezoneOffset = -date.getTimezoneOffset();
|
||||
const offsetHours = String(
|
||||
Math.floor(Math.abs(timezoneOffset) / 60),
|
||||
Math.floor(Math.abs(timezoneOffset) / 60)
|
||||
).padStart(2, "0");
|
||||
const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
|
||||
const offsetSign = timezoneOffset >= 0 ? "+" : "-";
|
||||
@@ -58,7 +58,7 @@ export function getZulipMessage(
|
||||
|
||||
**Date**: <time:${dateTimeString}>
|
||||
**Link**: [${extractDomain(link)}](${link})
|
||||
**Duration**: ${formatTime(transcript.duration)}
|
||||
**Duration**: ${formatTimeMs(transcript.duration)}
|
||||
|
||||
`;
|
||||
let topicText = "";
|
||||
|
||||
@@ -4,8 +4,11 @@
|
||||
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
|
||||
const SENTRY_DSN = process.env.SENTRY_DSN;
|
||||
|
||||
if (SENTRY_DSN) {
|
||||
Sentry.init({
|
||||
dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
|
||||
dsn: SENTRY_DSN,
|
||||
|
||||
// Adjust this value in production, or use tracesSampler for greater control
|
||||
tracesSampleRate: 0,
|
||||
@@ -19,3 +22,4 @@ Sentry.init({
|
||||
// in development and sample at a lower rate in production
|
||||
replaysSessionSampleRate: 0.0,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -5,8 +5,11 @@
|
||||
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
|
||||
const SENTRY_DSN = process.env.SENTRY_DSN;
|
||||
|
||||
if (SENTRY_DSN) {
|
||||
Sentry.init({
|
||||
dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
|
||||
dsn: SENTRY_DSN,
|
||||
|
||||
// Adjust this value in production, or use tracesSampler for greater control
|
||||
tracesSampleRate: 0,
|
||||
@@ -14,3 +17,4 @@ Sentry.init({
|
||||
// Setting this option to true will print useful information to the console while you're setting up Sentry.
|
||||
debug: false,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -4,8 +4,11 @@
|
||||
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
|
||||
const SENTRY_DSN = process.env.SENTRY_DSN;
|
||||
|
||||
if (SENTRY_DSN) {
|
||||
Sentry.init({
|
||||
dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
|
||||
dsn: SENTRY_DSN,
|
||||
|
||||
// Adjust this value in production, or use tracesSampler for greater control
|
||||
tracesSampleRate: 0,
|
||||
@@ -13,3 +16,4 @@ Sentry.init({
|
||||
// Setting this option to true will print useful information to the console while you're setting up Sentry.
|
||||
debug: false,
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user