Mirror of https://github.com/Monadical-SAS/reflector.git
Merge branch 'main' into UI-Meeting
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 
 PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
 MODEL_DIR = "/root/diarization_models"
-
+HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")
 stub = Stub(name="reflector-diarizer")
 
 
@@ -34,7 +34,7 @@ def download_pyannote_audio():
     Pipeline.from_pretrained(
         "pyannote/speaker-diarization-3.0",
         cache_dir=MODEL_DIR,
-        use_auth_token="***REMOVED***"
+        use_auth_token=HUGGINGFACE_TOKEN
     )
 
 
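Note: the two hunks above swap a hard-coded Hugging Face token for a Modal secret object. A minimal sketch of how Modal secrets are usually consumed, assuming the secret "my-huggingface-secret" stores a key named HF_TOKEN (the real key name is not shown in this diff) and that the function opts in via the secrets= argument; the hunk itself passes the Secret object straight into the pipeline setup instead:

    import os
    import modal

    stub = modal.Stub(name="example-app")
    HUGGINGFACE_TOKEN = modal.Secret.from_name("my-huggingface-secret")

    @stub.function(secrets=[HUGGINGFACE_TOKEN])
    def download_model():
        # Secret key/value pairs are injected as environment variables at runtime.
        token = os.environ["HF_TOKEN"]  # assumed key name
        return bool(token)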
@@ -55,15 +55,15 @@ def configure_seamless_m4t():
 
     import yaml
 
-    ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
+    CARDS_DIR: str = "./seamless_communication/src/seamless_communication/cards"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
         model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "r") as file:
         vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "r") as file:
         unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "r") as file:
         unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
 
     model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -87,18 +87,18 @@ def configure_seamless_m4t():
         os.getcwd(), model_dir, latest_model_version, tokenizer_name
     )
 
-    model_yaml_data["checkpoint"] = f"file:/{model_path}"
-    vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
-    unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
-    unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
+    model_yaml_data["checkpoint"] = f"file://{model_path}"
+    vocoder_yaml_data["checkpoint"] = f"file://{vocoder_path}"
+    unity_100_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
+    unity_200_yaml_data["tokenizer"] = f"file://{tokenizer_path}"
 
-    with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
         yaml.dump(model_yaml_data, file)
-    with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/vocoder_36langs.yaml", "w") as file:
         yaml.dump(vocoder_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-100.yaml", "w") as file:
         yaml.dump(unity_100_yaml_data, file)
-    with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
+    with open(f"{CARDS_DIR}/unity_nllb-200.yaml", "w") as file:
         yaml.dump(unity_200_yaml_data, file)
 
 
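The file:/ to file:// change above matters because the checkpoint and tokenizer entries are URIs: with an absolute path, only the double slash produces a well-formed file URI. A quick illustration, using a hypothetical path:

    model_path = "/root/models/checkpoint.pt"  # hypothetical absolute path
    print(f"file:/{model_path}")   # file://root/models/checkpoint.pt  ("root" would parse as a host)
    print(f"file://{model_path}")  # file:///root/models/checkpoint.pt (standard three-slash file URI)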
@@ -144,7 +144,7 @@ transcriber_image = (
 class Translator:
     def __enter__(self):
         import torch
-        from seamless_communication.models.inference.translator import Translator
+        from seamless_communication.inference.translator import Translator
 
         self.lock = threading.Lock()
         self.use_gpu = torch.cuda.is_available()
@@ -363,14 +363,15 @@ class Translator:
     @method()
     def translate_text(self, text: str, source_language: str, target_language: str):
         with self.lock:
-            translated_text, _, _ = self.translator.predict(
+            translation_result, _ = self.translator.predict(
                 text,
                 "t2tt",
                 src_lang=self.get_seamless_lang_code(source_language),
                 tgt_lang=self.get_seamless_lang_code(target_language),
-                ngram_filtering=True,
+                unit_generation_ngram_filtering=True,
            )
-        return {"text": {source_language: text, target_language: str(translated_text)}}
+            translated_text = str(translation_result[0])
+        return {"text": {source_language: text, target_language: translated_text}}
 
 
 # -------------------------------------------------------------------
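The translator hunk tracks an API change in seamless_communication: the import path drops the models.inference segment, predict now returns a pair (text output, speech output) instead of a 3-tuple, and the n-gram filtering flag is renamed. A minimal sketch of the new call shape, assuming an already-constructed Translator and seamless language codes such as "eng" and "fra":

    # translator: seamless_communication.inference.translator.Translator, built elsewhere
    text_output, speech_output = translator.predict(
        "hello world",
        "t2tt",                      # text-to-text translation task
        src_lang="eng",
        tgt_lang="fra",
        unit_generation_ngram_filtering=True,
    )
    translated = str(text_output[0])   # first translated string; speech_output is not used for t2tt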
@@ -45,7 +45,7 @@ if settings.SENTRY_DSN:
         logger.error("Sentry is not installed, avoided")
     else:
         logger.info("Sentry enabled")
-        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=1.0)
+        sentry_sdk.init(dsn=settings.SENTRY_DSN, traces_sample_rate=0.01)
 else:
     logger.info("Sentry disabled")
 
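For context, traces_sample_rate is the fraction of transactions sent to Sentry for performance tracing, so this hunk drops tracing from every request (1.0) to roughly 1 in 100 (0.01); error events are unaffected. A minimal sketch with a placeholder DSN:

    import sentry_sdk

    sentry_sdk.init(
        dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
        traces_sample_rate=0.01,  # ~1% of transactions traced; errors still always reported
    )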
@@ -70,6 +70,7 @@ async def test_transcript_rtc_and_websocket(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -227,6 +228,7 @@ async def test_transcript_rtc_and_websocket_and_fr(
     dummy_storage,
     fake_mp3_upload,
     ensure_casing,
+    nltk,
     appserver,
     sentence_tokenize,
 ):
@@ -8,7 +8,7 @@ import { FaGear } from "react-icons/fa6";
 import { FaCheck, FaTrash, FaStar, FaMicrophone } from "react-icons/fa";
 import { MdError } from "react-icons/md";
 import useTranscriptList from "../transcripts/useTranscriptList";
-import { formatTime } from "../../lib/time";
+import { formatTimeMs } from "../../lib/time";
 import useApi from "../../lib/useApi";
 import { useError } from "../../(errors)/errorContext";
 import { FaEllipsisVertical } from "react-icons/fa6";
@@ -274,7 +274,7 @@ export default function TranscriptBrowser() {
             <Text fontSize="small">
               {new Date(item.created_at).toLocaleString("en-US")}
               {"\u00A0"}-{"\u00A0"}
-              {formatTime(Math.floor(item.duration / 1000))}
+              {formatTimeMs(item.duration)}
             </Text>
             <ExpandableText noOfLines={5}>
               {item.short_summary}
@@ -50,7 +50,6 @@ export default function Player(props: PlayerProps) {
     const _wavesurfer = WaveSurfer.create({
       container: waveformRef.current,
       peaks: [props.waveform.data],
-
       height: "auto",
       duration: Math.floor(props.mediaDuration / 1000),
       media: props.media,
@@ -161,7 +160,7 @@ export default function Player(props: PlayerProps) {
   const timeLabel = () => {
     if (props.mediaDuration && Math.floor(props.mediaDuration / 1000) > 0)
       return `${formatTime(currentTime)}/${formatTime(
-        Math.floor(props.mediaDuration / 1000),
+        Math.floor(props.mediaDuration / 1000)
       )}`;
     return "";
   };
@@ -1,4 +1,6 @@
-// TODO format duraction in be ?
+export const formatTimeMs = (milliseconds: number): string => {
+  return formatTime(Math.floor(milliseconds / 1000));
+};
 
 export const formatTime = (seconds: number): string => {
   let hours = Math.floor(seconds / 3600);
@@ -1,11 +1,11 @@
 import { GetTranscript, GetTranscriptTopic } from "../api";
-import { formatTime } from "./time";
+import { formatTime, formatTimeMs } from "./time";
 import { extractDomain } from "./utils";
 
 export async function sendZulipMessage(
   stream: string,
   topic: string,
-  message: string,
+  message: string
 ) {
   console.log("Sendiing zulip message", stream, topic);
   try {
@@ -28,14 +28,14 @@ export const ZULIP_MSG_MAX_LENGTH = 10000;
 export function getZulipMessage(
   transcript: GetTranscript,
   topics: GetTranscriptTopic[] | null,
-  includeTopics: boolean,
+  includeTopics: boolean
 ) {
   const date = new Date(transcript.created_at);
 
   // Get the timezone offset in minutes and convert it to hours and minutes
   const timezoneOffset = -date.getTimezoneOffset();
   const offsetHours = String(
-    Math.floor(Math.abs(timezoneOffset) / 60),
+    Math.floor(Math.abs(timezoneOffset) / 60)
   ).padStart(2, "0");
   const offsetMinutes = String(Math.abs(timezoneOffset) % 60).padStart(2, "0");
   const offsetSign = timezoneOffset >= 0 ? "+" : "-";
@@ -58,7 +58,7 @@ export function getZulipMessage(
 
 **Date**: <time:${dateTimeString}>
 **Link**: [${extractDomain(link)}](${link})
-**Duration**: ${formatTime(transcript.duration)}
+**Duration**: ${formatTimeMs(transcript.duration)}
 
 `;
   let topicText = "";
@@ -4,18 +4,22 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-
-  replaysOnErrorSampleRate: 0.0,
-
-  // This sets the sample rate to be 10%. You may want this to be 100% while
-  // in development and sample at a lower rate in production
-  replaysSessionSampleRate: 0.0,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+
+    replaysOnErrorSampleRate: 0.0,
+
+    // This sets the sample rate to be 10%. You may want this to be 100% while
+    // in development and sample at a lower rate in production
+    replaysSessionSampleRate: 0.0,
+  });
+}
@@ -5,12 +5,16 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}
@@ -4,12 +4,16 @@
 
 import * as Sentry from "@sentry/nextjs";
 
-Sentry.init({
-  dsn: "https://a6fb679d6e99e303bb2ea0e2d68bfe46@o1376440.ingest.sentry.io/4505634666577920",
-
-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 0,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+const SENTRY_DSN = process.env.SENTRY_DSN;
+
+if (SENTRY_DSN) {
+  Sentry.init({
+    dsn: SENTRY_DSN,
+
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 0,
+
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}