update language codes

This commit is contained in:
Gokul Mohanarangan
2023-10-14 17:35:30 +05:30
15 changed files with 1107 additions and 427 deletions

View File

@@ -167,194 +167,196 @@ class Translator:
""" """
# TODO: Enhance with complete list of lang codes # TODO: Enhance with complete list of lang codes
seamless_lang_code = { seamless_lang_code = {
# Afrikaans
'af': 'afr',
# Amharic # Amharic
"am": "amh", 'am': 'amh',
# Modern Standard Arabic # Modern Standard Arabic
"ar": "arb", 'ar': 'arb',
# Moroccan Arabic # Moroccan Arabic
# (No 2-letter code) 'ary': 'ary',
# Egyptian Arabic # Egyptian Arabic
# (No 2-letter code) 'arz': 'arz',
# Assamese # Assamese
"as": "asm", 'as': 'asm',
# North Azerbaijani # North Azerbaijani
"az": "azj", 'az': 'azj',
# Belarusian # Belarusian
"be": "bel", 'be': 'bel',
# Bengali # Bengali
"bn": "ben", 'bn': 'ben',
# Bosnian # Bosnian
"bs": "bos", 'bs': 'bos',
# Bulgarian # Bulgarian
"bg": "bul", 'bg': 'bul',
# Catalan # Catalan
"ca": "cat", 'ca': 'cat',
# Cebuano # Cebuano
"ceb": "ceb", 'ceb': 'ceb',
# Czech # Czech
"cs": "ces", 'cs': 'ces',
# Central Kurdish # Central Kurdish
"ckb": "ckb", 'ku': 'ckb',
# Mandarin Chinese (Simplified) # Mandarin Chinese
"zh": "cmn", 'cmn': 'cmn_Hant',
# Mandarin Chinese (Traditional)
# (No separate 2-letter code)
# Welsh # Welsh
"cy": "cym", 'cy': 'cym',
# Danish # Danish
"da": "dan", 'da': 'dan',
# German # German
"de": "deu", 'de': 'deu',
# Greek # Greek
"el": "ell", 'el': 'ell',
# English # English
"en": "eng", 'en': 'eng',
# Estonian # Estonian
"et": "est", 'et': 'est',
# Basque # Basque
"eu": "eus", 'eu': 'eus',
# Finnish # Finnish
"fi": "fin", 'fi': 'fin',
# French # French
"fr": "fra", 'fr': 'fra',
# West Central Oromo
# (No 2-letter code)
# Irish # Irish
"ga": "gle", 'ga': 'gle',
# West Central Oromo
'gaz': 'gaz',
# Galician # Galician
"gl": "glg", 'gl': 'glg',
# Gujarati # Gujarati
"gu": "guj", 'gu': 'guj',
# Hebrew # Hebrew
"he": "heb", 'he': 'heb',
# Hindi # Hindi
"hi": "hin", 'hi': 'hin',
# Croatian # Croatian
"hr": "hrv", 'hr': 'hrv',
# Hungarian # Hungarian
"hu": "hun", 'hu': 'hun',
# Armenian # Armenian
"hy": "hye", 'hy': 'hye',
# Igbo # Igbo
"ig": "ibo", 'ig': 'ibo',
# Indonesian # Indonesian
"id": "ind", 'id': 'ind',
# Icelandic # Icelandic
"is": "isl", 'is': 'isl',
# Italian # Italian
"it": "ita", 'it': 'ita',
# Javanese # Javanese
"jv": "jav", 'jv': 'jav',
# Japanese # Japanese
"ja": "jpn", 'ja': 'jpn',
# Kannada # Kannada
"kn": "kan", 'kn': 'kan',
# Georgian # Georgian
"ka": "kat", 'ka': 'kat',
# Kazakh # Kazakh
"kk": "kaz", 'kk': 'kaz',
# Halh Mongolian # Halh Mongolian
# (No 2-letter code) 'khk': 'khk',
# Khmer # Khmer
"km": "khm", 'km': 'khm',
# Kyrgyz # Kyrgyz
"ky": "kir", 'ky': 'kir',
# Korean # Korean
"ko": "kor", 'ko': 'kor',
# Lao # Lao
"lo": "lao", 'lo': 'lao',
# Lithuanian # Lithuanian
"lt": "lit", 'lt': 'lit',
# Ganda # Ganda
"lg": "lug", 'lg': 'lug',
# Luo # Luo
"luo": "luo", 'luo': 'luo',
# Standard Latvian # Standard Latvian
"lv": "lvs", 'lv': 'lvs',
# Maithili # Maithili
# (No 2-letter code) 'mai': 'mai',
# Malayalam # Malayalam
"ml": "mal", 'ml': 'mal',
# Marathi # Marathi
"mr": "mar", 'mr': 'mar',
# Macedonian # Macedonian
"mk": "mkd", 'mk': 'mkd',
# Maltese # Maltese
"mt": "mlt", 'mt': 'mlt',
# Meitei # Meitei
# (No 2-letter code) 'mni': 'mni',
# Burmese # Burmese
"my": "mya", 'my': 'mya',
# Dutch # Dutch
"nl": "nld", 'nl': 'nld',
# Norwegian Nynorsk # Norwegian Nynorsk
"nn": "nno", 'nn': 'nno',
# Norwegian Bokmål # Norwegian Bokmål
"nb": "nob", 'nb': 'nob',
# Nepali # Nepali
"ne": "npi", 'ne': 'npi',
# Nyanja # Nyanja
"ny": "nya", 'ny': 'nya',
# Odia # Odia
"or": "ory", 'or': 'ory',
# Punjabi # Punjabi
"pa": "pan", 'pa': 'pan',
# Southern Pashto # Southern Pashto
# (No 2-letter code) 'pbt': 'pbt',
# Western Persian # Western Persian
"fa": "pes", 'pes': 'pes',
# Polish # Polish
"pl": "pol", 'pl': 'pol',
# Portuguese # Portuguese
"pt": "por", 'pt': 'por',
# Romanian # Romanian
"ro": "ron", 'ro': 'ron',
# Russian # Russian
"ru": "rus", 'ru': 'rus',
# Slovak # Slovak
"sk": "slk", 'sk': 'slk',
# Slovenian # Slovenian
"sl": "slv", 'sl': 'slv',
# Shona # Shona
"sn": "sna", 'sn': 'sna',
# Sindhi # Sindhi
"sd": "snd", 'sd': 'snd',
# Somali # Somali
"so": "som", 'so': 'som',
# Spanish # Spanish
"es": "spa", 'es': 'spa',
# Serbian # Serbian
"sr": "srp", 'sr': 'srp',
# Swedish # Swedish
"sv": "swe", 'sv': 'swe',
# Swahili # Swahili
"sw": "swh", 'sw': 'swh',
# Tamil # Tamil
"ta": "tam", 'ta': 'tam',
# Telugu # Telugu
"te": "tel", 'te': 'tel',
# Tajik # Tajik
"tg": "tgk", 'tg': 'tgk',
# Tagalog # Tagalog
"tl": "tgl", 'tl': 'tgl',
# Thai # Thai
"th": "tha", 'th': 'tha',
# Turkish # Turkish
"tr": "tur", 'tr': 'tur',
# Ukrainian # Ukrainian
"uk": "ukr", 'uk': 'ukr',
# Urdu # Urdu
"ur": "urd", 'ur': 'urd',
# Northern Uzbek # Northern Uzbek
"uz": "uzn", 'uz': 'uzn',
# Vietnamese # Vietnamese
"vi": "vie", 'vi': 'vie',
# Yoruba # Yoruba
"yo": "yor", 'yo': 'yor',
# Cantonese # Cantonese
# (No separate 2-letter code) 'yue': 'yue',
# Standard Malay
'ms': 'zsm',
# Zulu # Zulu
"zu": "zul", 'zu': 'zul'
} }
return seamless_lang_code.get(lang_code, "eng") return seamless_lang_code.get(lang_code, "eng")
@@ -408,10 +410,10 @@ def web():
result: dict result: dict
@app.post("/translate", dependencies=[Depends(apikey_auth)]) @app.post("/translate", dependencies=[Depends(apikey_auth)])
def translate( async def translate(
text: str, text: str,
source_language: Annotated[str, Body(...)] = "en", source_language: Annotated[str, Body(...)] = "en",
target_language: Annotated[str, Body(...)] = "fr", target_language: Annotated[str, Body(...)] = "fr",
) -> TranslateResponse: ) -> TranslateResponse:
func = translatorstub.translate_text.spawn( func = translatorstub.translate_text.spawn(
text=text, text=text,

View File

@@ -14,14 +14,15 @@ API will be a POST request to TRANSCRIPT_URL:
""" """
from pathlib import Path
import httpx
from reflector.processors.audio_transcript import AudioTranscriptProcessor from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word from reflector.processors.types import AudioFile, Transcript, Word
from reflector.settings import settings from reflector.settings import settings
from reflector.storage import Storage from reflector.storage import Storage
from reflector.utils.retry import retry from reflector.utils.retry import retry
from pathlib import Path
import httpx
class AudioTranscriptBananaProcessor(AudioTranscriptProcessor): class AudioTranscriptBananaProcessor(AudioTranscriptProcessor):

View File

@@ -1,7 +1,7 @@
from faster_whisper import WhisperModel
from reflector.processors.audio_transcript import AudioTranscriptProcessor from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word from reflector.processors.types import AudioFile, Transcript, Word
from faster_whisper import WhisperModel
class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor): class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):

View File

@@ -117,113 +117,204 @@ class FinalTitle(BaseModel):
title: str title: str
# https://github.com/facebookresearch/seamless_communication/tree/main/scripts/m4t/predict#supported-languages
class TranslationLanguages(BaseModel): class TranslationLanguages(BaseModel):
language_to_id_mapping: dict = { language_to_id_mapping: dict = {
"Afrikaans": "af", # Afrikaans
"Albanian": "sq", "af": "afr",
"Amharic": "am", # Amharic
"Arabic": "ar", "am": "amh",
"Armenian": "hy", # Modern Standard Arabic
"Asturian": "ast", "ar": "arb",
"Azerbaijani": "az", # Moroccan Arabic
"Bashkir": "ba", "ary": "ary",
"Belarusian": "be", # Egyptian Arabic
"Bengali": "bn", "arz": "arz",
"Bosnian": "bs", # Assamese
"Breton": "br", "as": "asm",
"Bulgarian": "bg", # North Azerbaijani
"Burmese": "my", "az": "azj",
"Catalan; Valencian": "ca", # Belarusian
"Cebuano": "ceb", "be": "bel",
"Central Khmer": "km", # Bengali
"Chinese": "zh", "bn": "ben",
"Croatian": "hr", # Bosnian
"Czech": "cs", "bs": "bos",
"Danish": "da", # Bulgarian
"Dutch; Flemish": "nl", "bg": "bul",
"English": "en", # Catalan
"Estonian": "et", "ca": "cat",
"Finnish": "fi", # Cebuano
"French": "fr", "ceb": "ceb",
"Fulah": "ff", # Czech
"Gaelic; Scottish Gaelic": "gd", "cs": "ces",
"Galician": "gl", # Central Kurdish
"Ganda": "lg", "ku": "ckb",
"Georgian": "ka", # Mandarin Chinese
"German": "de", "cmn": "cmn_Hant",
"Greeek": "el", # Welsh
"Gujarati": "gu", "cy": "cym",
"Haitian; Haitian Creole": "ht", # Danish
"Hausa": "ha", "da": "dan",
"Hebrew": "he", # German
"Hindi": "hi", "de": "deu",
"Hungarian": "hu", # Greek
"Icelandic": "is", "el": "ell",
"Igbo": "ig", # English
"Iloko": "ilo", "en": "eng",
"Indonesian": "id", # Estonian
"Irish": "ga", "et": "est",
"Italian": "it", # Basque
"Japanese": "ja", "eu": "eus",
"Javanese": "jv", # Finnish
"Kannada": "kn", "fi": "fin",
"Kazakh": "kk", # French
"Korean": "ko", "fr": "fra",
"Lao": "lo", # Irish
"Latvian": "lv", "ga": "gle",
"Lingala": "ln", # West Central Oromo,
"Lithuanian": "lt", "gaz": "gaz",
"Luxembourgish; Letzeburgesch": "lb", # Galician
"Macedonian": "mk", "gl": "glg",
"Malagasy": "mg", # Gujarati
"Malay": "ms", "gu": "guj",
"Malayalam": "ml", # Hebrew
"Marathi": "mr", "he": "heb",
"Mongolian": "mn", # Hindi
"Nepali": "ne", "hi": "hin",
"Northern Sotho": "ns", # Croatian
"Norwegian": "no", "hr": "hrv",
"Occitan": "oc", # Hungarian
"Oriya": "or", "hu": "hun",
"Panjabi; Punjabi": "pa", # Armenian
"Persian": "fa", "hy": "hye",
"Polish": "pl", # Igbo
"Portuguese": "pt", "ig": "ibo",
"Pushto; Pashto": "ps", # Indonesian
"Romanian; Moldavian; Moldovan": "ro", "id": "ind",
"Russian": "ru", # Icelandic
"Serbian": "sr", "is": "isl",
"Sindhi": "sd", # Italian
"Sinhala; Sinhalese": "si", "it": "ita",
"Slovak": "sk", # Javanese
"Slovenian": "sl", "jv": "jav",
"Somali": "so", # Japanese
"Spanish": "es", "ja": "jpn",
"Sundanese": "su", # Kannada
"Swahili": "sw", "kn": "kan",
"Swati": "ss", # Georgian
"Swedish": "sv", "ka": "kat",
"Tagalog": "tl", # Kazakh
"Tamil": "ta", "kk": "kaz",
"Thai": "th", # Halh Mongolian
"Tswana": "tn", "khk": "khk",
"Turkish": "tr", # Khmer
"Ukrainian": "uk", "km": "khm",
"Urdu": "ur", # Kyrgyz
"Uzbek": "uz", "ky": "kir",
"Vietnamese": "vi", # Korean
"Welsh": "cy", "ko": "kor",
"Western Frisian": "fy", # Lao
"Wolof": "wo", "lo": "lao",
"Xhosa": "xh", # Lithuanian
"Yiddish": "yi", "lt": "lit",
"Yoruba": "yo", # Ganda
"Zulu": "zu", "lg": "lug",
# Luo
"luo": "luo",
# Standard Latvian
"lv": "lvs",
# Maithili
"mai": "mai",
# Malayalam
"ml": "mal",
# Marathi
"mr": "mar",
# Macedonian
"mk": "mkd",
# Maltese
"mt": "mlt",
# Meitei
"mni": "mni",
# Burmese
"my": "mya",
# Dutch
"nl": "nld",
# Norwegian Nynorsk
"nn": "nno",
# Norwegian Bokmål
"nb": "nob",
# Nepali
"ne": "npi",
# Nyanja
"ny": "nya",
# Odia
"or": "ory",
# Punjabi
"pa": "pan",
# Southern Pashto
"pbt": "pbt",
# Western Persian
"pes": "pes",
# Polish
"pl": "pol",
# Portuguese
"pt": "por",
# Romanian
"ro": "ron",
# Russian
"ru": "rus",
# Slovak
"sk": "slk",
# Slovenian
"sl": "slv",
# Shona
"sn": "sna",
# Sindhi
"sd": "snd",
# Somali
"so": "som",
# Spanish
"es": "spa",
# Serbian
"sr": "srp",
# Swedish
"sv": "swe",
# Swahili
"sw": "swh",
# Tamil
"ta": "tam",
# Telugu
"te": "tel",
# Tajik
"tg": "tgk",
# Tagalog
"tl": "tgl",
# Thai
"th": "tha",
# Turkish
"tr": "tur",
# Ukrainian
"uk": "ukr",
# Urdu
"ur": "urd",
# Northern Uzbek
"uz": "uzn",
# Vietnamese
"vi": "vie",
# Yoruba
"yo": "yor",
# Cantonese
"yue": "yue",
# Standard Malay
"ms": "zsm",
# Zulu
"zu": "zul",
} }
@property @property
def supported_languages(self): def supported_languages(self):
return self.language_to_id_mapping.values() return self.language_to_id_mapping.keys()
def is_supported(self, lang_id: str) -> bool: def is_supported(self, lang_id: str) -> bool:
if lang_id in self.supported_languages: if lang_id in self.supported_languages:

View File

@@ -1,7 +1,6 @@
import asyncio import asyncio
import av import av
from reflector.logger import logger from reflector.logger import logger
from reflector.processors import ( from reflector.processors import (
AudioChunkerProcessor, AudioChunkerProcessor,

View File

@@ -0,0 +1,495 @@
import Script from "next/script";
// type Script = 'Latn' | 'Ethi' | 'Arab' | 'Beng' | 'Cyrl' | 'Taml' | 'Hant' | 'Hans' | 'Grek' | 'Gujr' | 'Hebr'| 'Deva'| 'Armn' | 'Jpan' | 'Knda' | 'Geor';
/**
 * One selectable language entry.
 */
type LanguageOption = {
  /** Human-readable label shown for the entry. */
  name: string;
  /** Language code of the entry; `undefined` for the "None" entry. */
  value: undefined | string;
  /** Four-letter script tag such as "Latn"; absent on the "None" entry. */
  script?: string;
};
// Full list of languages known to the UI, one entry per language.
// Each entry carries the language code (`value`), a display label (`name`)
// and a four-letter script tag (`script`); the Latin-script subset exported
// from this module is derived from entries whose `script` is "Latn".
// NOTE(review): the codes appear to mirror the keys of the backend language
// map — confirm they stay in sync when either side changes.
const supportedLanguages: LanguageOption[] = [
{
value: "af",
name: "Afrikaans",
script: "Latn",
},
{
value: "am",
name: "Amharic",
script: "Ethi",
},
{
value: "ar",
name: "Modern Standard Arabic",
script: "Arab",
},
{
value: "ary",
name: "Moroccan Arabic",
script: "Arab",
},
{
value: "arz",
name: "Egyptian Arabic",
script: "Arab",
},
{
value: "as",
name: "Assamese",
script: "Beng",
},
{
value: "az",
name: "North Azerbaijani",
script: "Latn",
},
{
value: "be",
name: "Belarusian",
script: "Cyrl",
},
{
value: "bn",
name: "Bengali",
script: "Beng",
},
{
value: "bs",
name: "Bosnian",
script: "Latn",
},
{
value: "bg",
name: "Bulgarian",
script: "Cyrl",
},
{
value: "ca",
name: "Catalan",
script: "Latn",
},
{
value: "ceb",
name: "Cebuano",
script: "Latn",
},
{
value: "cs",
name: "Czech",
script: "Latn",
},
{
value: "ku",
name: "Central Kurdish",
script: "Arab",
},
{
value: "cmn",
name: "Mandarin Chinese",
script: "Hans",
},
{
value: "cy",
name: "Welsh",
script: "Latn",
},
{
value: "da",
name: "Danish",
script: "Latn",
},
{
value: "de",
name: "German",
script: "Latn",
},
{
value: "el",
name: "Greek",
script: "Grek",
},
{
value: "en",
name: "English",
script: "Latn",
},
{
value: "et",
name: "Estonian",
script: "Latn",
},
{
value: "eu",
name: "Basque",
script: "Latn",
},
{
value: "fi",
name: "Finnish",
script: "Latn",
},
{
value: "fr",
name: "French",
script: "Latn",
},
{
value: "gaz",
name: "West Central Oromo",
script: "Latn",
},
{
value: "ga",
name: "Irish",
script: "Latn",
},
{
value: "gl",
name: "Galician",
script: "Latn",
},
{
value: "gu",
name: "Gujarati",
script: "Gujr",
},
{
value: "he",
name: "Hebrew",
script: "Hebr",
},
{
value: "hi",
name: "Hindi",
script: "Deva",
},
{
value: "hr",
name: "Croatian",
script: "Latn",
},
{
value: "hu",
name: "Hungarian",
script: "Latn",
},
{
value: "hy",
name: "Armenian",
script: "Armn",
},
{
value: "ig",
name: "Igbo",
script: "Latn",
},
{
value: "id",
name: "Indonesian",
script: "Latn",
},
{
value: "is",
name: "Icelandic",
script: "Latn",
},
{
value: "it",
name: "Italian",
script: "Latn",
},
{
value: "jv",
name: "Javanese",
script: "Latn",
},
{
value: "ja",
name: "Japanese",
script: "Jpan",
},
{
value: "kn",
name: "Kannada",
script: "Knda",
},
{
value: "ka",
name: "Georgian",
script: "Geor",
},
{
value: "kk",
name: "Kazakh",
script: "Cyrl",
},
{
value: "khk",
name: "Halh Mongolian",
script: "Cyrl",
},
{
value: "km",
name: "Khmer",
script: "Khmr",
},
{
value: "ky",
name: "Kyrgyz",
script: "Cyrl",
},
{
value: "ko",
name: "Korean",
script: "Kore",
},
{
value: "lo",
name: "Lao",
script: "Laoo",
},
{
value: "lt",
name: "Lithuanian",
script: "Latn",
},
{
value: "lg",
name: "Ganda",
script: "Latn",
},
{
value: "luo",
name: "Luo",
script: "Latn",
},
{
value: "lv",
name: "Standard Latvian",
script: "Latn",
},
{
value: "mai",
name: "Maithili",
script: "Deva",
},
{
value: "ml",
name: "Malayalam",
script: "Mlym",
},
{
value: "mr",
name: "Marathi",
script: "Deva",
},
{
value: "mk",
name: "Macedonian",
script: "Cyrl",
},
{
value: "mt",
name: "Maltese",
script: "Latn",
},
{
value: "mni",
name: "Meitei",
script: "Beng",
},
{
value: "my",
name: "Burmese",
script: "Mymr",
},
{
value: "nl",
name: "Dutch",
script: "Latn",
},
{
value: "nn",
name: "Norwegian Nynorsk",
script: "Latn",
},
{
value: "nb",
name: "Norwegian Bokmål",
script: "Latn",
},
{
value: "ne",
name: "Nepali",
script: "Deva",
},
{
value: "ny",
name: "Nyanja",
script: "Latn",
},
{
value: "or",
name: "Odia",
script: "Orya",
},
{
value: "pa",
name: "Punjabi",
script: "Guru",
},
{
value: "pbt",
name: "Southern Pashto",
script: "Arab",
},
{
value: "pes",
name: "Western Persian",
script: "Arab",
},
{
value: "pl",
name: "Polish",
script: "Latn",
},
{
value: "pt",
name: "Portuguese",
script: "Latn",
},
{
value: "ro",
name: "Romanian",
script: "Latn",
},
{
value: "ru",
name: "Russian",
script: "Cyrl",
},
{
value: "sk",
name: "Slovak",
script: "Latn",
},
{
value: "sl",
name: "Slovenian",
script: "Latn",
},
{
value: "sn",
name: "Shona",
script: "Latn",
},
{
value: "sd",
name: "Sindhi",
script: "Arab",
},
{
value: "so",
name: "Somali",
script: "Latn",
},
{
value: "es",
name: "Spanish",
script: "Latn",
},
{
value: "sr",
name: "Serbian",
script: "Cyrl",
},
{
value: "sv",
name: "Swedish",
script: "Latn",
},
{
value: "sw",
name: "Swahili",
script: "Latn",
},
{
value: "ta",
name: "Tamil",
script: "Taml",
},
{
value: "te",
name: "Telugu",
script: "Telu",
},
{
value: "tg",
name: "Tajik",
script: "Cyrl",
},
{
value: "tl",
name: "Tagalog",
script: "Latn",
},
{
value: "th",
name: "Thai",
script: "Thai",
},
{
value: "tr",
name: "Turkish",
script: "Latn",
},
{
value: "uk",
name: "Ukrainian",
script: "Cyrl",
},
{
value: "ur",
name: "Urdu",
script: "Arab",
},
{
value: "uz",
name: "Northern Uzbek",
script: "Latn",
},
{
value: "vi",
name: "Vietnamese",
script: "Latn",
},
{
value: "yo",
name: "Yoruba",
script: "Latn",
},
{
value: "yue",
name: "Cantonese",
script: "Hant",
},
{
value: "ms",
name: "Standard Malay",
script: "Latn",
},
{
value: "zu",
name: "Zulu",
script: "Latn",
},
];
/**
 * Every entry of `supportedLanguages` whose script is "Latn", in the same
 * order, followed by a trailing "None" entry whose `value` is `undefined`.
 *
 * Built in a single expression (strict `===` comparison, no post-hoc `push`)
 * so the exported array is never observed in a half-built state.
 */
const supportedLatinLanguages: LanguageOption[] = [
  ...supportedLanguages.filter((language) => language.script === "Latn"),
  { value: undefined, name: "None" },
];
export { supportedLatinLanguages };
export default supportedLanguages;

View File

@@ -22,7 +22,7 @@ type TranscriptDetails = {
export default function TranscriptDetails(details: TranscriptDetails) { export default function TranscriptDetails(details: TranscriptDetails) {
const api = getApi(); const api = getApi();
const transcript = useTranscript(null, api, details.params.transcriptId); const transcript = useTranscript(details.params.transcriptId);
const topics = useTopics(api, details.params.transcriptId); const topics = useTopics(api, details.params.transcriptId);
const waveform = useWaveform(api, details.params.transcriptId); const waveform = useWaveform(api, details.params.transcriptId);
const useActiveTopic = useState<Topic | null>(null); const useActiveTopic = useState<Topic | null>(null);

View File

@@ -0,0 +1,142 @@
"use client";
import React, { useEffect, useState } from "react";
import Recorder from "../../recorder";
import { TopicList } from "../../topicList";
import useWebRTC from "../../useWebRTC";
import useTranscript from "../../useTranscript";
import { useWebSockets } from "../../useWebSockets";
import useAudioDevice from "../../useAudioDevice";
import "../../../styles/button.css";
import { Topic } from "../../webSocketTypes";
import getApi from "../../../lib/getApi";
import LiveTrancription from "../../liveTranscription";
import DisconnectedIndicator from "../../disconnectedIndicator";
import { FontAwesomeIcon } from "@fortawesome/react-fontawesome";
import { faGear } from "@fortawesome/free-solid-svg-icons";
import { lockWakeState, releaseWakeState } from "../../../lib/wakeLock";
/**
 * Props of `TranscriptRecord`: `params.transcriptId` identifies the
 * transcript that is being recorded.
 */
type TranscriptDetails = {
  params: { transcriptId: string };
};
/**
 * Recording page for the transcript identified by
 * `details.params.transcriptId`.
 *
 * Renders the recorder, the live topic list and the live transcription (or
 * its translation). When recording stops, a STOP command is sent over the
 * WebRTC connection and a "generating the final summary" notice replaces the
 * live view. Once the transcript has a long summary, the browser URL is
 * replaced with the transcript's permanent URL.
 *
 * `lockWakeState` is called on mount and `releaseWakeState` on unmount.
 */
const TranscriptRecord = (details: TranscriptDetails) => {
  const [stream, setStream] = useState<MediaStream | null>(null);
  const [disconnected, setDisconnected] = useState<boolean>(false);
  const useActiveTopic = useState<Topic | null>(null);

  // Development aid: pressing "d" toggles the disconnected indicator.
  // Registered with addEventListener and removed on unmount so the handler
  // neither outlives the component nor overwrites another `onkeyup` handler.
  useEffect(() => {
    if (process.env.NEXT_PUBLIC_ENV !== "development") return;

    const toggleDisconnected = (e: KeyboardEvent) => {
      if (e.key === "d") {
        setDisconnected((prev) => !prev);
      }
    };
    document.addEventListener("keyup", toggleDisconnected);
    return () => {
      document.removeEventListener("keyup", toggleDisconnected);
    };
  }, []);

  const transcript = useTranscript(details.params.transcriptId);
  const api = getApi();
  const webRTC = useWebRTC(stream, details.params.transcriptId, api);
  const webSockets = useWebSockets(details.params.transcriptId);
  const { audioDevices, getAudioStream } = useAudioDevice();

  const [hasRecorded, setHasRecorded] = useState(false);
  const [transcriptStarted, setTranscriptStarted] = useState(false);

  // Flip to "started" the first time any transcript text arrives.
  useEffect(() => {
    if (!transcriptStarted && webSockets.transcriptText.length !== 0)
      setTranscriptStarted(true);
  }, [transcriptStarted, webSockets.transcriptText]);

  // Only re-run when the summary becomes available (or the id changes),
  // instead of rewriting the history entry on every render.
  const summaryReady = Boolean(transcript?.response?.longSummary);
  const finishedTranscriptId = transcript?.response?.id;
  useEffect(() => {
    if (summaryReady) {
      const newUrl = `/transcripts/${finishedTranscriptId}`;
      // Shallow redirection does not work on NextJS 13
      // https://github.com/vercel/next.js/discussions/48110
      // https://github.com/vercel/next.js/discussions/49540
      // router.push(newUrl, undefined, { shallow: true });
      history.replaceState({}, "", newUrl);
    }
  }, [summaryReady, finishedTranscriptId]);

  useEffect(() => {
    lockWakeState();
    return () => {
      releaseWakeState();
    };
  }, []);

  return (
    <>
      <Recorder
        setStream={setStream}
        onStop={() => {
          setStream(null);
          setHasRecorded(true);
          webRTC?.send(JSON.stringify({ cmd: "STOP" }));
        }}
        topics={webSockets.topics}
        getAudioStream={getAudioStream}
        useActiveTopic={useActiveTopic}
        isPastMeeting={false}
        audioDevices={audioDevices}
      />

      <div className="grid grid-cols-1 lg:grid-cols-2 grid-rows-mobile-inner lg:grid-rows-1 gap-2 lg:gap-4 h-full">
        <TopicList
          topics={webSockets.topics}
          useActiveTopic={useActiveTopic}
          autoscroll={true}
        />

        <section
          className={`w-full h-full bg-blue-400/20 rounded-lg md:rounded-xl p-2 md:px-4`}
        >
          {!hasRecorded ? (
            <>
              {transcriptStarted && (
                <h2 className="md:text-lg font-bold">Transcription</h2>
              )}
              <div className="flex flex-col justify-center align center text-center h-full">
                <div className="py-2 h-auto">
                  {!transcriptStarted ? (
                    <div className="text-center text-gray-500">
                      The conversation transcript will appear here shortly after
                      you start recording.
                    </div>
                  ) : (
                    <LiveTrancription
                      text={webSockets.transcriptText}
                      translateText={webSockets.translateText}
                    />
                  )}
                </div>
              </div>
            </>
          ) : (
            <div className="flex flex-col justify-center align center text-center h-full text-gray-500">
              <div className="p-2 md:p-4">
                <FontAwesomeIcon
                  icon={faGear}
                  className="animate-spin-slow h-14 w-14 md:h-20 md:w-20"
                />
              </div>
              <p>
                We are generating the final summary for you. This may take a
                couple of minutes. Please do not navigate away from the page
                during this time.
              </p>
            </div>
          )}
        </section>
      </div>

      {disconnected && <DisconnectedIndicator />}
    </>
  );
};

export default TranscriptRecord;

View File

@@ -0,0 +1,54 @@
import { useEffect, useState } from "react";
import { DefaultApi, V1TranscriptsCreateRequest } from "../api/apis/DefaultApi";
import { GetTranscript } from "../api";
import { useError } from "../(errors)/errorContext";
import getApi from "../lib/getApi";
/**
 * Value returned by `useCreateTranscript`.
 */
type CreateTranscript = {
  /** Starts a creation request with the given transcript parameters. */
  create: (params: V1TranscriptsCreateRequest["createTranscript"]) => void;
  /** True while a creation request is in flight. */
  loading: boolean;
  /** The created transcript, or `null` until a request has succeeded. */
  response: GetTranscript | null;
  /** Error of a failed creation request, or `null`. */
  error: Error | null;
};
/**
 * React hook that creates a transcript through `api.v1TranscriptsCreate`.
 *
 * @returns The latest `response`, the `loading` flag, the `error` of the
 *   current attempt (if it failed) and a `create` function that starts a
 *   request. `create` is a no-op while a previous request is still loading.
 *   Failures are stored in `error` and also forwarded to the shared error
 *   context via `setError`.
 */
const useCreateTranscript = (): CreateTranscript => {
  const [response, setResponse] = useState<GetTranscript | null>(null);
  const [loading, setLoading] = useState<boolean>(false);
  const [error, setErrorState] = useState<Error | null>(null);
  const { setError } = useError();
  const api = getApi();

  const create = (params: V1TranscriptsCreateRequest["createTranscript"]) => {
    if (loading) return;

    setLoading(true);
    // Drop the error of any earlier attempt so a successful retry does not
    // keep reporting a stale failure.
    setErrorState(null);

    const requestParameters: V1TranscriptsCreateRequest = {
      createTranscript: {
        // `||` (not `??`) so an empty string also falls back to the default.
        name: params.name || "Weekly All-Hands", // Default
        targetLanguage: params.targetLanguage || "en", // Default
      },
    };

    console.debug(
      "POST - /v1/transcripts/ - Requesting new transcription creation",
      requestParameters,
    );

    api
      .v1TranscriptsCreate(requestParameters)
      .then((result) => {
        setResponse(result);
        setLoading(false);
        console.debug("New transcript created:", result);
      })
      .catch((err) => {
        setError(err);
        setErrorState(err);
        setLoading(false);
      });
  };

  return { response, loading, error, create };
};

export default useCreateTranscript;

View File

@@ -1,14 +1,19 @@
type LiveTranscriptionProps = { type LiveTranscriptionProps = {
text: string; text: string;
translateText: string;
}; };
export default function LiveTrancription(props: LiveTranscriptionProps) { export default function LiveTrancription(props: LiveTranscriptionProps) {
return ( return (
<div className="text-center p-4"> <div className="text-center p-4">
<p className="text-lg md:text-xl font-bold line-clamp-4"> <p className="text-lg md:text-xl font-bold line-clamp-4">
{/* Nous allons prendre quelques appels téléphoniques et répondre à quelques questions */} {props.translateText ? props.translateText : props.text}
{props.text}
</p> </p>
{props.translateText && (
<p className="text-base md:text-lg font-bold line-clamp-4">
{props.text}
</p>
)}
</div> </div>
); );
} }

View File

@@ -1,197 +1,112 @@
"use client"; "use client";
import React, { useEffect, useState } from "react"; import React, { useEffect, useState } from "react";
import Recorder from "../recorder";
import { TopicList } from "../topicList";
import useWebRTC from "../useWebRTC";
import useTranscript from "../useTranscript";
import { useWebSockets } from "../useWebSockets";
import useAudioDevice from "../useAudioDevice"; import useAudioDevice from "../useAudioDevice";
import "../../styles/button.css"; import "../../styles/button.css";
import { Topic } from "../webSocketTypes";
import getApi from "../../lib/getApi"; import getApi from "../../lib/getApi";
import LiveTrancription from "../liveTranscription";
import DisconnectedIndicator from "../disconnectedIndicator";
import { FontAwesomeIcon } from "@fortawesome/react-fontawesome";
import { faGear } from "@fortawesome/free-solid-svg-icons";
import About from "../../(aboutAndPrivacy)/about"; import About from "../../(aboutAndPrivacy)/about";
import Privacy from "../../(aboutAndPrivacy)/privacy"; import Privacy from "../../(aboutAndPrivacy)/privacy";
import { lockWakeState, releaseWakeState } from "../../lib/wakeLock";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import useCreateTranscript from "../createTranscript";
import SelectSearch from "react-select-search";
import { supportedLatinLanguages } from "../../supportedLanguages";
import "react-select-search/style.css";
const TranscriptCreate = () => { const TranscriptCreate = () => {
const [stream, setStream] = useState<MediaStream | null>(null); // const transcript = useTranscript(stream, api);
const [disconnected, setDisconnected] = useState<boolean>(false);
const useActiveTopic = useState<Topic | null>(null);
useEffect(() => {
if (process.env.NEXT_PUBLIC_ENV === "development") {
document.onkeyup = (e) => {
if (e.key === "d") {
setDisconnected((prev) => !prev);
}
};
}
}, []);
const api = getApi();
const transcript = useTranscript(stream, api);
const webRTC = useWebRTC(stream, transcript?.response?.id, api);
const webSockets = useWebSockets(transcript?.response?.id);
const router = useRouter(); const router = useRouter();
const { const api = getApi();
loading,
permissionOk,
permissionDenied,
audioDevices,
requestPermission,
getAudioStream,
} = useAudioDevice();
const [hasRecorded, setHasRecorded] = useState(false); const [name, setName] = useState<string>();
const [transcriptStarted, setTranscriptStarted] = useState(false); const nameChange = (event: React.ChangeEvent<HTMLInputElement>) => {
setName(event.target.value);
};
const [targetLanguage, setTargetLanguage] = useState<string>();
const onLanguageChange = (newval) => {
typeof newval === "string" && setTargetLanguage(newval);
};
const createTranscript = useCreateTranscript();
const send = () => {
if (createTranscript.loading || permissionDenied) return;
createTranscript.create({ name, targetLanguage });
};
useEffect(() => { useEffect(() => {
if (!transcriptStarted && webSockets.transcriptText.length !== 0) createTranscript.response &&
setTranscriptStarted(true); router.push(`/transcripts/${createTranscript.response.id}/record`);
}, [webSockets.transcriptText]); }, [createTranscript.response]);
useEffect(() => { const { loading, permissionOk, permissionDenied, requestPermission } =
if (transcript?.response?.id) { useAudioDevice();
const newUrl = `/transcripts/${transcript.response.id}`;
// Shallow redirection does not work on NextJS 13
// https://github.com/vercel/next.js/discussions/48110
// https://github.com/vercel/next.js/discussions/49540
// router.push(newUrl, undefined, { shallow: true });
history.replaceState({}, "", newUrl);
}
});
useEffect(() => {
lockWakeState();
return () => {
releaseWakeState();
};
}, []);
return ( return (
<> <>
{permissionOk ? ( <div></div>
<> <div className="grid grid-cols-1 lg:grid-cols-2 grid-rows-mobile-inner lg:grid-rows-1 gap-2 lg:gap-4 h-full">
<Recorder <section className="flex flex-col w-full h-full items-center justify-evenly p-4 md:px-6 md:py-8">
setStream={setStream} <div className="flex flex-col max-w-xl items-center justify-center">
onStop={() => { <h1 className="text-2xl font-bold mb-2">
webRTC?.send(JSON.stringify({ cmd: "STOP" })); Welcome to reflector.media
setStream(null); </h1>
setHasRecorded(true); <p>
}} Reflector is a transcription and summarization pipeline that
topics={webSockets.topics} transforms audio into knowledge. The output is meeting minutes and
getAudioStream={getAudioStream} topic summaries enabling topic-specific analyses stored in your
useActiveTopic={useActiveTopic} systems of record. This is accomplished on your infrastructure
isPastMeeting={false} without 3rd parties keeping your data private, secure, and
audioDevices={audioDevices} organized.
/> </p>
<About buttonText="Learn more" />
</div>
</section>
<section className="rounded-xl md:bg-blue-200 flex flex-col justify-start p-6">
<h2 className="text-2xl font-bold mt-4 mb-2"> Try Reflector</h2>
<label className="mb-3">
<p>What is this meeting about ?</p>
<input type="text" onChange={nameChange} />
</label>
<div className="grid grid-cols-1 lg:grid-cols-2 grid-rows-mobile-inner lg:grid-rows-1 gap-2 lg:gap-4 h-full"> <label className="mb-3">
<TopicList <p>Do you need live translation ?</p>
topics={webSockets.topics} <SelectSearch
useActiveTopic={useActiveTopic} search
autoscroll={true} options={supportedLatinLanguages}
value={targetLanguage}
onChange={onLanguageChange}
placeholder="Choose your language"
/> />
</label>
<section {loading ? (
className={`w-full h-full bg-blue-400/20 rounded-lg md:rounded-xl p-2 md:px-4`} <p className="">Checking permission...</p>
> ) : permissionOk ? (
{!hasRecorded ? ( <> Microphone permission granted </>
<> ) : (
{transcriptStarted && ( <>
<h2 className="md:text-lg font-bold">Transcription</h2> <p className="">
)} In order to use Reflector, we kindly request permission to
<div className="flex flex-col justify-center align center text-center h-full"> access your microphone during meetings and events.
<div className="py-2 h-auto"> <br />
{!transcriptStarted ? ( <Privacy buttonText="Privacy policy" />
<div className="text-center text-gray-500"> <br />
The conversation transcript will appear here shortly {permissionDenied &&
after you start recording. "Permission to use your microphone was denied, please change the permission setting in your browser and refresh this page."}
</div> </p>
) : ( <button
<LiveTrancription text={webSockets.transcriptText} /> className="mt-4 bg-blue-400 hover:bg-blue-500 focus-visible:bg-blue-500 text-white font-bold py-2 px-4 rounded m-auto"
)} onClick={requestPermission}
</div> disabled={permissionDenied}
</div> >
</> {permissionDenied ? "Access denied" : "Grant Permission"}
) : ( </button>
<div className="flex flex-col justify-center align center text-center h-full text-gray-500"> </>
<div className="p-2 md:p-4"> )}
<FontAwesomeIcon <button onClick={send} disabled={!permissionOk}>
icon={faGear} {createTranscript.loading ? "loading" : "Send"}
className="animate-spin-slow h-14 w-14 md:h-20 md:w-20" </button>
/> </section>
</div> </div>
<p>
We are generating the final summary for you. This may take a
couple of minutes. Please do not navigate away from the page
during this time.
</p>
</div>
)}
</section>
</div>
{disconnected && <DisconnectedIndicator />}
</>
) : (
<>
<div></div>
<div className="max-h-full overflow-auto">
<section className="flex flex-col w-full h-full items-center justify-evenly p-4 md:px-6 md:py-8">
<div>
<div className="flex flex-col max-w-xl items-center justify-center">
<h1 className="text-2xl font-bold mb-2">
Welcome to reflector.media
</h1>
<p>
Reflector is a transcription and summarization pipeline that
transforms audio into knowledge. The output is meeting
minutes and topic summaries enabling topic-specific analyses
stored in your systems of record. This is accomplished on
your infrastructure without 3rd parties keeping your
data private, secure, and organized.
</p>
<About buttonText="Learn more" />
<h2 className="text-2xl font-bold mt-4 mb-2">
Audio Permissions
</h2>
{loading ? (
<p className="text-center">Checking permission...</p>
) : (
<>
<p className="text-center">
In order to use Reflector, we kindly request permission
to access your microphone during meetings and events.
<br />
<Privacy buttonText="Privacy policy" />
<br />
{permissionDenied
? "Permission to use your microphone was denied, please change the permission setting in your browser and refresh this page."
: "Please grant permission to continue."}
</p>
<button
className="mt-4 bg-blue-400 hover:bg-blue-500 focus-visible:bg-blue-500 text-white font-bold py-2 px-4 rounded m-auto"
onClick={requestPermission}
disabled={permissionDenied}
>
{permissionDenied
? "Access denied"
: "Grant Permission"}
</button>
</>
)}
</div>
</div>
</section>
</div>
</>
)}
</> </>
); );
}; };

View File

@@ -1,11 +1,11 @@
import { useEffect, useState } from "react"; import { useEffect, useState } from "react";
import { import {
DefaultApi,
V1TranscriptGetRequest, V1TranscriptGetRequest,
V1TranscriptsCreateRequest, V1TranscriptsCreateRequest,
} from "../api/apis/DefaultApi"; } from "../api/apis/DefaultApi";
import { GetTranscript } from "../api"; import { GetTranscript } from "../api";
import { useError } from "../(errors)/errorContext"; import { useError } from "../(errors)/errorContext";
import getApi from "../lib/getApi";
type Transcript = { type Transcript = {
response: GetTranscript | null; response: GetTranscript | null;
@@ -13,23 +13,12 @@ type Transcript = {
error: Error | null; error: Error | null;
}; };
const useTranscript = ( const useTranscript = (id: string | null): Transcript => {
stream: MediaStream | null,
api: DefaultApi,
id: string | null = null,
): Transcript => {
const [response, setResponse] = useState<GetTranscript | null>(null); const [response, setResponse] = useState<GetTranscript | null>(null);
const [loading, setLoading] = useState<boolean>(false); const [loading, setLoading] = useState<boolean>(false);
const [error, setErrorState] = useState<Error | null>(null); const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError(); const { setError } = useError();
const api = getApi();
const getOrCreateTranscript = (id: string | null) => {
if (id) {
getTranscript(id);
} else if (stream) {
createTranscript();
}
};
const getTranscript = (id: string | null) => { const getTranscript = (id: string | null) => {
if (!id) throw new Error("Transcript ID is required to get transcript"); if (!id) throw new Error("Transcript ID is required to get transcript");
@@ -43,34 +32,7 @@ const useTranscript = (
.then((result) => { .then((result) => {
setResponse(result); setResponse(result);
setLoading(false); setLoading(false);
console.debug("New transcript created:", result); console.debug("Transcript Loaded:", result);
})
.catch((err) => {
setError(err);
setErrorState(err);
});
};
const createTranscript = () => {
setLoading(true);
const requestParameters: V1TranscriptsCreateRequest = {
createTranscript: {
name: "Weekly All-Hands", // Hardcoded for now
targetLanguage: "en", // Hardcoded for now
},
};
console.debug(
"POST - /v1/transcripts/ - Requesting new transcription creation",
requestParameters,
);
api
.v1TranscriptsCreate(requestParameters)
.then((result) => {
setResponse(result);
setLoading(false);
console.debug("New transcript created:", result);
}) })
.catch((err) => { .catch((err) => {
setError(err); setError(err);
@@ -79,8 +41,8 @@ const useTranscript = (
}; };
useEffect(() => { useEffect(() => {
getOrCreateTranscript(id); getTranscript(id);
}, [id, stream]); }, [id]);
return { response, loading, error }; return { response, loading, error };
}; };

View File

@@ -5,6 +5,7 @@ import { useRouter } from "next/navigation";
type UseWebSockets = { type UseWebSockets = {
transcriptText: string; transcriptText: string;
translateText: string;
topics: Topic[]; topics: Topic[];
finalSummary: FinalSummary; finalSummary: FinalSummary;
status: Status; status: Status;
@@ -12,7 +13,9 @@ type UseWebSockets = {
export const useWebSockets = (transcriptId: string | null): UseWebSockets => { export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
const [transcriptText, setTranscriptText] = useState<string>(""); const [transcriptText, setTranscriptText] = useState<string>("");
const [translateText, setTranslateText] = useState<string>("");
const [textQueue, setTextQueue] = useState<string[]>([]); const [textQueue, setTextQueue] = useState<string[]>([]);
const [translationQueue, setTranslationQueue] = useState<string[]>([]);
const [isProcessing, setIsProcessing] = useState(false); const [isProcessing, setIsProcessing] = useState(false);
const [topics, setTopics] = useState<Topic[]>([]); const [topics, setTopics] = useState<Topic[]>([]);
const [finalSummary, setFinalSummary] = useState<FinalSummary>({ const [finalSummary, setFinalSummary] = useState<FinalSummary>({
@@ -30,6 +33,8 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
setIsProcessing(true); setIsProcessing(true);
const text = textQueue[0]; const text = textQueue[0];
setTranscriptText(text); setTranscriptText(text);
setTranslateText(translationQueue[0]);
console.log("displaying " + translateText);
const WPM_READING = 200 + textQueue.length * 10; // words per minute to read const WPM_READING = 200 + textQueue.length * 10; // words per minute to read
const wordCount = text.split(/\s+/).length; const wordCount = text.split(/\s+/).length;
@@ -38,6 +43,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
setTimeout(() => { setTimeout(() => {
setIsProcessing(false); setIsProcessing(false);
setTextQueue((prevQueue) => prevQueue.slice(1)); setTextQueue((prevQueue) => prevQueue.slice(1));
setTranslationQueue((prevQueue) => prevQueue.slice(1));
}, delay); }, delay);
}, [textQueue, isProcessing]); }, [textQueue, isProcessing]);
@@ -158,11 +164,13 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
switch (message.event) { switch (message.event) {
case "TRANSCRIPT": case "TRANSCRIPT":
const newText = (message.data.text ?? "").trim(); const newText = (message.data.text ?? "").trim();
const newTranslation = (message.data.translation ?? "").trim();
if (!newText) break; if (!newText) break;
console.debug("TRANSCRIPT event:", newText); console.debug("TRANSCRIPT event:", newText);
setTextQueue((prevQueue) => [...prevQueue, newText]); setTextQueue((prevQueue) => [...prevQueue, newText]);
setTranslationQueue((prevQueue) => [...prevQueue, newTranslation]);
break; break;
case "TOPIC": case "TOPIC":
@@ -233,5 +241,5 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
}; };
}, [transcriptId]); }, [transcriptId]);
return { transcriptText, topics, finalSummary, status }; return { transcriptText, translateText, topics, finalSummary, status };
}; };

View File

@@ -27,6 +27,7 @@
"react-dropdown": "^1.11.0", "react-dropdown": "^1.11.0",
"react-markdown": "^9.0.0", "react-markdown": "^9.0.0",
"react-qr-code": "^2.0.12", "react-qr-code": "^2.0.12",
"react-select-search": "^4.1.7",
"sass": "^1.63.6", "sass": "^1.63.6",
"simple-peer": "^9.11.1", "simple-peer": "^9.11.1",
"superagent": "^8.0.9", "superagent": "^8.0.9",

View File

@@ -2088,6 +2088,11 @@ react-qr-code@^2.0.12:
prop-types "^15.8.1" prop-types "^15.8.1"
qr.js "0.0.0" qr.js "0.0.0"
react-select-search@^4.1.7:
version "4.1.7"
resolved "https://registry.yarnpkg.com/react-select-search/-/react-select-search-4.1.7.tgz#5662729b9052282bde52e1352006d495d9c5ed6e"
integrity sha512-pU7ONAdK+bmz2tbhBWYQv9m5mnXOn8yImuiy+5UhimIG80d5iKv3nSYJIjJWjDbdrrdoXiCRwQm8xbA8llTjmQ==
react@^18.2.0: react@^18.2.0:
version "18.2.0" version "18.2.0"
resolved "https://registry.npmjs.org/react/-/react-18.2.0.tgz" resolved "https://registry.npmjs.org/react/-/react-18.2.0.tgz"