Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-20 20:29:06 +00:00)
Upgrade modal apps
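The recurring change in the hunks below is Modal's rename of the container_idle_timeout argument to scaledown_window; the rest is import ordering and formatting cleanup. As a quick orientation, here is a minimal sketch of the decorator shape the upgraded apps converge on. It assumes Modal's newer API (where scaledown_window replaced container_idle_timeout); the app name, image, class, and values are illustrative, not taken verbatim from any one file in this commit.

import modal
from modal import App, Image

app = App("example-gpu-app")  # illustrative name, not from this repo
image = Image.debian_slim()  # illustrative image


@app.cls(
    gpu="A10G",
    timeout=60 * 5,
    scaledown_window=60 * 5,  # was container_idle_timeout=60 * 5
    allow_concurrent_inputs=10,
    image=image,
)
class Example:
    @modal.method()
    def ping(self) -> str:
        # trivial method so the class is a complete, runnable example
        return "ok"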
@@ -72,7 +72,7 @@ diarizer_image = (
 @app.cls(
     gpu=modal.gpu.A100(size="40GB"),
     timeout=60 * 30,
-    container_idle_timeout=60,
+    scaledown_window=60,
     allow_concurrent_inputs=1,
     image=diarizer_image,
 )
@@ -126,7 +126,7 @@ class Diarizer:
 
 @app.function(
     timeout=60 * 10,
-    container_idle_timeout=60 * 3,
+    scaledown_window=60 * 3,
     allow_concurrent_inputs=40,
     secrets=[
         Secret.from_name("reflector-gpu"),
@@ -3,13 +3,14 @@ Reflector GPU backend - LLM
 ===========================
 
 """
 
 import json
 import os
 import threading
 from typing import Optional
 
 import modal
-from modal import Image, Secret, App, asgi_app, method, enter, exit
+from modal import App, Image, Secret, asgi_app, enter, exit, method
 
 # LLM
 LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
@@ -56,7 +57,7 @@ llm_image = (
         "accelerate==0.21.0",
         "einops==0.6.1",
         "hf-transfer~=0.1",
-        "huggingface_hub==0.16.4"
+        "huggingface_hub==0.16.4",
     )
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
     .run_function(download_llm)
@@ -67,7 +68,7 @@ llm_image = (
 @app.cls(
     gpu="A100",
     timeout=60 * 5,
-    container_idle_timeout=60 * 5,
+    scaledown_window=60 * 5,
    allow_concurrent_inputs=15,
     image=llm_image,
 )
@@ -83,7 +84,7 @@ class LLM:
             torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
             low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
             cache_dir=IMAGE_MODEL_DIR,
-            local_files_only=True
+            local_files_only=True,
         )
 
         # JSONFormer doesn't yet support generation configs
@@ -97,9 +98,7 @@ class LLM:
         # load tokenizer
         print("Instance llm tokenizer")
         tokenizer = AutoTokenizer.from_pretrained(
-            LLM_MODEL,
-            cache_dir=IMAGE_MODEL_DIR,
-            local_files_only=True
+            LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
         )
 
         # move model to gpu
@@ -119,7 +118,9 @@ class LLM:
         print("Exit llm")
 
     @method()
-    def generate(self, prompt: str, gen_schema: str | None, gen_cfg: str | None) -> dict:
+    def generate(
+        self, prompt: str, gen_schema: str | None, gen_cfg: str | None
+    ) -> dict:
         """
         Perform a generation action using the LLM
         """
@@ -140,7 +141,7 @@ class LLM:
                 tokenizer=self.tokenizer,
                 json_schema=json.loads(gen_schema),
                 prompt=prompt,
-                max_string_token_length=gen_cfg.max_new_tokens
+                max_string_token_length=gen_cfg.max_new_tokens,
             )
             response = jsonformer_llm()
         else:
@@ -153,18 +154,21 @@ class LLM:
             output = self.model.generate(input_ids, generation_config=gen_cfg)
 
             # decode output
-            response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
-            response = response[len(prompt):]
+            response = self.tokenizer.decode(
+                output[0].cpu(), skip_special_tokens=True
+            )
+            response = response[len(prompt) :]
         print(f"Generated {response=}")
         return {"text": response}
 
 
 # -------------------------------------------------------------------
 # Web API
 # -------------------------------------------------------------------
 
 
 @app.function(
-    container_idle_timeout=60 * 10,
+    scaledown_window=60 * 10,
     timeout=60 * 5,
     allow_concurrent_inputs=45,
     secrets=[
@@ -201,7 +205,9 @@ def web():
     ):
         gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
         gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
-        func = llmstub.generate.spawn(prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg)
+        func = llmstub.generate.spawn(
+            prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
+        )
         result = func.get()
         return result
 
@@ -3,13 +3,14 @@ Reflector GPU backend - LLM
 ===========================
 
 """
 
 import json
 import os
 import threading
 from typing import Optional
 
 import modal
-from modal import Image, Secret, App, asgi_app, method, enter, exit
+from modal import App, Image, Secret, asgi_app, enter, exit, method
 
 # LLM
 LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
@@ -56,7 +57,7 @@ llm_image = (
         "accelerate==0.21.0",
         "einops==0.6.1",
         "hf-transfer~=0.1",
-        "huggingface_hub==0.16.4"
+        "huggingface_hub==0.16.4",
     )
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
     .run_function(download_llm)
@@ -67,7 +68,7 @@ llm_image = (
 @app.cls(
     gpu="A10G",
     timeout=60 * 5,
-    container_idle_timeout=60 * 5,
+    scaledown_window=60 * 5,
     allow_concurrent_inputs=10,
     image=llm_image,
 )
@@ -83,7 +84,7 @@ class LLM:
             torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
             low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
             cache_dir=IMAGE_MODEL_DIR,
-            local_files_only=True
+            local_files_only=True,
         )
 
         # JSONFormer doesn't yet support generation configs
@@ -97,9 +98,7 @@ class LLM:
         # load tokenizer
         print("Instance llm tokenizer")
         tokenizer = AutoTokenizer.from_pretrained(
-            LLM_MODEL,
-            cache_dir=IMAGE_MODEL_DIR,
-            local_files_only=True
+            LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
         )
         gen_cfg.pad_token_id = tokenizer.eos_token_id
         gen_cfg.eos_token_id = tokenizer.eos_token_id
@@ -122,7 +121,9 @@ class LLM:
         print("Exit llm")
 
     @method()
-    def generate(self, prompt: str, gen_schema: str | None, gen_cfg: str | None) -> dict:
+    def generate(
+        self, prompt: str, gen_schema: str | None, gen_cfg: str | None
+    ) -> dict:
         """
         Perform a generation action using the LLM
         """
@@ -145,7 +146,7 @@ class LLM:
                 tokenizer=self.tokenizer,
                 json_schema=json.loads(gen_schema),
                 prompt=prompt,
-                max_string_token_length=gen_cfg.max_new_tokens
+                max_string_token_length=gen_cfg.max_new_tokens,
             )
             response = jsonformer_llm()
         else:
@@ -158,21 +159,22 @@ class LLM:
             output = self.model.generate(input_ids, generation_config=gen_cfg)
 
             # decode output
-            response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
-            response = response[len(prompt):]
-            response = {
-                "long_summary": response
-            }
+            response = self.tokenizer.decode(
+                output[0].cpu(), skip_special_tokens=True
+            )
+            response = response[len(prompt) :]
+            response = {"long_summary": response}
         print(f"Generated {response=}")
         return {"text": response}
 
 
 # -------------------------------------------------------------------
 # Web API
 # -------------------------------------------------------------------
 
 
 @app.function(
-    container_idle_timeout=60 * 10,
+    scaledown_window=60 * 10,
     timeout=60 * 5,
     allow_concurrent_inputs=30,
     secrets=[
@@ -205,11 +207,13 @@ def web():
 
     @app.post("/llm", dependencies=[Depends(apikey_auth)])
     def llm(
         req: LLMRequest,
     ):
         gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
         gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
-        func = llmstub.generate.spawn(prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg)
+        func = llmstub.generate.spawn(
+            prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
+        )
         result = func.get()
         return result
 
@@ -52,7 +52,7 @@ image = (
 @app.cls(
     gpu="A10G",
     timeout=5 * MINUTES,
-    container_idle_timeout=5 * MINUTES,
+    scaledown_window=5 * MINUTES,
     allow_concurrent_inputs=6,
     image=image,
     volumes={MODELS_DIR: volume},
@@ -107,7 +107,7 @@ class Transcriber:
 
 
 @app.function(
-    container_idle_timeout=60,
+    scaledown_window=60,
     timeout=60,
     allow_concurrent_inputs=40,
     secrets=[
@@ -6,7 +6,7 @@ Reflector GPU backend - transcriber
 import os
 import threading
 
-from modal import Image, Secret, App, asgi_app, method, enter
+from modal import App, Image, Secret, asgi_app, enter, method
 from pydantic import BaseModel
 
 # Seamless M4T
@@ -137,7 +137,7 @@ transcriber_image = (
 @app.cls(
     gpu="A10G",
     timeout=60 * 5,
-    container_idle_timeout=60 * 5,
+    scaledown_window=60 * 5,
     allow_concurrent_inputs=4,
     image=transcriber_image,
 )
@@ -169,195 +169,195 @@ class Translator:
         # TODO: Enhance with complete list of lang codes
         seamless_lang_code = {
             # Afrikaans
-            'af': 'afr',
+            "af": "afr",
             # Amharic
-            'am': 'amh',
+            "am": "amh",
             # Modern Standard Arabic
-            'ar': 'arb',
+            "ar": "arb",
             # Moroccan Arabic
-            'ary': 'ary',
+            "ary": "ary",
             # Egyptian Arabic
-            'arz': 'arz',
+            "arz": "arz",
             # Assamese
-            'as': 'asm',
+            "as": "asm",
             # North Azerbaijani
-            'az': 'azj',
+            "az": "azj",
             # Belarusian
-            'be': 'bel',
+            "be": "bel",
             # Bengali
-            'bn': 'ben',
+            "bn": "ben",
             # Bosnian
-            'bs': 'bos',
+            "bs": "bos",
             # Bulgarian
-            'bg': 'bul',
+            "bg": "bul",
             # Catalan
-            'ca': 'cat',
+            "ca": "cat",
             # Cebuano
-            'ceb': 'ceb',
+            "ceb": "ceb",
             # Czech
-            'cs': 'ces',
+            "cs": "ces",
             # Central Kurdish
-            'ku': 'ckb',
+            "ku": "ckb",
             # Mandarin Chinese
-            'cmn': 'cmn_Hant',
+            "cmn": "cmn_Hant",
             # Welsh
-            'cy': 'cym',
+            "cy": "cym",
             # Danish
-            'da': 'dan',
+            "da": "dan",
             # German
-            'de': 'deu',
+            "de": "deu",
             # Greek
-            'el': 'ell',
+            "el": "ell",
             # English
-            'en': 'eng',
+            "en": "eng",
             # Estonian
-            'et': 'est',
+            "et": "est",
             # Basque
-            'eu': 'eus',
+            "eu": "eus",
             # Finnish
-            'fi': 'fin',
+            "fi": "fin",
             # French
-            'fr': 'fra',
+            "fr": "fra",
             # Irish
-            'ga': 'gle',
+            "ga": "gle",
             # West Central Oromo,
-            'gaz': 'gaz',
+            "gaz": "gaz",
             # Galician
-            'gl': 'glg',
+            "gl": "glg",
             # Gujarati
-            'gu': 'guj',
+            "gu": "guj",
             # Hebrew
-            'he': 'heb',
+            "he": "heb",
             # Hindi
-            'hi': 'hin',
+            "hi": "hin",
             # Croatian
-            'hr': 'hrv',
+            "hr": "hrv",
             # Hungarian
-            'hu': 'hun',
+            "hu": "hun",
             # Armenian
-            'hy': 'hye',
+            "hy": "hye",
             # Igbo
-            'ig': 'ibo',
+            "ig": "ibo",
             # Indonesian
-            'id': 'ind',
+            "id": "ind",
             # Icelandic
-            'is': 'isl',
+            "is": "isl",
             # Italian
-            'it': 'ita',
+            "it": "ita",
             # Javanese
-            'jv': 'jav',
+            "jv": "jav",
             # Japanese
-            'ja': 'jpn',
+            "ja": "jpn",
             # Kannada
-            'kn': 'kan',
+            "kn": "kan",
             # Georgian
-            'ka': 'kat',
+            "ka": "kat",
             # Kazakh
-            'kk': 'kaz',
+            "kk": "kaz",
             # Halh Mongolian
-            'khk': 'khk',
+            "khk": "khk",
             # Khmer
-            'km': 'khm',
+            "km": "khm",
             # Kyrgyz
-            'ky': 'kir',
+            "ky": "kir",
             # Korean
-            'ko': 'kor',
+            "ko": "kor",
             # Lao
-            'lo': 'lao',
+            "lo": "lao",
             # Lithuanian
-            'lt': 'lit',
+            "lt": "lit",
             # Ganda
-            'lg': 'lug',
+            "lg": "lug",
             # Luo
-            'luo': 'luo',
+            "luo": "luo",
             # Standard Latvian
-            'lv': 'lvs',
+            "lv": "lvs",
             # Maithili
-            'mai': 'mai',
+            "mai": "mai",
             # Malayalam
-            'ml': 'mal',
+            "ml": "mal",
             # Marathi
-            'mr': 'mar',
+            "mr": "mar",
             # Macedonian
-            'mk': 'mkd',
+            "mk": "mkd",
             # Maltese
-            'mt': 'mlt',
+            "mt": "mlt",
             # Meitei
-            'mni': 'mni',
+            "mni": "mni",
             # Burmese
-            'my': 'mya',
+            "my": "mya",
             # Dutch
-            'nl': 'nld',
+            "nl": "nld",
             # Norwegian Nynorsk
-            'nn': 'nno',
+            "nn": "nno",
             # Norwegian Bokmål
-            'nb': 'nob',
+            "nb": "nob",
             # Nepali
-            'ne': 'npi',
+            "ne": "npi",
             # Nyanja
-            'ny': 'nya',
+            "ny": "nya",
             # Odia
-            'or': 'ory',
+            "or": "ory",
             # Punjabi
-            'pa': 'pan',
+            "pa": "pan",
             # Southern Pashto
-            'pbt': 'pbt',
+            "pbt": "pbt",
             # Western Persian
-            'pes': 'pes',
+            "pes": "pes",
             # Polish
-            'pl': 'pol',
+            "pl": "pol",
             # Portuguese
-            'pt': 'por',
+            "pt": "por",
             # Romanian
-            'ro': 'ron',
+            "ro": "ron",
             # Russian
-            'ru': 'rus',
+            "ru": "rus",
             # Slovak
-            'sk': 'slk',
+            "sk": "slk",
             # Slovenian
-            'sl': 'slv',
+            "sl": "slv",
             # Shona
-            'sn': 'sna',
+            "sn": "sna",
             # Sindhi
-            'sd': 'snd',
+            "sd": "snd",
             # Somali
-            'so': 'som',
+            "so": "som",
             # Spanish
-            'es': 'spa',
+            "es": "spa",
             # Serbian
-            'sr': 'srp',
+            "sr": "srp",
             # Swedish
-            'sv': 'swe',
+            "sv": "swe",
             # Swahili
-            'sw': 'swh',
+            "sw": "swh",
             # Tamil
-            'ta': 'tam',
+            "ta": "tam",
             # Telugu
-            'te': 'tel',
+            "te": "tel",
             # Tajik
-            'tg': 'tgk',
+            "tg": "tgk",
             # Tagalog
-            'tl': 'tgl',
+            "tl": "tgl",
             # Thai
-            'th': 'tha',
+            "th": "tha",
             # Turkish
-            'tr': 'tur',
+            "tr": "tur",
             # Ukrainian
-            'uk': 'ukr',
+            "uk": "ukr",
             # Urdu
-            'ur': 'urd',
+            "ur": "urd",
             # Northern Uzbek
-            'uz': 'uzn',
+            "uz": "uzn",
             # Vietnamese
-            'vi': 'vie',
+            "vi": "vie",
             # Yoruba
-            'yo': 'yor',
+            "yo": "yor",
             # Cantonese
-            'yue': 'yue',
+            "yue": "yue",
             # Standard Malay
-            'ms': 'zsm',
+            "ms": "zsm",
             # Zulu
-            'zu': 'zul'
+            "zu": "zul",
         }
         return seamless_lang_code.get(lang_code, "eng")
 
@@ -381,7 +381,7 @@ class Translator:
 
 
 @app.function(
-    container_idle_timeout=60,
+    scaledown_window=60,
     timeout=60,
     allow_concurrent_inputs=40,
     secrets=[
@@ -413,9 +413,9 @@ def web():
 
     @app.post("/translate", dependencies=[Depends(apikey_auth)])
    async def translate(
         text: str,
         source_language: Annotated[str, Body(...)] = "en",
         target_language: Annotated[str, Body(...)] = "fr",
     ) -> TranslateResponse:
         func = translatorstub.translate_text.spawn(
             text=text,
@@ -53,7 +53,7 @@ app = modal.App("reflector-vllm-hermes3")
     image=vllm_image,
     gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
     timeout=60 * 5,
-    container_idle_timeout=60 * 5,
+    scaledown_window=60 * 5,
     allow_concurrent_inputs=100,
     secrets=[
         modal.Secret.from_name("reflector-gpu"),