mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-21 04:39:06 +00:00)
wrap JSONFormer around LLM
@@ -10,7 +10,7 @@ from modal import Image, method, Stub, asgi_app, Secret

 # LLM
 LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
-LLM_LOW_CPU_MEM_USAGE: bool = False
+LLM_LOW_CPU_MEM_USAGE: bool = True
 LLM_TORCH_DTYPE: str = "bfloat16"
 LLM_MAX_NEW_TOKENS: int = 300

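For context: constants like these are typically threaded into the Hugging Face loader, and low_cpu_mem_usage=True requires the accelerate package, which is likely why the commit also pins accelerate==0.21.0 in the next hunk. A minimal sketch under that assumption; the actual from_pretrained call is outside this diff and is not reflector code:

# Sketch only: how LLM_MODEL / LLM_LOW_CPU_MEM_USAGE / LLM_TORCH_DTYPE are
# commonly consumed by transformers; constants mirror the values above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

LLM_MODEL = "lmsys/vicuna-13b-v1.5"
LLM_LOW_CPU_MEM_USAGE = True        # flipped from False in this commit
LLM_TORCH_DTYPE = "bfloat16"

tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL,
    low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,      # lowers peak CPU RAM during loading; needs accelerate
    torch_dtype=getattr(torch, LLM_TORCH_DTYPE),  # torch.bfloat16
)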
@@ -49,6 +49,8 @@ llm_image = (
         "torch",
         "sentencepiece",
         "protobuf",
+        "jsonformer==0.12.0",
+        "accelerate==0.21.0",
         "einops==0.6.1",
         "hf-transfer~=0.1",
         "huggingface_hub==0.16.4",
@@ -81,6 +83,7 @@ class LLM:

         # generation configuration
         print("Instance llm generation config")
+        # JSONFormer doesn't yet support generation configs, but keeping for future usage
         model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
         gen_cfg = GenerationConfig.from_model_config(model.config)
         gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
@@ -97,6 +100,13 @@ class LLM:
         self.model = model
         self.tokenizer = tokenizer
         self.gen_cfg = gen_cfg
+        self.json_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "summary": {"type": "string"},
+            },
+        }

     def __exit__(self, *args):
         print("Exit llm")
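The json_schema attribute added here is a standard JSON Schema fragment; Jsonformer uses it in the generate() hunk below to force the model's output into exactly this shape. A hypothetical illustration of the resulting value (field text invented):

import json

# What a Jsonformer result for this schema looks like: a plain dict with
# exactly the declared string fields (contents depend on prompt and model).
result = {"title": "Weekly sync", "summary": "Recap of decisions and action items."}
print(json.dumps(result))  # the serialized form returned by generate() after this commit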
@@ -109,16 +119,17 @@ class LLM:
     @method()
     def generate(self, prompt: str):
         print(f"Generate {prompt=}")
-        # tokenize prompt
-        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
-            self.model.device
-        )
-        output = self.model.generate(input_ids, generation_config=self.gen_cfg)
+        import jsonformer
+        import json

-        # decode output
-        response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
+        jsonformer_llm = jsonformer.Jsonformer(model=self.model,
+                                               tokenizer=self.tokenizer,
+                                               json_schema=self.json_schema,
+                                               prompt=prompt,
+                                               max_string_token_length=self.gen_cfg.max_new_tokens)
+        response = jsonformer_llm()
         print(f"Generated {response=}")
-        return {"text": response}
+        return {"text": json.dumps(response)}


 # -------------------------------------------------------------------
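For readers unfamiliar with JSONFormer: it walks the schema itself, emitting the structural tokens directly and only asking the model to fill in the individual field values, so the result always parses as JSON matching the schema. A standalone sketch of the same pattern outside Modal; "gpt2" is only a small stand-in model for illustration, not reflector code:

# Minimal standalone sketch of the jsonformer pattern this commit adopts.
import jsonformer
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "summary": {"type": "string"},
    },
}

former = jsonformer.Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=schema,
    prompt="Give a title and a one-sentence summary of the meeting notes above.",
    max_string_token_length=300,
)
result = former()  # dict matching the schema, e.g. {"title": "...", "summary": "..."}
print(result)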