wrap JSONFormer around LLM

This commit is contained in:
Gokul Mohanarangan
2023-08-16 14:03:25 +05:30
parent 2f0e9a51f7
commit 0cdd7037fb

View File

@@ -10,7 +10,7 @@ from modal import Image, method, Stub, asgi_app, Secret
 # LLM
 LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
-LLM_LOW_CPU_MEM_USAGE: bool = False
+LLM_LOW_CPU_MEM_USAGE: bool = True
 LLM_TORCH_DTYPE: str = "bfloat16"
 LLM_MAX_NEW_TOKENS: int = 300
@@ -49,6 +49,8 @@ llm_image = (
         "torch",
         "sentencepiece",
         "protobuf",
+        "jsonformer==0.12.0",
+        "accelerate==0.21.0",
         "einops==0.6.1",
         "hf-transfer~=0.1",
         "huggingface_hub==0.16.4",
@@ -81,6 +83,7 @@ class LLM:
         # generation configuration
         print("Instance llm generation config")
+        # JSONFormer doesn't yet support generation configs, but keeping for future usage
         model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
         gen_cfg = GenerationConfig.from_model_config(model.config)
         gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
@@ -97,6 +100,13 @@ class LLM:
         self.model = model
         self.tokenizer = tokenizer
         self.gen_cfg = gen_cfg
+        self.json_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "summary": {"type": "string"},
+            },
+        }

     def __exit__(self, *args):
         print("Exit llm")
@@ -109,16 +119,17 @@ class LLM:
     @method()
     def generate(self, prompt: str):
         print(f"Generate {prompt=}")
-        # tokenize prompt
-        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
-            self.model.device
-        )
-        output = self.model.generate(input_ids, generation_config=self.gen_cfg)
-        # decode output
-        response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
+        import jsonformer
+        import json
+
+        jsonformer_llm = jsonformer.Jsonformer(model=self.model,
+                                               tokenizer=self.tokenizer,
+                                               json_schema=self.json_schema,
+                                               prompt=prompt,
+                                               max_string_token_length=self.gen_cfg.max_new_tokens)
+        response = jsonformer_llm()
         print(f"Generated {response=}")
-        return {"text": response}
+        return {"text": json.dumps(response)}

 # -------------------------------------------------------------------