server: fixes pipeline logger not transmitted to processors

Closes #110
This commit is contained in:
Mathieu Virbel
2023-08-04 12:02:18 +02:00
parent 6d2085ce61
commit dce92e0cf7
6 changed files with 32 additions and 9 deletions

View File

@@ -1,6 +1,6 @@
from reflector.logger import logger
from reflector.settings import settings from reflector.settings import settings
from reflector.utils.retry import retry from reflector.utils.retry import retry
from reflector.logger import logger as reflector_logger
import importlib import importlib
import json import json
import re import re
@@ -29,15 +29,18 @@ class LLM:
importlib.import_module(module_name) importlib.import_module(module_name)
return cls._registry[name]() return cls._registry[name]()
async def generate(self, prompt: str, logger=None, **kwargs) -> dict:
    """Run the LLM on ``prompt`` and return its result.

    ``self._generate`` is invoked through ``retry``; when it returns a
    string, that string is passed to ``self._parse_json`` before being
    returned.

    Args:
        prompt: Text sent to the underlying LLM implementation.
        logger: Logger used for every message emitted here, so a caller
            can pass its own bound logger. Optional: when omitted, the
            module-level ``reflector_logger`` is used, which keeps
            callers that only pass ``prompt`` working.
        **kwargs: Forwarded unchanged to ``self._generate``.

    Raises:
        Exception: Whatever ``retry(self._generate)`` raises is logged
            and re-raised unchanged.
    """
    if logger is None:
        logger = reflector_logger

    logger.info("LLM generate", prompt=repr(prompt))
    try:
        result = await retry(self._generate)(prompt=prompt, **kwargs)
    except Exception:
        logger.exception("Failed to call llm after retrying")
        raise

    logger.debug("LLM result [raw]", result=repr(result))
    if isinstance(result, str):
        result = self._parse_json(result)
    # Logged for every result, including one that was not a string.
    logger.debug("LLM result [parsed]", result=repr(result))
    return result

View File

@@ -21,7 +21,6 @@ class OpenAILLM(LLM):
"Authorization": f"Bearer {self.openai_key}", "Authorization": f"Bearer {self.openai_key}",
} }
logger.debug(f"LLM openai prompt: {prompt}")
async with httpx.AsyncClient(timeout=self.timeout) as client: async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post( response = await client.post(
@@ -36,7 +35,6 @@ class OpenAILLM(LLM):
) )
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
logger.info(f"LLM openai result: {result}")
return result["choices"][0]["text"] return result["choices"][0]["text"]

View File

@@ -17,7 +17,8 @@ class Processor:
self.logger = (custom_logger or logger).bind(processor=self.__class__.__name__) self.logger = (custom_logger or logger).bind(processor=self.__class__.__name__)
def set_pipeline(self, pipeline: "Pipeline"): def set_pipeline(self, pipeline: "Pipeline"):
self.logger = self.logger.bind(pipeline=pipeline.uid) # if pipeline is used, pipeline logger will be used instead
self.logger = pipeline.logger.bind(processor=self.__class__.__name__)
def connect(self, processor: "Processor"): def connect(self, processor: "Processor"):
""" """
@@ -111,6 +112,10 @@ class ThreadedProcessor(Processor):
self.queue = asyncio.Queue() self.queue = asyncio.Queue()
self.task = asyncio.get_running_loop().create_task(self.loop()) self.task = asyncio.get_running_loop().create_task(self.loop())
def set_pipeline(self, pipeline: "Pipeline"):
    """Register ``pipeline`` on this processor and on ``self.processor``."""
    # First this processor itself, through the parent implementation.
    super().set_pipeline(pipeline)
    # Then the processor held in self.processor, so it gets the same pipeline.
    self.processor.set_pipeline(pipeline)
async def loop(self): async def loop(self):
while True: while True:
data = await self.queue.get() data = await self.queue.get()
@@ -153,6 +158,9 @@ class Pipeline(Processor):
def __init__(self, *processors: Processor): def __init__(self, *processors: Processor):
super().__init__() super().__init__()
self.logger = logger.bind(pipeline=self.uid)
self.logger.info("Pipeline created")
self.processors = processors self.processors = processors
for processor in processors: for processor in processors:
@@ -168,8 +176,10 @@ class Pipeline(Processor):
await self.processors[0].push(data) await self.processors[0].push(data)
async def _flush(self):
    """Flush each processor of the pipeline in turn.

    Processors are awaited one after another, in the order they appear in
    ``self.processors``; a debug message is logged before the first flush
    and an info message after the last.
    """
    self.logger.debug("Pipeline flushing")
    for stage in self.processors:
        await stage.flush()
    self.logger.info("Pipeline flushed")
def describe(self, level=0): def describe(self, level=0):
logger.info(" " * level + "Pipeline:") logger.info(" " * level + "Pipeline:")

View File

@@ -1,5 +1,6 @@
from reflector.processors.base import Processor from reflector.processors.base import Processor
from reflector.processors.types import Transcript, TitleSummary from reflector.processors.types import Transcript, TitleSummary
from reflector.utils.retry import retry
from reflector.llm import LLM from reflector.llm import LLM
@@ -42,8 +43,10 @@ class TranscriptTopicDetectorProcessor(Processor):
async def _flush(self): async def _flush(self):
if not self.transcript: if not self.transcript:
return return
prompt = self.PROMPT.format(input_text=self.transcript.text) text = self.transcript.text
result = await self.llm.generate(prompt=prompt) self.logger.info(f"Detect topic on {len(text)} length transcript")
prompt = self.PROMPT.format(input_text=text)
result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
summary = TitleSummary( summary = TitleSummary(
title=result["title"], title=result["title"],
summary=result["summary"], summary=result["summary"],

View File

@@ -67,6 +67,13 @@ class TitleSummary:
duration: float duration: float
transcript: Transcript transcript: Transcript
@property
def human_timestamp(self):
    """Return ``self.timestamp`` formatted as ``MM:SS.mmm``.

    The timestamp is treated as a number of seconds. It is rounded to
    whole milliseconds once, then split with integer arithmetic. Taking
    the fractional part of the float and truncating it loses a
    millisecond to representation error (1.001 rendered as
    ``00:01.000``).

    Minutes are not wrapped into hours: one hour renders as ``60:00.000``.
    """
    total_ms = round(self.timestamp * 1000)
    minutes, remainder_ms = divmod(total_ms, 60000)
    seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
@dataclass @dataclass
class FinalSummary: class FinalSummary:

View File

@@ -65,9 +65,11 @@ if __name__ == "__main__":
if event == "transcript": if event == "transcript":
print(f"Transcript[{data.human_timestamp}]: {data.text}") print(f"Transcript[{data.human_timestamp}]: {data.text}")
elif event == "topic": elif event == "topic":
print(f"Topic: {data}") print(f"Topic[{data.human_timestamp}]: title={data.title}")
print(f"Topic[{data.human_timestamp}]: summary={data.summary}")
elif event == "summary": elif event == "summary":
print(f"Summary: {data}") print(f"Summary: duration={data.duration}")
print(f"Summary: summary={data.summary}")
asyncio.run( asyncio.run(
process_audio_file( process_audio_file(