tests: rework tests and fix bugs along the way

This commit is contained in:
Mathieu Virbel
2023-08-01 16:05:48 +02:00
parent bc55cfdea3
commit 1f8e4200fd
9 changed files with 126 additions and 54 deletions

View File

@@ -1,4 +1,3 @@
from pathlib import Path
import av
from reflector.logger import logger
from reflector.processors import (
@@ -8,11 +7,48 @@ from reflector.processors import (
AudioTranscriptAutoProcessor,
TranscriptLinerProcessor,
TranscriptTopicDetectorProcessor,
TranscriptSummarizerProcessor,
# TranscriptSummarizerProcessor,
)
import asyncio
async def process_audio_file(filename, event_callback):
    """Decode an audio file and push it through the transcription pipeline.

    Args:
        filename: Source media passed straight to ``av.open``; audio stream
            index 0 is decoded.
        event_callback: Async callable awaited as
            ``event_callback(event, data)``. With the stages currently
            enabled, ``event`` is ``"transcript"`` or ``"topic"``.

    The pipeline is flushed even when decoding or pushing fails, and the
    container is closed once the flush has completed.
    """

    async def on_transcript(data):
        await event_callback("transcript", data)

    async def on_topic(data):
        await event_callback("topic", data)

    # NOTE(review): only referenced by the disabled summarizer stage below;
    # kept so that stage can be re-enabled without further changes.
    async def on_summary(data):
        await event_callback("summary", data)

    # transcription output
    pipeline = Pipeline(
        AudioChunkerProcessor(),
        AudioMergeProcessor(),
        AudioTranscriptAutoProcessor.as_threaded(),
        TranscriptLinerProcessor(callback=on_transcript),
        TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
        # TranscriptSummarizerProcessor.as_threaded(
        #     callback=on_summary
        # ),
    )
    pipeline.describe()

    # start processing audio
    logger.info(f"Opening {filename}")
    # Use the container as a context manager so the underlying file is
    # released on every exit path (previously it was never closed).
    with av.open(filename) as container:
        try:
            logger.info("Start pushing audio into the pipeline")
            for frame in container.decode(audio=0):
                await pipeline.push(frame)
        finally:
            # Always flush so frames already pushed are fully processed.
            logger.info("Flushing the pipeline")
            await pipeline.flush()

    logger.info("All done !")
if __name__ == "__main__":
import argparse
@@ -20,42 +56,12 @@ if __name__ == "__main__":
parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
args = parser.parse_args()
async def main():
async def on_transcript(transcript):
print(f"Transcript: [{transcript.human_timestamp}]: {transcript.text}")
async def event_callback(event, data):
    """Print a pipeline event to stdout.

    ``"transcript"`` events print the ``human_timestamp`` and ``text``
    attributes of ``data``; ``"topic"`` and ``"summary"`` events print
    ``data`` itself. Any other event name is ignored.
    """
    if event == "transcript":
        print(f"Transcript[{data.human_timestamp}]: {data.text}")
        return
    if event == "topic":
        print(f"Topic: {data}")
        return
    if event == "summary":
        print(f"Summary: {data}")
async def on_summary(summary):
print(f"Summary: {summary.title} - {summary.summary}")
async def on_final_summary(path):
print(f"Final Summary: {path}")
# transcription output
result_fn = Path(args.source).with_suffix(".jsonl")
pipeline = Pipeline(
AudioChunkerProcessor(),
AudioMergeProcessor(),
AudioTranscriptAutoProcessor.as_threaded(),
TranscriptLinerProcessor(callback=on_transcript),
TranscriptTopicDetectorProcessor.as_threaded(callback=on_summary),
TranscriptSummarizerProcessor.as_threaded(
filename=result_fn, callback=on_final_summary
),
)
pipeline.describe()
# start processing audio
logger.info(f"Opening {args.source}")
container = av.open(args.source)
try:
logger.info("Start pushing audio into the pipeline")
for frame in container.decode(audio=0):
await pipeline.push(frame)
finally:
logger.info("Flushing the pipeline")
await pipeline.flush()
logger.info("All done !")
asyncio.run(main())
asyncio.run(process_audio_file(args.source, event_callback))