mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
tests: rework tests and fixes bugs along the way
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
from pathlib import Path
|
||||
import av
|
||||
from reflector.logger import logger
|
||||
from reflector.processors import (
|
||||
@@ -8,11 +7,48 @@ from reflector.processors import (
|
||||
AudioTranscriptAutoProcessor,
|
||||
TranscriptLinerProcessor,
|
||||
TranscriptTopicDetectorProcessor,
|
||||
TranscriptSummarizerProcessor,
|
||||
# TranscriptSummarizerProcessor,
|
||||
)
|
||||
import asyncio
|
||||
|
||||
|
||||
async def process_audio_file(filename, event_callback):
    """Decode an audio file and run it through the processing pipeline.

    Frames from the first audio stream of ``filename`` are pushed one by one
    into a pipeline made of the chunker, merger, transcript, liner and topic
    detector processors. Results are reported through ``event_callback``.

    Args:
        filename: Path (or anything accepted by ``av.open``) of the media
            file to process.
        event_callback: Coroutine function awaited as
            ``event_callback(event, data)`` where ``event`` is
            ``"transcript"`` or ``"topic"``. A ``"summary"`` event is also
            defined but is not emitted while the summarizer stage below
            stays disabled.
    """

    async def on_transcript(data):
        await event_callback("transcript", data)

    async def on_topic(data):
        await event_callback("topic", data)

    # Kept so the summarizer stage can be re-enabled without further changes.
    async def on_summary(data):
        await event_callback("summary", data)

    # transcription output
    pipeline = Pipeline(
        AudioChunkerProcessor(),
        AudioMergeProcessor(),
        AudioTranscriptAutoProcessor.as_threaded(),
        TranscriptLinerProcessor(callback=on_transcript),
        TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
        # TranscriptSummarizerProcessor.as_threaded(
        #     callback=on_summary
        # ),
    )
    pipeline.describe()

    # start processing audio
    logger.info(f"Opening {filename}")
    # The container is a context manager: closing it releases the underlying
    # file handle and demuxer, which the previous code never did.
    with av.open(filename) as container:
        try:
            logger.info("Start pushing audio into the pipeline")
            for frame in container.decode(audio=0):
                await pipeline.push(frame)
        finally:
            # Always flush, even when decoding or a processor raised, so the
            # pipeline is not left holding buffered data.
            logger.info("Flushing the pipeline")
            await pipeline.flush()

    logger.info("All done !")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
@@ -20,42 +56,12 @@ if __name__ == "__main__":
|
||||
parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
|
||||
args = parser.parse_args()
|
||||
|
||||
async def main():
|
||||
async def on_transcript(transcript):
|
||||
print(f"Transcript: [{transcript.human_timestamp}]: {transcript.text}")
|
||||
async def event_callback(event, data):
|
||||
if event == "transcript":
|
||||
print(f"Transcript[{data.human_timestamp}]: {data.text}")
|
||||
elif event == "topic":
|
||||
print(f"Topic: {data}")
|
||||
elif event == "summary":
|
||||
print(f"Summary: {data}")
|
||||
|
||||
async def on_summary(summary):
|
||||
print(f"Summary: {summary.title} - {summary.summary}")
|
||||
|
||||
async def on_final_summary(path):
|
||||
print(f"Final Summary: {path}")
|
||||
|
||||
# transcription output
|
||||
result_fn = Path(args.source).with_suffix(".jsonl")
|
||||
|
||||
pipeline = Pipeline(
|
||||
AudioChunkerProcessor(),
|
||||
AudioMergeProcessor(),
|
||||
AudioTranscriptAutoProcessor.as_threaded(),
|
||||
TranscriptLinerProcessor(callback=on_transcript),
|
||||
TranscriptTopicDetectorProcessor.as_threaded(callback=on_summary),
|
||||
TranscriptSummarizerProcessor.as_threaded(
|
||||
filename=result_fn, callback=on_final_summary
|
||||
),
|
||||
)
|
||||
pipeline.describe()
|
||||
|
||||
# start processing audio
|
||||
logger.info(f"Opening {args.source}")
|
||||
container = av.open(args.source)
|
||||
try:
|
||||
logger.info("Start pushing audio into the pipeline")
|
||||
for frame in container.decode(audio=0):
|
||||
await pipeline.push(frame)
|
||||
finally:
|
||||
logger.info("Flushing the pipeline")
|
||||
await pipeline.flush()
|
||||
|
||||
logger.info("All done !")
|
||||
|
||||
asyncio.run(main())
|
||||
asyncio.run(process_audio_file(args.source, event_callback))
|
||||
|
||||
Reference in New Issue
Block a user