style: use ruff for linting and formatting (#524)

This commit is contained in:
2025-07-31 17:57:43 -06:00
committed by GitHub
parent ad56165b54
commit f5b82d44e3
88 changed files with 263 additions and 197 deletions

View File

@@ -1,6 +1,7 @@
import asyncio
import av
from reflector.logger import logger
from reflector.processors import (
AudioChunkerProcessor,

View File

@@ -9,17 +9,18 @@ This tool processes audio files locally without requiring the full server infras
import asyncio
import tempfile
import uuid
from pathlib import Path
from typing import List
import uuid
import av
from reflector.logger import logger
from reflector.processors import (
AudioChunkerProcessor,
AudioFileWriterProcessor,
AudioMergeProcessor,
AudioTranscriptAutoProcessor,
AudioFileWriterProcessor,
Pipeline,
PipelineEvent,
TranscriptFinalSummaryProcessor,
@@ -155,9 +156,10 @@ async def process_audio_file_with_diarization(
# For Modal backend, we need to upload the file to S3 first
if diarization_backend == "modal":
from datetime import datetime
from reflector.storage import get_transcripts_storage
from reflector.utils.s3_temp_file import S3TemporaryFile
from datetime import datetime
storage = get_transcripts_storage()

View File

@@ -8,7 +8,6 @@ This script helps test the diarization functionality with sample audio files.
"""
import asyncio
import json
import sys
from pathlib import Path
@@ -17,23 +16,20 @@ from reflector.logger import logger
async def test_diarization(audio_file: str):
"""Test the diarization functionality"""
# Import the processing function
from process_with_diarization import process_audio_file_with_diarization
# Collect events
events = []
async def event_callback(event):
events.append({
"processor": event.processor,
"data": event.data
})
events.append({"processor": event.processor, "data": event.data})
logger.info(f"Event from {event.processor}")
# Process the audio file
logger.info(f"Processing audio file: {audio_file}")
try:
await process_audio_file_with_diarization(
audio_file,
@@ -44,10 +40,10 @@ async def test_diarization(audio_file: str):
enable_diarization=True,
diarization_backend="modal",
)
# Analyze results
logger.info(f"Processing complete. Received {len(events)} events")
# Look for diarization results
diarized_topics = []
for event in events:
@@ -56,13 +52,17 @@ async def test_diarization(audio_file: str):
if hasattr(event["data"], "transcript") and event["data"].transcript:
words = event["data"].transcript.words
if words and hasattr(words[0], "speaker"):
speakers = set(w.speaker for w in words if hasattr(w, "speaker"))
logger.info(f"Found {len(speakers)} speakers in topic: {event['data'].title}")
speakers = set(
w.speaker for w in words if hasattr(w, "speaker")
)
logger.info(
f"Found {len(speakers)} speakers in topic: {event['data'].title}"
)
diarized_topics.append(event["data"])
if diarized_topics:
logger.info(f"Successfully diarized {len(diarized_topics)} topics")
# Print sample output
sample_topic = diarized_topics[0]
logger.info("Sample diarized output:")
@@ -70,9 +70,9 @@ async def test_diarization(audio_file: str):
logger.info(f" Word {i}: '{word.text}' - Speaker {word.speaker}")
else:
logger.warning("No diarization results found in output")
return events
except Exception as e:
logger.error(f"Error during processing: {e}")
raise
@@ -82,15 +82,15 @@ def main():
if len(sys.argv) < 2:
print("Usage: python test_diarization.py <audio_file>")
sys.exit(1)
audio_file = sys.argv[1]
if not Path(audio_file).exists():
print(f"Error: Audio file '{audio_file}' not found")
sys.exit(1)
# Run the test
asyncio.run(test_diarization(audio_file))
if __name__ == "__main__":
main()
main()