"""Tests for ``compute_topic_chunk_size`` and ``compute_max_subjects``."""

import math

import pytest

from reflector.utils.transcript_constants import (
    compute_max_subjects,
    compute_topic_chunk_size,
)


@pytest.mark.parametrize(
    "duration_min,total_words,expected_topics_range",
    [
        (5, 750, (1, 3)),
        (10, 1500, (3, 6)),
        (30, 4500, (8, 14)),
        (60, 9000, (14, 22)),
        (120, 18000, (24, 35)),
        (180, 27000, (30, 42)),
    ],
)
def test_topic_count_in_expected_range(
    duration_min, total_words, expected_topics_range
):
    """Check the topic count implied by the chunk size is in the expected range.

    The duration is given in minutes and multiplied by 60 before being passed
    to ``compute_topic_chunk_size``; the topic count is the number of chunks
    needed to cover ``total_words`` (rounded up).
    """
    low, high = expected_topics_range
    chunk_size = compute_topic_chunk_size(duration_min * 60, total_words)
    num_topics = math.ceil(total_words / chunk_size)
    assert low <= num_topics <= high, (
        f"For {duration_min}min/{total_words}words: got {num_topics} topics "
        f"(chunk_size={chunk_size}), expected {low}-{high}"
    )


def test_chunk_size_within_bounds():
    """Chunk size stays within [375, 1500] at 150 words per minute."""
    for duration_min in [5, 10, 30, 60, 120, 180]:
        chunk_size = compute_topic_chunk_size(duration_min * 60, duration_min * 150)
        assert (
            375 <= chunk_size <= 1500
        ), f"For {duration_min}min: chunk_size={chunk_size} out of bounds [375, 1500]"


def test_zero_duration_falls_back():
    """A zero duration yields the fallback chunk size of 375."""
    assert compute_topic_chunk_size(0, 1000) == 375


def test_zero_words_falls_back():
    """A zero word count yields the fallback chunk size of 375."""
    assert compute_topic_chunk_size(600, 0) == 375


def test_negative_inputs_fall_back():
    """A negative duration or word count yields the fallback chunk size of 375."""
    assert compute_topic_chunk_size(-10, 1000) == 375
    assert compute_topic_chunk_size(600, -5) == 375


def test_very_short_transcript():
    """A 1-minute call with very few words should still produce at least 1 topic."""
    total_words = 100
    chunk_size = compute_topic_chunk_size(60, total_words)
    # chunk_size is at least 375, so 100 words = 1 chunk
    assert chunk_size >= 375
    # Assert the topic count itself, which is what the docstring promises.
    assert math.ceil(total_words / chunk_size) == 1


def test_very_long_transcript():
    """A 4-hour call should cap at max topics."""
    total_words = 36000
    chunk_size = compute_topic_chunk_size(4 * 3600, total_words)
    num_topics = math.ceil(total_words / chunk_size)
    assert num_topics <= 50


# --- compute_max_subjects tests ---


@pytest.mark.parametrize(
    "duration_seconds,expected_max",
    [
        (0, 1),  # zero/invalid → 1
        (-10, 1),  # negative → 1
        (60, 1),  # 1 min → 1
        (120, 1),  # 2 min → 1
        (300, 1),  # 5 min (boundary) → 1
        (301, 2),  # just over 5 min → 2
        (900, 2),  # 15 min (boundary) → 2
        (901, 3),  # just over 15 min → 3
        (1800, 3),  # 30 min (boundary) → 3
        (1801, 4),  # just over 30 min → 4
        (2700, 4),  # 45 min (boundary) → 4
        (2701, 5),  # just over 45 min → 5
        (3600, 5),  # 60 min (boundary) → 5
        (3601, 6),  # just over 60 min → 6
        (7200, 6),  # 2 hours → 6
        (14400, 6),  # 4 hours → 6
    ],
)
def test_max_subjects_scales_with_duration(duration_seconds, expected_max):
    """``compute_max_subjects`` returns the expected value at each duration step."""
    assert compute_max_subjects(duration_seconds) == expected_max


def test_max_subjects_never_exceeds_cap():
    """Even very long recordings should cap at 6 subjects."""
    for hours in range(1, 10):
        assert compute_max_subjects(hours * 3600) <= 6