mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
server: add tests on segmentation and fix issue with speaker
This commit is contained in:
@@ -125,16 +125,30 @@ class Transcript(BaseModel):
|
|||||||
speaker=word.speaker,
|
speaker=word.speaker,
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# If the word is attached to another speaker, push the current segment
|
||||||
|
# and start a new one
|
||||||
|
if word.speaker != current_segment.speaker:
|
||||||
|
segments.append(current_segment)
|
||||||
|
current_segment = TranscriptSegment(
|
||||||
|
text=word.text,
|
||||||
|
start=word.start,
|
||||||
|
speaker=word.speaker,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# if the word is the end of a sentence, and we have enough content,
|
||||||
|
# add the word to the current segment and push it
|
||||||
current_segment.text += word.text
|
current_segment.text += word.text
|
||||||
|
|
||||||
have_punc = PUNC_RE.search(word.text)
|
have_punc = PUNC_RE.search(word.text)
|
||||||
if word.speaker != current_segment.speaker or (
|
if have_punc and (len(current_segment.text) > MAX_SEGMENT_LENGTH):
|
||||||
have_punc and (len(current_segment.text) > MAX_SEGMENT_LENGTH)
|
|
||||||
):
|
|
||||||
segments.append(current_segment)
|
segments.append(current_segment)
|
||||||
current_segment = None
|
current_segment = None
|
||||||
|
|
||||||
if current_segment:
|
if current_segment:
|
||||||
segments.append(current_segment)
|
segments.append(current_segment)
|
||||||
|
|
||||||
return segments
|
return segments
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -142,5 +142,20 @@ def test_processor_transcript_segment():
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
for segment in transcript.as_segments():
|
segments = transcript.as_segments()
|
||||||
print(segment)
|
assert len(segments) == 7
|
||||||
|
|
||||||
|
# check speaker order
|
||||||
|
assert segments[0].speaker == 0
|
||||||
|
assert segments[1].speaker == 0
|
||||||
|
assert segments[2].speaker == 0
|
||||||
|
assert segments[3].speaker == 1
|
||||||
|
assert segments[4].speaker == 2
|
||||||
|
assert segments[5].speaker == 0
|
||||||
|
assert segments[6].speaker == 0
|
||||||
|
|
||||||
|
# check the timing (first entry, and first of the other speakers)
|
||||||
|
assert segments[0].start == 5.12
|
||||||
|
assert segments[3].start == 30.72
|
||||||
|
assert segments[4].start == 31.56
|
||||||
|
assert segments[5].start == 32.38
|
||||||
|
|||||||
Reference in New Issue
Block a user