mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 12:49:06 +00:00
@@ -37,18 +37,21 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
async def _diarize(self, data: AudioDiarizationInput):
|
async def _diarize(self, data: AudioDiarizationInput):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def assign_speaker(self, words: list[Word], diarization: list[dict]):
|
@classmethod
|
||||||
self._diarization_remove_overlap(diarization)
|
def assign_speaker(cls, words: list[Word], diarization: list[dict]):
|
||||||
self._diarization_remove_segment_without_words(words, diarization)
|
cls._diarization_remove_overlap(diarization)
|
||||||
self._diarization_merge_same_speaker(words, diarization)
|
cls._diarization_remove_segment_without_words(words, diarization)
|
||||||
self._diarization_assign_speaker(words, diarization)
|
cls._diarization_merge_same_speaker(diarization)
|
||||||
|
cls._diarization_assign_speaker(words, diarization)
|
||||||
|
|
||||||
def iter_words_from_topics(self, topics: TitleSummary):
|
@staticmethod
|
||||||
|
def iter_words_from_topics(topics: list[TitleSummary]):
|
||||||
for topic in topics:
|
for topic in topics:
|
||||||
for word in topic.transcript.words:
|
for word in topic.transcript.words:
|
||||||
yield word
|
yield word
|
||||||
|
|
||||||
def is_word_continuation(self, word_prev, word):
|
@staticmethod
|
||||||
|
def is_word_continuation(word_prev, word):
|
||||||
"""
|
"""
|
||||||
Return True if the word is a continuation of the previous word
|
Return True if the word is a continuation of the previous word
|
||||||
by checking if the previous word is ending with a punctuation
|
by checking if the previous word is ending with a punctuation
|
||||||
@@ -61,7 +64,8 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _diarization_remove_overlap(self, diarization: list[dict]):
|
@staticmethod
|
||||||
|
def _diarization_remove_overlap(diarization: list[dict]):
|
||||||
"""
|
"""
|
||||||
Remove overlap in diarization results
|
Remove overlap in diarization results
|
||||||
|
|
||||||
@@ -86,8 +90,9 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
else:
|
else:
|
||||||
diarization_idx += 1
|
diarization_idx += 1
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
def _diarization_remove_segment_without_words(
|
def _diarization_remove_segment_without_words(
|
||||||
self, words: list[Word], diarization: list[dict]
|
words: list[Word], diarization: list[dict]
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Remove diarization segments without words
|
Remove diarization segments without words
|
||||||
@@ -116,9 +121,8 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
else:
|
else:
|
||||||
diarization_idx += 1
|
diarization_idx += 1
|
||||||
|
|
||||||
def _diarization_merge_same_speaker(
|
@staticmethod
|
||||||
self, words: list[Word], diarization: list[dict]
|
def _diarization_merge_same_speaker(diarization: list[dict]):
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Merge diarization contiguous segments with the same speaker
|
Merge diarization contiguous segments with the same speaker
|
||||||
|
|
||||||
@@ -135,7 +139,8 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
else:
|
else:
|
||||||
diarization_idx += 1
|
diarization_idx += 1
|
||||||
|
|
||||||
def _diarization_assign_speaker(self, words: list[Word], diarization: list[dict]):
|
@classmethod
|
||||||
|
def _diarization_assign_speaker(cls, words: list[Word], diarization: list[dict]):
|
||||||
"""
|
"""
|
||||||
Assign speaker to words based on diarization
|
Assign speaker to words based on diarization
|
||||||
|
|
||||||
@@ -158,7 +163,7 @@ class AudioDiarizationProcessor(Processor):
|
|||||||
# If it's a continuation, assign with the last speaker
|
# If it's a continuation, assign with the last speaker
|
||||||
is_continuation = False
|
is_continuation = False
|
||||||
if word_idx > 0 and word_idx < len(words) - 1:
|
if word_idx > 0 and word_idx < len(words) - 1:
|
||||||
is_continuation = self.is_word_continuation(
|
is_continuation = cls.is_word_continuation(
|
||||||
*words[word_idx - 1 : word_idx + 1]
|
*words[word_idx - 1 : word_idx + 1]
|
||||||
)
|
)
|
||||||
if is_continuation:
|
if is_continuation:
|
||||||
|
|||||||
Reference in New Issue
Block a user