fix waveform generation

This commit is contained in:
Sara
2023-10-09 15:52:11 +02:00
parent 47f7e1836e
commit 2cf61b191f
2 changed files with 11 additions and 8 deletions

View File

@@ -9,11 +9,12 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
path = path.as_posix()
container = av.open(path)
stream = container.streams.get(audio=0)[0]
stream = container.streams.audio[0]
duration = container.duration / av.time_base
chunk_size_secs = duration / segments_count
chunk_size = int(chunk_size_secs * stream.rate * stream.channels)
if chunk_size == 0:
# There is not enough data to fill every chunk,
# so fall back to a chunk_size of 1.
@@ -22,7 +23,7 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
# 1.1 is a safety margin: PyAV's decode does not always
# return exactly the number of chunks we expect, so the
# buffer is allocated slightly larger than segments_count.
volumes = np.zeros(int(segments_count * 1.1), dtype=int)
volumes = np.zeros(int(segments_count * 1.1), dtype=float)
current_chunk_idx = 0
current_chunk_size = 0
current_chunk_volume = 0
@@ -35,7 +36,6 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
count += len(data)
frames += 1
samples += frame.samples
while len(data) > 0:
datalen = len(data)
@@ -53,13 +53,16 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
current_chunk_idx += 1
current_chunk_size = 0
current_chunk_volume = 0
volumes = volumes[:current_chunk_idx]
# normalize the volumes 0-128
volumes = volumes * 128 / volumes.max()
# number of decimals to use when rounding the peak value
digits = 2
max_val = float(max(volumes))
new_volumes = []
for x in volumes:
new_volumes.append(round(x / max_val, digits))
return volumes.astype("uint8").tolist()
return new_volumes
if __name__ == "__main__":

View File

@@ -41,7 +41,7 @@ def generate_transcript_name():
class AudioWaveform(BaseModel):
data: list[int]
data: list[float]
class TranscriptText(BaseModel):