From 2cf61b191fe9f7b2162d07c35f88631938f39a47 Mon Sep 17 00:00:00 2001
From: Sara <sara@monadical.com>
Date: Mon, 9 Oct 2023 15:52:11 +0200
Subject: [PATCH] fix waveform generation: return peaks normalized to 0-1 floats

---
 server/reflector/utils/audio_waveform.py | 17 ++++++++++-------
 server/reflector/views/transcripts.py    |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/server/reflector/utils/audio_waveform.py b/server/reflector/utils/audio_waveform.py
index f31e1748..7a6fdb3e 100644
--- a/server/reflector/utils/audio_waveform.py
+++ b/server/reflector/utils/audio_waveform.py
@@ -9,11 +9,12 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
         path = path.as_posix()
 
     container = av.open(path)
-    stream = container.streams.get(audio=0)[0]
+    stream = container.streams.audio[0]
     duration = container.duration / av.time_base
 
     chunk_size_secs = duration / segments_count
     chunk_size = int(chunk_size_secs * stream.rate * stream.channels)
+
     if chunk_size == 0:
         # there is not enough data to fill the chunks
         # so basically we use chunk_size of 1.
@@ -22,7 +23,7 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
     # 1.1 is a safety margin as it seems that pyav decode
     # does not always return the exact number of chunks
     # that we expect.
-    volumes = np.zeros(int(segments_count * 1.1), dtype=int)
+    volumes = np.zeros(int(segments_count * 1.1), dtype=float)
     current_chunk_idx = 0
     current_chunk_size = 0
     current_chunk_volume = 0
@@ -35,7 +36,6 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
         count += len(data)
         frames += 1
         samples += frame.samples
-
         while len(data) > 0:
             datalen = len(data)
 
@@ -53,13 +53,16 @@ def get_audio_waveform(path: Path | str, segments_count: int = 256) -> list[int]
                 current_chunk_idx += 1
                 current_chunk_size = 0
                 current_chunk_volume = 0
-
     volumes = volumes[:current_chunk_idx]
 
-    # normalize the volumes 0-128
-    volumes = volumes * 128 / volumes.max()
+    # normalize the peaks to 0-1; skip the division for empty or silent audio
+    digits = 2  # number of decimals to use when rounding the peak value
+    max_val = float(volumes.max()) if volumes.size else 0.0
+    if max_val != 0:
+        volumes = volumes / max_val
+    new_volumes = np.round(volumes, digits).tolist()
 
-    return volumes.astype("uint8").tolist()
+    return new_volumes
 
 
 if __name__ == "__main__":
diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py
index 410839d7..f7ab0f40 100644
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -41,7 +41,7 @@ def generate_transcript_name():
 
 
 class AudioWaveform(BaseModel):
-    data: list[int]
+    data: list[float]
 
 
 class TranscriptText(BaseModel):