Merge branch 'main' into www/jose-waveform

This commit is contained in:
Jose B
2023-07-27 15:25:15 -05:00
40 changed files with 3549 additions and 2566 deletions

.github/pull_request_template.md

@@ -0,0 +1,19 @@
## ⚠️ Insert the PR TITLE replacing this text ⚠️
⚠️ Describe your PR replacing this text. Post screenshots or videos whenever possible. ⚠️
### Checklist
- [ ] My branch is updated with main (mandatory)
- [ ] I wrote unit tests for this (if applicable)
- [ ] I have included migrations and tested them locally (if applicable)
- [ ] I have manually tested this feature locally
> IMPORTANT: Remember that you are responsible for merging this PR after it's been reviewed, and once deployed
> you should perform manual testing to make sure everything went smoothly.
### Urgency
- [ ] Urgent (deploy ASAP)
- [ ] Non-urgent (deploying in the next release is OK)

.github/workflows/test_server.yml

@@ -0,0 +1,68 @@
name: Unittests
on:
pull_request:
paths-ignore:
- 'www/**'
push:
paths-ignore:
- 'www/**'
jobs:
pytest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.x
uses: actions/setup-python@v4
with:
python-version: 3.11
- uses: Gr1N/setup-poetry@v8
- name: Cache Python requirements
uses: actions/cache@v2
id: cache-pip
with:
path: ~/.cache/pypoetry/virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install test dependencies
run: |
sudo apt-get update
sudo apt-get install -y portaudio19-dev build-essential
- name: Install requirements
run: |
cd server
poetry install
- name: Tests
run: |
cd server
poetry run python -m pytest -v tests
pep8:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.x
uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Validate formatting
run: |
pip install black
cd server
black --check reflector tests
docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Build and push
id: docker_build
uses: docker/build-push-action@v4
with:
context: server

.gitignore

@@ -1,2 +1,3 @@
.DS_Store
server/.env
+.env

README.md

@@ -0,0 +1,22 @@
# Reflector
For now, the Reflector server is responsible for audio transcription and summarization.
_The project is moving fast; documentation is currently unstable and may be outdated._
## Server
We currently use oobabooga as an LLM backend.
### Using docker
Create a `.env` with
```
LLM_URL=http://HOST:PORT/api/v1/generate
```
Then start with:
```
$ docker-compose up
```
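If you want to sanity-check the endpoint before starting the containers, a minimal probe might look like this. This is a sketch, not part of the repo: it assumes an oobabooga-style `/api/v1/generate` API that accepts a `{"prompt": ...}` JSON body and returns `results[0]["text"]`, which is the shape the server itself relies on.
```
import os

import requests

# LLM_URL as configured in .env, e.g. http://HOST:PORT/api/v1/generate
llm_url = os.environ["LLM_URL"]
payload = {"prompt": "### Human:\nSay hello.\n### Assistant:\n"}
response = requests.post(llm_url, json=payload, timeout=30)
response.raise_for_status()
print(response.json()["results"][0]["text"])
```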

docker-compose.yml

@@ -0,0 +1,20 @@
version: "3.9"
services:
server:
build:
context: server
ports:
- 1250:1250
environment:
LLM_URL: "${LLM_URL}"
volumes:
- model-cache:/root/.cache
web:
build:
context: www
ports:
- 3000:3000
volumes:
model-cache:

server/.gitignore

@@ -165,9 +165,9 @@ cython_debug/
transcript_*.txt
test_*.txt
wordcloud*.png
utils/config.ini
utils/secrets.ini
test_samples/
-*.wav
+# *.wav
*.mp3
*.m4a
.DS_Store/

server/.python-version

@@ -0,0 +1 @@
3.11

server/Dockerfile

@@ -0,0 +1,29 @@
FROM python:3.11-slim as base
ENV PIP_DEFAULT_TIMEOUT=100 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_NO_CACHE_DIR=1 \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
POETRY_VERSION=1.3.1
# install packages needed for base
# RUN apt-get update && apt-get install -y make gettext
# builder
FROM base AS builder
WORKDIR /tmp
# RUN apt-get install -y build-essential libffi-dev zlib1g-dev
COPY pyproject.toml poetry.lock /tmp/
RUN pip install "poetry==$POETRY_VERSION"
RUN python -m venv /venv
RUN . /venv/bin/activate && poetry config virtualenvs.create false
RUN . /venv/bin/activate && poetry install --only main --no-root --no-interaction --no-ansi
# bootstrap
FROM base AS final
COPY --from=builder /venv /venv
RUN mkdir -p /app
COPY reflector /app/reflector
WORKDIR /app
CMD ["/venv/bin/python", "-m", "reflector.server"]

server/README.md

@@ -1,5 +1,49 @@
# Reflector
For now, the Reflector server is responsible for audio transcription and summarization.
_The project is moving fast; documentation is currently unstable and may be outdated._
## Server
We currently use oobabooga as an LLM backend.
### Using docker
Create a `.env` with
```
LLM_URL=http://HOST:PORT/api/v1/generate
```
Then start with:
```
$ docker-compose up server
```
### Using local environment
Install the dependencies with poetry:
```
$ poetry install
```
Then run the server:
```
# With a config.ini
$ poetry run python -m reflector.server
# Within a poetry env
$ poetry shell
$ LLM_URL=http://.../api/v1/generate python -m reflector.server
```
# Old documentation
This is the code base for the Reflector demo (formerly called agenda-talk-diff) for the leads: Troy Web Consulting
panel (A Chat with AWS about AI: Real AI/ML AWS projects and what you should know) on 6/14 at 4:30 PM.
@@ -16,10 +60,10 @@ script is run. You need AWS_ACCESS_KEY / AWS_SECRET_KEY to authenticate your cal
For AWS S3 Web UI,
-1) Login to AWS management console.
-2) Search for S3 in the search bar at the top.
-3) Navigate to list the buckets under the current account, if needed and choose your bucket [```reflector-bucket```]
-4) You should be able to see items in the bucket. You can upload/download files here directly.
+1. Login to the AWS management console.
+2. Search for S3 in the search bar at the top.
+3. Navigate to list the buckets under the current account, if needed, and choose your bucket [```reflector-bucket```]
+4. You should be able to see items in the bucket. You can upload/download files here directly.
For CLI,
Refer to the FILE UTIL section below.
@@ -32,90 +76,92 @@ pipeline workflow in the script, you can do so by :
Upload:
-``` python3 file_util.py upload <object_name_in_S3_bucket>```
+` python3 file_util.py upload <object_name_in_S3_bucket>`
Download:
-``` python3 file_util.py download <object_name_in_S3_bucket>```
+` python3 file_util.py download <object_name_in_S3_bucket>`
If you want to access the S3 artefacts from another machine, you can either use the python file_util with the commands
mentioned above or simply use the GUI of AWS Management Console.
To setup,
-1) Check values in config.ini file. Specifically add your OPENAI_APIKEY if you plan to use OpenAI API requests.
-2) Run ``` export KMP_DUPLICATE_LIB_OK=True``` in
+1. Check values in the config.ini file. Specifically, add your OPENAI_APIKEY if you plan to use OpenAI API requests.
+2. Run ` export KMP_DUPLICATE_LIB_OK=True` in
Terminal. [This is taken care of in code but not reflected; will fix this issue later.]
-NOTE: If you don't have portaudio installed already, run ```brew install portaudio```
+NOTE: If you don't have portaudio installed already, run `brew install portaudio`
-3) Run the script setup_depedencies.sh.
+3. Run the script setup_dependencies.sh.
-``` chmod +x setup_dependencies.sh ```
+`chmod +x setup_dependencies.sh`
-``` sh setup_dependencies.sh <ENV>```
+` sh setup_dependencies.sh <ENV>`
ENV refers to the intended environment for JAX. JAX is available in several
variants, [CPU | GPU | Colab TPU | Google Cloud TPU]
-```ENV``` is :
+`ENV` is:
cpu -> JAX CPU installation
cuda11 -> JAX CUDA 11.x version
-cuda12 -> JAX CUDA 12.x version (Core Weave has CUDA 12 version, can check with ```nvidia-smi```)
+cuda12 -> JAX CUDA 12.x version (CoreWeave has a CUDA 12 version; can check with `nvidia-smi`)
```sh setup_dependencies.sh cuda12```
-4) If not already done, install ffmpeg. ```brew install ffmpeg```
+4. If not already done, install ffmpeg. `brew install ffmpeg`
For NLTK SSL error,
check [here](https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
-5) Run the Whisper-JAX pipeline. Currently, the repo can take a Youtube video and transcribes/summarizes it.
+5. Run the Whisper-JAX pipeline. Currently, the repo can take a YouTube video and transcribe/summarize it.
-``` python3 whisjax.py "https://www.youtube.com/watch?v=ihf0S97oxuQ"```
+` python3 whisjax.py "https://www.youtube.com/watch?v=ihf0S97oxuQ"`
You can even run it on a local file or a file in your configured S3 bucket.
-``` python3 whisjax.py "startup.mp4"```
+` python3 whisjax.py "startup.mp4"`
The script will take care of a few cases like a YouTube file, local file, video file, audio-only file,
file in S3, etc. If the local file is not present, it can automatically take the file from S3.
**OFFLINE WORKFLOW:**
-1) Specify the input source file] from a local, youtube link or upload to S3 if needed and pass it as input to the
+1. Specify the input source file from a local file or YouTube link, or upload to S3 if needed, and pass it as input to the
script. If the source file is in
-```.m4a``` format, it will get converted to ```.mp4``` automatically.
-2) Keep the agenda header topics in a local file named ```agenda-headers.txt```. This needs to be present where the
+`.m4a` format, it will get converted to `.mp4` automatically.
+2. Keep the agenda header topics in a local file named `agenda-headers.txt`. This needs to be present where the
script is run.
This version of the pipeline compares covered agenda topics using agenda headers in the following format.
-1) ```agenda_topic : <short description>```
-3) Check all the values in ```config.ini```. You need to predefine 2 categories for which you need to scatter plot the
+1. `agenda_topic : <short description>` (see the example after this list)
+3. Check all the values in `config.ini`. You need to predefine 2 categories for which you need to scatter plot the
topic modelling visualization in the config file. This is the default visualization. But, from the dataframe artefact
called
-```df_<timestamp>.pkl``` , you can load the df and choose different topics to plot. You can filter using certain
+`df_<timestamp>.pkl`, you can load the df and choose different topics to plot. You can filter using certain
words to search for the
transcriptions, and you can see the top influencers and characteristics of each topic we have chosen to plot in the
interactive HTML document. I have added a new jupyter notebook that gives the base template to play around with,
named
-```Viz_experiments.ipynb```.
-4) Run the script. The script automatically transcribes, summarizes and creates a scatter plot of words & topics in the
+`Viz_experiments.ipynb`.
+4. Run the script. The script automatically transcribes, summarizes and creates a scatter plot of words & topics in the
form of an interactive
HTML file, a sample word cloud and uploads them to the S3 bucket.
-5) Additional artefacts pushed to S3:
-1) HTML visualization file
-2) pandas df in pickle format for others to collaborate and make their own visualizations
-3) Summary, transcript and transcript with timestamps file in text format.
+5. Additional artefacts pushed to S3:
+1. HTML visualization file
+2. pandas df in pickle format for others to collaborate and make their own visualizations
+3. Summary, transcript and transcript-with-timestamps files in text format.
The script also creates 2 types of mappings.
-1) Timestamp -> The top 2 matched agenda topic
-2) Topic -> All matched timestamps in the transcription
+1. Timestamp -> The top 2 matched agenda topics
+2. Topic -> All matched timestamps in the transcription
Other visualizations can be planned based on available artefacts, or new ones can be created. Refer to the
-section ```Viz-experiments```.
+section `Viz-experiments`.
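For illustration, a hypothetical `agenda-headers.txt` in that format might look like (topic names and descriptions are made up):
```
intro : Welcome and panelist introductions
ml_on_aws : Real AI/ML AWS projects and lessons learned
qna : Audience questions and answers
```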
**Visualization experiments:**
@@ -135,21 +181,22 @@ We need a way to route audio from an application opened via the browser, ex. "W
microphone input which you will be using for speaking. We
use [Blackhole](https://github.com/ExistentialAudio/BlackHole).
-1) Install Blackhole-2ch (2 ch is enough) by 1 of 2 options listed.
-2) Setup [Aggregate device](https://github.com/ExistentialAudio/BlackHole/wiki/Aggregate-Device) to route web audio and
+1. Install Blackhole-2ch (2ch is enough) by one of the two options listed.
+2. Set up an [Aggregate device](https://github.com/ExistentialAudio/BlackHole/wiki/Aggregate-Device) to route web audio and
local microphone input.
Be sure to mirror the settings given ![here](./images/aggregate_input.png)
-3) Setup [Multi-Output device](https://github.com/ExistentialAudio/BlackHole/wiki/Multi-Output-Device)
+3. Set up a [Multi-Output device](https://github.com/ExistentialAudio/BlackHole/wiki/Multi-Output-Device)
Refer ![here](./images/multi-output.png)
-4) Set the aggregator input device name created in step 2 in config.ini as ```BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME```
+4. Set the aggregator input device name created in step 2 in config.ini as `BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME`
-5) Then goto ``` System Preferences -> Sound ``` and choose the devices created from the Output and
+5. Then go to `System Preferences -> Sound` and choose the devices created from the Output and
Input tabs.
-6) The input from your local microphone, the browser run meeting should be aggregated into one virtual stream to listen
+6. The input from your local microphone and the browser-run meeting should be aggregated into one virtual stream to listen
to
and the output should be fed back to your specified output devices if everything is configured properly. Check this
before trying out the trial.
@@ -157,18 +204,18 @@ use [Blackhole](https://github.com/ExistentialAudio/BlackHole).
**Permissions:**
You may have to add microphone access permission for "Terminal"/code editors [PyCharm/VSCode, etc.] to record audio in
-```System Preferences -> Privacy & Security -> Microphone```,
-```System Preferences -> Privacy & Security -> Accessibility```,
-```System Preferences -> Privacy & Security -> Input Monitoring```.
+`System Preferences -> Privacy & Security -> Microphone`,
+`System Preferences -> Privacy & Security -> Accessibility`,
+`System Preferences -> Privacy & Security -> Input Monitoring`.
From the reflector root folder,
-run ```python3 whisjax_realtime.py```
+run `python3 whisjax_realtime.py`
-The transcription text should be written to ```real_time_transcription_<timestamp>.txt```.
+The transcription text should be written to `real_time_transcription_<timestamp>.txt`.
NEXT STEPS:
-1) Create a RunPod setup for this feature (mentioned in 1 & 2) and test it end-to-end
-2) Perform Speaker Diarization using Whisper-JAX
-3) Based on the feasibility of the above points, explore suitable visualizations for transcription & summarization.
+1. Create a RunPod setup for this feature (mentioned in 1 & 2) and test it end-to-end
+2. Perform Speaker Diarization using Whisper-JAX
+3. Based on the feasibility of the above points, explore suitable visualizations for transcription & summarization.


@@ -1,77 +0,0 @@
import argparse
import asyncio
import signal
from aiortc.contrib.signaling import (add_signaling_arguments,
create_signaling)
from utils.log_utils import LOGGER
from stream_client import StreamClient
from typing import NoReturn
async def main() -> NoReturn:
"""
Reflector's entry point to the python client for WebRTC streaming if not
using the browser based UI-application
:return:
"""
parser = argparse.ArgumentParser(description="Data channels ping/pong")
parser.add_argument(
"--url", type=str, nargs="?", default="http://0.0.0.0:1250/offer"
)
parser.add_argument(
"--ping-pong",
help="Benchmark data channel with ping pong",
type=eval,
choices=[True, False],
default="False",
)
parser.add_argument(
"--play-from",
type=str,
default="",
)
add_signaling_arguments(parser)
args = parser.parse_args()
signaling = create_signaling(args)
async def shutdown(signal, loop):
"""Cleanup tasks tied to the service's shutdown."""
LOGGER.info(f"Received exit signal {signal.name}...")
LOGGER.info("Closing database connections")
LOGGER.info("Nacking outstanding messages")
tasks = [t for t in asyncio.all_tasks() if t is not
asyncio.current_task()]
[task.cancel() for task in tasks]
LOGGER.info(f"Cancelling {len(tasks)} outstanding tasks")
await asyncio.gather(*tasks, return_exceptions=True)
LOGGER.info(f'{"Flushing metrics"}')
loop.stop()
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
loop = asyncio.get_event_loop()
for s in signals:
loop.add_signal_handler(
s, lambda s=s: asyncio.create_task(shutdown(s, loop)))
# Init client
sc = StreamClient(
signaling=signaling,
url=args.url,
play_from=args.play_from,
ping_pong=args.ping_pong
)
await sc.start()
async for msg in sc.get_reader():
print(msg)
if __name__ == "__main__":
asyncio.run(main())

server/docker-compose.yml

@@ -0,0 +1,14 @@
version: "3.9"
services:
server:
build:
context: .
ports:
- 1250:1250
environment:
LLM_URL: "${LLM_URL}"
volumes:
- model-cache:/root/.cache
volumes:
model-cache:


@@ -1,61 +0,0 @@
pyaudio==0.2.13
keyboard==0.13.5
pynput==1.7.6
wave==0.0.2
async-timeout==4.0.2
attrs==23.1.0
certifi==2023.5.7
charset-normalizer==3.1.0
decorator==4.4.2
filelock==3.12.0
frozenlist==1.3.3
idna==3.4
imageio==2.29.0
imageio-ffmpeg==0.4.8
Jinja2==3.1.2
llvmlite==0.40.0
loguru==0.7.0
MarkupSafe==2.1.2
more-itertools==9.1.0
moviepy==1.0.3
mpmath==1.3.0
multidict==6.0.4
networkx==3.1
numba==0.57.0
numpy==1.24.3
openai==0.27.7
openai-whisper@ git+https://github.com/openai/whisper.git@248b6cb124225dd263bb9bd32d060b6517e067f8
Pillow==9.5.0
proglog==0.1.10
pytube==15.0.0
regex==2023.5.5
six==1.16.0
sympy==1.12
tiktoken==0.3.3
torch==2.0.1
tqdm==4.65.0
typing_extensions==4.6.2
urllib3
yarl==1.9.2
boto3==1.26.151
nltk==3.8.1
wordcloud==1.9.2
spacy==3.5.4
scattertext==0.1.19
pandas==2.0.3
jupyter==1.0.0
seaborn==0.12.2
matplotlib==3.7.2
matplotlib-inline==0.1.6
termcolor==2.3.0
ffmpeg==1.4
cached_property==1.5.2
stamina==23.1.0
httpx==0.24.1
https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
gpt4all==1.0.5
aiohttp==3.8.5
aiohttp-cors==0.7.0
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1

server/poetry.lock

File diff suppressed because it is too large

server/pyproject.toml

@@ -0,0 +1,40 @@
[tool.poetry]
name = "reflector-server"
version = "0.1.0"
description = ""
authors = ["Monadical team <ops@monadical.com>"]
readme = "README.md"
packages = []
[tool.poetry.dependencies]
python = "^3.11"
aiohttp = "^3.8.5"
aiohttp-cors = "^0.7.0"
av = "^10.0.0"
requests = "^2.31.0"
aiortc = "^1.5.0"
faster-whisper = "^0.7.1"
sortedcontainers = "^2.4.0"
loguru = "^0.7.0"
pydantic-settings = "^2.0.2"
structlog = "^23.1.0"
[tool.poetry.group.dev.dependencies]
black = "^23.7.0"
[tool.poetry.group.client.dependencies]
httpx = "^0.24.1"
pyaudio = "^0.2.13"
stamina = "^23.1.0"
[tool.poetry.group.tests.dependencies]
pytest-aiohttp = "^1.0.4"
pytest-asyncio = "^0.21.1"
pytest = "^7.4.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


@@ -0,0 +1,75 @@
import argparse
import asyncio
import signal
from aiortc.contrib.signaling import add_signaling_arguments, create_signaling
from reflector.logger import logger
from reflector.stream_client import StreamClient
from typing import NoReturn
async def main() -> NoReturn:
"""
Reflector's entry point to the python client for WebRTC streaming if not
using the browser based UI-application
:return:
"""
parser = argparse.ArgumentParser(description="Data channels ping/pong")
parser.add_argument(
"--url", type=str, nargs="?", default="http://0.0.0.0:1250/offer"
)
parser.add_argument(
"--ping-pong",
help="Benchmark data channel with ping pong",
type=eval,
choices=[True, False],
default="False",
)
parser.add_argument(
"--play-from",
type=str,
default="",
)
add_signaling_arguments(parser)
args = parser.parse_args()
signaling = create_signaling(args)
async def shutdown(signal, loop):
"""Cleanup tasks tied to the service's shutdown."""
logger.info(f"Received exit signal {signal.name}...")
logger.info("Closing database connections")
logger.info("Nacking outstanding messages")
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
[task.cancel() for task in tasks]
logger.info(f"Cancelling {len(tasks)} outstanding tasks")
await asyncio.gather(*tasks, return_exceptions=True)
logger.info(f'{"Flushing metrics"}')
loop.stop()
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
loop = asyncio.get_event_loop()
for s in signals:
loop.add_signal_handler(s, lambda s=s: asyncio.create_task(shutdown(s, loop)))
# Init client
sc = StreamClient(
signaling=signaling,
url=args.url,
play_from=args.play_from,
ping_pong=args.ping_pong,
)
await sc.start()
async for msg in sc.get_reader():
print(msg)
if __name__ == "__main__":
asyncio.run(main())

server/reflector/logger.py

@@ -0,0 +1,3 @@
import structlog
logger = structlog.get_logger()
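The server builds per-client child loggers from this root logger with `bind()`; a minimal sketch of the pattern (the bound values here are illustrative):
```
from reflector.logger import logger

# Each bound key/value is attached to every subsequent log line
# emitted through the child logger, as server.py does per connection.
client_logger = logger.bind(client="127.0.0.1:54321")
client_logger.info("Channel created by remote party")
```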

server/reflector/models.py

@@ -6,6 +6,7 @@ the input and output parameters of functions
import datetime
from dataclasses import dataclass
from typing import List
+from sortedcontainers import SortedDict
import av
@@ -16,6 +17,7 @@ class TitleSummaryInput:
Data class for the input to generate title and summaries.
The outcome will be used to send query to the LLM for processing.
"""
input_text = str
transcribed_time = float
prompt = str
@@ -24,8 +26,7 @@ class TitleSummaryInput:
def __init__(self, transcribed_time, input_text=""):
self.input_text = input_text
self.transcribed_time = transcribed_time
-self.prompt = \
-f"""
+self.prompt = f"""
### Human:
Create a JSON object as response. The JSON object must have 2 fields:
i) title and ii) summary. For the title field, generate a short title
@@ -36,7 +37,7 @@ class TitleSummaryInput:
### Assistant:
"""
-self.data = {"data": self.prompt}
+self.data = {"prompt": self.prompt}
self.headers = {"Content-Type": "application/json"}
@@ -46,14 +47,17 @@ class IncrementalResult:
Data class for the result of generating one title and summaries.
Defines what a single "topic" looks like.
"""
title = str
description = str
transcript = str
+timestamp = str
-def __init__(self, title, desc, transcript):
+def __init__(self, title, desc, transcript, timestamp):
self.title = title
self.description = desc
self.transcript = transcript
+self.timestamp = timestamp
@dataclass
@@ -62,17 +66,20 @@ class TitleSummaryOutput:
Data class for the result of all generated titles and summaries.
The result will be sent back to the client
"""
cmd = str
topics = List[IncrementalResult]
def __init__(self, inc_responses):
self.topics = inc_responses
self.cmd = "UPDATE_TOPICS"
-def get_result(self):
-return {
-"cmd": self.cmd,
-"topics": self.topics
-}
+def get_result(self) -> dict:
+"""
+Return the result dict containing the generated topics
+:return:
+"""
+return {"cmd": self.cmd, "topics": self.topics}
@dataclass
@@ -81,21 +88,28 @@ class ParseLLMResult:
Data class to parse the result returned by the LLM while generating title
and summaries. The result will be sent back to the client.
"""
title = str
description = str
transcript = str
timestamp = str
def __init__(self, param: TitleSummaryInput, output: dict):
self.title = output["title"]
self.transcript = param.input_text
self.description = output.pop("summary")
-self.timestamp = \
-str(datetime.timedelta(seconds=round(param.transcribed_time)))
+self.timestamp = str(datetime.timedelta(seconds=round(param.transcribed_time)))
-def get_result(self):
+def get_result(self) -> dict:
+"""
+Return the result dict after parsing the response from LLM
+:return:
+"""
return {
-"description": self.description,
-"transcript": self.transcript,
-"timestamp": self.timestamp
+"title": self.title,
+"description": self.description,
+"transcript": self.transcript,
+"timestamp": self.timestamp,
}
@@ -105,6 +119,7 @@ class TranscriptionInput:
Data class to define the input to the transcription function
AudioFrames -> input
"""
frames = List[av.audio.frame.AudioFrame]
def __init__(self, frames):
@@ -117,6 +132,7 @@ class TranscriptionOutput:
Dataclass to define the result of the transcription function.
The result will be sent back to the client
"""
cmd = str
result_text = str
@@ -124,11 +140,12 @@ class TranscriptionOutput:
self.cmd = "SHOW_TRANSCRIPTION"
self.result_text = result_text
-def get_result(self):
-return {
-"cmd": self.cmd,
-"text": self.result_text
-}
+def get_result(self) -> dict:
+"""
+Return the result dict for displaying the transcription
+:return:
+"""
+return {"cmd": self.cmd, "text": self.result_text}
@dataclass
@@ -137,6 +154,7 @@ class FinalSummaryResult:
Dataclass to define the result of the final summary function.
The result will be sent back to the client.
"""
cmd = str
final_summary = str
duration = str
@@ -144,13 +162,17 @@ class FinalSummaryResult:
def __init__(self, final_summary, time):
self.duration = str(datetime.timedelta(seconds=round(time)))
self.final_summary = final_summary
self.cmd = ""
self.cmd = "DISPLAY_FINAL_SUMMARY"
def get_result(self):
def get_result(self) -> dict:
"""
Return the result dict for displaying the final summary
:return:
"""
return {
"cmd": self.cmd,
"duration": self.duration,
"summary": self.final_summary
"cmd": self.cmd,
"duration": self.duration,
"summary": self.final_summary,
}
@@ -159,6 +181,29 @@ class BlackListedMessages:
Class to hold the blacklisted messages. These messages should be filtered
out and not sent back to the client as part of the transcription.
"""
messages = [" Thank you.", " See you next time!",
" Thank you for watching!", " Bye!",
" And that's what I'm talking about."]
messages = [
" Thank you.",
" See you next time!",
" Thank you for watching!",
" Bye!",
" And that's what I'm talking about.",
]
+@dataclass
+class TranscriptionContext:
+transcription_text: str
+last_transcribed_time: float
+incremental_responses: List[IncrementalResult]
+sorted_transcripts: dict
+data_channel: None  # FIXME
+logger: None
+def __init__(self, logger):
+self.transcription_text = ""
+self.last_transcribed_time = 0.0
+self.incremental_responses = []
+self.data_channel = None
+self.sorted_transcripts = SortedDict()
+self.logger = logger
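As a quick orientation (not part of the diff), here is a sketch of how these dataclasses chain together, mirroring what `server.py` does once the LLM answers; the fake LLM output dict is a stand-in for a real response:
```
from reflector.models import ParseLLMResult, TitleSummaryInput, TitleSummaryOutput

param = TitleSummaryInput(transcribed_time=12.0, input_text="...chunk of transcript...")
# param.data == {"prompt": param.prompt} is what the server POSTs to the LLM.
fake_llm_output = {"title": "Kickoff", "summary": "Introductions and agenda."}
topic = ParseLLMResult(param, fake_llm_output).get_result()
# topic now has "title", "description", "transcript" and "timestamp" keys.
print(TitleSummaryOutput([topic]).get_result())  # {"cmd": "UPDATE_TOPICS", ...}
```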

server/reflector/server.py

@@ -0,0 +1,381 @@
import argparse
import asyncio
import datetime
import json
import os
import wave
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import NoReturn, Union
import aiohttp_cors
import av
import requests
from aiohttp import web
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaRelay
from faster_whisper import WhisperModel
from reflector.models import (
BlackListedMessages,
FinalSummaryResult,
ParseLLMResult,
TitleSummaryInput,
TitleSummaryOutput,
TranscriptionInput,
TranscriptionOutput,
TranscriptionContext,
)
from reflector.logger import logger
from reflector.utils.run_utils import run_in_executor
from reflector.settings import settings
# WebRTC components
pcs = set()
relay = MediaRelay()
executor = ThreadPoolExecutor()
# Transcription model
model = WhisperModel("tiny", device="cpu", compute_type="float32", num_workers=12)
# LLM
LLM_URL = settings.LLM_URL
if not LLM_URL:
assert settings.LLM_BACKEND == "oobagooda"
LLM_URL = f"http://{settings.LLM_HOST}:{settings.LLM_PORT}/api/v1/generate"
logger.info(f"Using LLM [{settings.LLM_BACKEND}]: {LLM_URL}")
def parse_llm_output(
param: TitleSummaryInput, response: requests.Response
) -> Union[None, ParseLLMResult]:
"""
Function to parse the LLM response
:param param:
:param response:
:return:
"""
try:
output = json.loads(response.json()["results"][0]["text"])
return ParseLLMResult(param, output)
except Exception:
logger.exception("Exception while parsing LLM output")
return None
def get_title_and_summary(
ctx: TranscriptionContext, param: TitleSummaryInput
) -> Union[None, TitleSummaryOutput]:
"""
From the input provided (transcript), query the LLM to generate
topics and summaries
:param param:
:return:
"""
logger.info("Generating title and summary")
# TODO : Handle unexpected output formats from the model
try:
response = requests.post(LLM_URL, headers=param.headers, json=param.data)
output = parse_llm_output(param, response)
if output:
result = output.get_result()
ctx.incremental_responses.append(result)
return TitleSummaryOutput(ctx.incremental_responses)
except Exception:
logger.exception("Exception while generating title and summary")
return None
def channel_send(channel, message: str) -> NoReturn:
"""
Send text messages via the data channel
:param channel:
:param message:
:return:
"""
if channel:
channel.send(message)
def channel_send_increment(
channel, param: Union[FinalSummaryResult, TitleSummaryOutput]
) -> NoReturn:
"""
Send the incremental topics and summaries via the data channel
:param channel:
:param param:
:return:
"""
if channel and param:
message = param.get_result()
channel.send(json.dumps(message))
def channel_send_transcript(ctx: TranscriptionContext) -> NoReturn:
"""
Send the transcription result via the data channel
:param channel:
:return:
"""
if not ctx.data_channel:
return
try:
least_time = next(iter(ctx.sorted_transcripts))
message = ctx.sorted_transcripts[least_time].get_result()
if message:
del ctx.sorted_transcripts[least_time]
if message["text"] not in BlackListedMessages.messages:
ctx.data_channel.send(json.dumps(message))
# Due to exceptions, one of the earlier batches may never return
# a transcript; we don't want to be stuck waiting for its result.
# With a threshold size of 3, we pop the first (lost) element.
else:
if len(ctx.sorted_transcripts) >= 3:
del ctx.sorted_transcripts[least_time]
except Exception:
logger.exception("Exception while sending transcript")
def get_transcription(
ctx: TranscriptionContext, input_frames: TranscriptionInput
) -> Union[None, TranscriptionOutput]:
"""
From the collected audio frames create transcription by inferring from
the chosen transcription model
:param input_frames:
:return:
"""
ctx.logger.info("Transcribing..")
ctx.sorted_transcripts[input_frames.frames[0].time] = None
# TODO: Find cleaner way, watch "no transcription" issue below
# Passing IO objects instead of temporary files throws an error
# Passing ndarray (type casted with float) does not give any
# transcription. Refer issue,
# https://github.com/guillaumekln/faster-whisper/issues/369
audio_file = "test" + str(datetime.datetime.now())
wf = wave.open(audio_file, "wb")
wf.setnchannels(settings.AUDIO_CHANNELS)
wf.setframerate(settings.AUDIO_SAMPLING_RATE)
wf.setsampwidth(settings.AUDIO_SAMPLING_WIDTH)
for frame in input_frames.frames:
wf.writeframes(b"".join(frame.to_ndarray()))
wf.close()
result_text = ""
try:
segments, _ = model.transcribe(
audio_file,
language="en",
beam_size=5,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
os.remove(audio_file)
segments = list(segments)
result_text = ""
duration = 0.0
for segment in segments:
result_text += segment.text
start_time = segment.start
end_time = segment.end
if not segment.start:
start_time = 0.0
if not segment.end:
end_time = 5.5
duration += end_time - start_time
ctx.last_transcribed_time += duration
ctx.transcription_text += result_text
except Exception:
logger.exception("Exception while transcribing")
result = TranscriptionOutput(result_text)
ctx.sorted_transcripts[input_frames.frames[0].time] = result
return result
def get_final_summary_response(ctx: TranscriptionContext) -> FinalSummaryResult:
"""
Collate the incremental summaries generated so far and return as the final
summary
:return:
"""
final_summary = ""
# Collate inc summaries
for topic in ctx.incremental_responses:
final_summary += topic["description"]
response = FinalSummaryResult(final_summary, ctx.last_transcribed_time)
with open(
"./artefacts/meeting_titles_and_summaries.txt", "a", encoding="utf-8"
) as file:
file.write(json.dumps(ctx.incremental_responses))
return response
class AudioStreamTrack(MediaStreamTrack):
"""
An audio stream track.
"""
kind = "audio"
def __init__(self, ctx: TranscriptionContext, track):
super().__init__()
self.ctx = ctx
self.track = track
self.audio_buffer = av.AudioFifo()
async def recv(self) -> av.audio.frame.AudioFrame:
ctx = self.ctx
frame = await self.track.recv()
self.audio_buffer.write(frame)
if local_frames := self.audio_buffer.read_many(
settings.AUDIO_BUFFER_SIZE, partial=False
):
whisper_result = run_in_executor(
get_transcription,
ctx,
TranscriptionInput(local_frames),
executor=executor,
)
whisper_result.add_done_callback(
lambda f: channel_send_transcript(ctx) if f.result() else None
)
if len(ctx.transcription_text) > 25:
llm_input_text = ctx.transcription_text
ctx.transcription_text = ""
param = TitleSummaryInput(
input_text=llm_input_text, transcribed_time=ctx.last_transcribed_time
)
llm_result = run_in_executor(
get_title_and_summary, ctx, param, executor=executor
)
llm_result.add_done_callback(
lambda f: channel_send_increment(ctx.data_channel, llm_result.result())
if f.result()
else None
)
return frame
async def offer(request: web.Request) -> web.Response:
"""
Establish the WebRTC connection with the client
:param request:
:return:
"""
params = await request.json()
offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
# client identification
peername = request.transport.get_extra_info("peername")
if peername is not None:
clientid = f"{peername[0]}:{peername[1]}"
else:
clientid = uuid.uuid4()
# create a context for the whole rtc transaction
# add a customised logger to the context
ctx = TranscriptionContext(logger=logger.bind(client=clientid))
# handle RTC peer connection
pc = RTCPeerConnection()
pcs.add(pc)
@pc.on("datachannel")
def on_datachannel(channel) -> NoReturn:
ctx.data_channel = channel
ctx.logger = ctx.logger.bind(channel=channel.label)
ctx.logger.info("Channel created by remote party")
@channel.on("message")
def on_message(message: str) -> NoReturn:
ctx.logger.info(f"Message: {message}")
if json.loads(message)["cmd"] == "STOP":
# Placeholder final summary
response = get_final_summary_response(ctx)
channel_send_increment(channel, response)
# To-do Add code to stop connection from server side here
# But have to handshake with client once
if isinstance(message, str) and message.startswith("ping"):
channel_send(channel, "pong" + message[4:])
@pc.on("connectionstatechange")
async def on_connectionstatechange() -> NoReturn:
ctx.logger.info(f"Connection state changed: {pc.connectionState}")
if pc.connectionState == "failed":
await pc.close()
pcs.discard(pc)
@pc.on("track")
def on_track(track) -> NoReturn:
ctx.logger.info(f"Track {track.kind} received")
pc.addTrack(AudioStreamTrack(ctx, relay.subscribe(track)))
await pc.setRemoteDescription(offer)
answer = await pc.createAnswer()
await pc.setLocalDescription(answer)
return web.Response(
content_type="application/json",
text=json.dumps(
{"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
),
)
async def on_shutdown(application: web.Application) -> NoReturn:
"""
On shutdown, the coroutines that shut down client connections are
executed
:param application:
:return:
"""
coroutines = [pc.close() for pc in pcs]
await asyncio.gather(*coroutines)
pcs.clear()
def create_app() -> web.Application:
"""
Create the web application
"""
app = web.Application()
cors = aiohttp_cors.setup(
app,
defaults={
"*": aiohttp_cors.ResourceOptions(
allow_credentials=True, expose_headers="*", allow_headers="*"
)
},
)
offer_resource = cors.add(app.router.add_resource("/offer"))
cors.add(offer_resource.add_route("POST", offer))
app.on_shutdown.append(on_shutdown)
return app
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="WebRTC based server for Reflector")
parser.add_argument(
"--host", default="0.0.0.0", help="Server host IP (def: 0.0.0.0)"
)
parser.add_argument(
"--port", type=int, default=1250, help="Server port (def: 1250)"
)
args = parser.parse_args()
app = create_app()
web.run_app(app, access_log=None, host=args.host, port=args.port)

server/reflector/settings.py

@@ -0,0 +1,45 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
OPENMP_KMP_DUPLICATE_LIB_OK: bool = False
# Whisper
WHISPER_MODEL_SIZE: str = "tiny"
WHISPER_REAL_TIME_MODEL_SIZE: str = "tiny"
# Summarizer
SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
SUMMARIZER_INPUT_ENCODING_MAX_LENGTH: int = 1024
SUMMARIZER_MAX_LENGTH: int = 2048
SUMMARIZER_BEAM_SIZE: int = 6
SUMMARIZER_MAX_CHUNK_LENGTH: int = 1024
SUMMARIZER_USING_CHUNKS: bool = True
# Audio
AUDIO_BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME: str = "aggregator"
AUDIO_AV_FOUNDATION_DEVICE_ID: int = 1
AUDIO_CHANNELS: int = 2
AUDIO_SAMPLING_RATE: int = 48000
AUDIO_SAMPLING_WIDTH: int = 2
AUDIO_BUFFER_SIZE: int = 256 * 960
# LLM
LLM_BACKEND: str = "oobagooda"
LLM_URL: str | None = None
LLM_HOST: str = "localhost"
LLM_PORT: int = 7860
# Storage
STORAGE_BACKEND: str = "aws"
STORAGE_AWS_ACCESS_KEY: str = ""
STORAGE_AWS_SECRET_KEY: str = ""
STORAGE_AWS_BUCKET: str = ""
# OpenAI
OPENAI_API_KEY: str = ""
settings = Settings()
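Because this is a pydantic-settings class with an `.env` file configured, any field can be overridden from the environment or from `.env`; an illustrative check (values are made up):
```
import os

# Must be set before reflector.settings is first imported.
os.environ["LLM_HOST"] = "10.0.0.5"

from reflector.settings import settings

print(settings.LLM_HOST)            # "10.0.0.5" (from the environment)
print(settings.WHISPER_MODEL_SIZE)  # "tiny" (the default above)
```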

server/reflector/stream_client.py

@@ -4,22 +4,21 @@ import uuid
import httpx
import pyaudio
import requests
import stamina
-from aiortc import (RTCPeerConnection, RTCSessionDescription)
-from aiortc.contrib.media import (MediaPlayer, MediaRelay)
+from aiortc import RTCPeerConnection, RTCSessionDescription
+from aiortc.contrib.media import MediaPlayer, MediaRelay
-from utils.log_utils import LOGGER
-from utils.run_utils import CONFIG
+from reflector.logger import logger
+from reflector.settings import settings
class StreamClient:
def __init__(
-self,
-signaling,
-url="http://0.0.0.0:1250",
-play_from=None,
-ping_pong=False
+self,
+signaling,
+url="http://0.0.0.0:1250/offer",
+play_from=None,
+ping_pong=False,
):
self.signaling = signaling
self.server_url = url
@@ -29,20 +28,15 @@ class StreamClient:
self.pc = RTCPeerConnection()
self.loop = asyncio.get_event_loop()
self.relay = None
self.pcs = set()
self.time_start = None
self.queue = asyncio.Queue()
-self.player = MediaPlayer(
-':' + str(CONFIG['AUDIO']["AV_FOUNDATION_DEVICE_ID"]),
-format='avfoundation',
-options={'channels': '2'})
+self.logger = logger.bind(stream_client=id(self))
-def stop(self):
-self.loop.run_until_complete(self.signaling.close())
-self.loop.run_until_complete(self.pc.close())
-# self.loop.close()
+async def stop(self):
+await self.signaling.close()
+await self.pc.close()
def create_local_tracks(self, play_from):
if play_from:
@@ -51,11 +45,13 @@ class StreamClient:
else:
if self.relay is None:
self.relay = MediaRelay()
+self.player = MediaPlayer(
+f":{settings.AUDIO_AV_FOUNDATION_DEVICE_ID}",
+format="avfoundation",
+options={"channels": "2"},
+)
return self.relay.subscribe(self.player.audio), None
-def channel_log(self, channel, t, message):
-print("channel(%s) %s %s" % (channel.label, t, message))
def channel_send(self, channel, message):
-# self.channel_log(channel, ">", message)
channel.send(message)
@@ -70,32 +66,31 @@ class StreamClient:
async def run_offer(self, pc, signaling):
# microphone
audio, video = self.create_local_tracks(self.play_from)
pc_id = "PeerConnection(%s)" % uuid.uuid4()
pc_id = uuid.uuid4().hex
self.pcs.add(pc)
-def log_info(msg, *args):
-LOGGER.info(pc_id + " " + msg, *args)
+self.logger = self.logger.bind(pc_id=pc_id)
@pc.on("connectionstatechange")
async def on_connectionstatechange():
print("Connection state is %s" % pc.connectionState)
self.logger.info(f"Connection state is {pc.connectionState}")
if pc.connectionState == "failed":
await pc.close()
self.pcs.discard(pc)
@pc.on("track")
def on_track(track):
print("Sending %s" % track.kind)
self.logger.info(f"Sending {track.kind}")
self.pc.addTrack(track)
@track.on("ended")
async def on_ended():
log_info("Track %s ended", track.kind)
self.logger.info(f"Track {track.kind} ended")
self.pc.addTrack(audio)
channel = pc.createDataChannel("data-channel")
self.channel_log(channel, "-", "created by local party")
self.logger = self.logger.bind(channel=channel.label)
self.logger.info("Created by local party")
async def send_pings():
while True:
@@ -111,27 +106,24 @@ class StreamClient:
def on_message(message):
self.queue.put_nowait(message)
if self.ping_pong:
self.channel_log(channel, "<", message)
self.logger.info(f"Message: {message}")
if isinstance(message, str) and message.startswith("pong"):
-elapsed_ms = (self.current_stamp() - int(message[5:])) \
-/ 1000
-print(" RTT %.2f ms" % elapsed_ms)
+elapsed_ms = (self.current_stamp() - int(message[5:])) / 1000
+self.logger.debug("RTT %.2f ms" % elapsed_ms)
await pc.setLocalDescription(await pc.createOffer())
-sdp = {
-"sdp": pc.localDescription.sdp,
-"type": pc.localDescription.type
-}
+sdp = {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
@stamina.retry(on=httpx.HTTPError, attempts=5)
-def connect_to_server():
-response = requests.post(self.server_url, json=sdp, timeout=10)
-response.raise_for_status()
-return response
+async def connect_to_server():
+async with httpx.AsyncClient() as client:
+response = await client.post(self.server_url, json=sdp, timeout=10)
+response.raise_for_status()
+return response.json()
-params = connect_to_server().json()
+params = await connect_to_server()
answer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
await pc.setRemoteDescription(answer)


@@ -4,21 +4,24 @@ uploads to cloud storage
"""
import sys
from typing import List, NoReturn
import boto3
import botocore
from .log_utils import LOGGER
-from .run_utils import CONFIG
+from .run_utils import SECRETS
-BUCKET_NAME = CONFIG["AWS"]["BUCKET_NAME"]
+BUCKET_NAME = SECRETS["AWS-S3"]["BUCKET_NAME"]
-s3 = boto3.client('s3',
-aws_access_key_id=CONFIG["AWS"]["AWS_ACCESS_KEY"],
-aws_secret_access_key=CONFIG["AWS"]["AWS_SECRET_KEY"])
+s3 = boto3.client(
+"s3",
+aws_access_key_id=SECRETS["AWS-S3"]["AWS_ACCESS_KEY"],
+aws_secret_access_key=SECRETS["AWS-S3"]["AWS_SECRET_KEY"],
+)
-def upload_files(files_to_upload):
+def upload_files(files_to_upload: List[str]) -> NoReturn:
"""
Upload a list of files to the configured S3 bucket
:param files_to_upload: List of files to upload
@@ -32,7 +35,7 @@ def upload_files(files_to_upload):
print(exception.response)
-def download_files(files_to_download):
+def download_files(files_to_download: List[str]) -> NoReturn:
"""
Download a list of files from the configured S3 bucket
:param files_to_download: List of files to download
@@ -43,7 +46,7 @@ def download_files(files_to_download):
try:
s3.download_file(BUCKET_NAME, key, key)
except botocore.exceptions.ClientError as exception:
-if exception.response['Error']['Code'] == "404":
+if exception.response["Error"]["Code"] == "404":
print("The object does not exist.")
else:
raise


@@ -4,21 +4,16 @@ Utility function to format the artefacts created during Reflector run
import json
with open("../artefacts/meeting_titles_and_summaries.txt", "r",
encoding='utf-8') as f:
with open("../artefacts/meeting_titles_and_summaries.txt", "r", encoding="utf-8") as f:
outputs = f.read()
outputs = json.loads(outputs)
transcript_file = open("../artefacts/meeting_transcript.txt",
"a",
encoding='utf-8')
title_desc_file = open("../artefacts/meeting_title_description.txt",
"a",
encoding='utf-8')
summary_file = open("../artefacts/meeting_summary.txt",
"a",
encoding='utf-8')
transcript_file = open("../artefacts/meeting_transcript.txt", "a", encoding="utf-8")
title_desc_file = open(
"../artefacts/meeting_title_description.txt", "a", encoding="utf-8"
)
summary_file = open("../artefacts/meeting_summary.txt", "a", encoding="utf-8")
for item in outputs["topics"]:
transcript_file.write(item["transcript"])


@@ -3,30 +3,12 @@ Utility file for server side asynchronous task running and config objects
"""
import asyncio
import configparser
import contextlib
from functools import partial
from threading import Lock
from typing import ContextManager, Generic, TypeVar
-class ReflectorConfig:
-"""
-Create a single config object to share across the project
-"""
-__config = None
-@staticmethod
-def get_config():
-if ReflectorConfig.__config is None:
-ReflectorConfig.__config = configparser.ConfigParser()
-ReflectorConfig.__config.read('utils/config.ini')
-return ReflectorConfig.__config
-CONFIG = ReflectorConfig.get_config()
def run_in_executor(func, *args, executor=None, **kwargs):
"""
Run the function in an executor, unblocking the main loop
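The diff cuts off the body of `run_in_executor`, but from its call sites in `server.py` (positional arguments, an `executor=` keyword, and a returned future carrying `add_done_callback`) the standard asyncio pattern fits. A sketch only, not the actual implementation:
```
import asyncio
from functools import partial

def run_in_executor(func, *args, executor=None, **kwargs):
    """Run the function in an executor, unblocking the main loop."""
    loop = asyncio.get_event_loop()
    # partial freezes the arguments; the executor runs the call off-loop
    # and returns an awaitable future.
    return loop.run_in_executor(executor, partial(func, *args, **kwargs))
```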


@@ -1,6 +1,8 @@
"""
Utility file for all text processing related functionalities
"""
+import datetime
+from typing import List
import nltk
import torch
@@ -13,18 +15,22 @@ from transformers import BartForConditionalGeneration, BartTokenizer
from log_utils import LOGGER
from run_utils import CONFIG
-nltk.download('punkt', quiet=True)
+nltk.download("punkt", quiet=True)
-def preprocess_sentence(sentence):
-stop_words = set(stopwords.words('english'))
+def preprocess_sentence(sentence: str) -> str:
+"""
+Filter out undesirable tokens from the sentence
+:param sentence:
+:return:
+"""
+stop_words = set(stopwords.words("english"))
tokens = word_tokenize(sentence.lower())
-tokens = [token for token in tokens
-if token.isalnum() and token not in stop_words]
-return ' '.join(tokens)
+tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
+return " ".join(tokens)
-def compute_similarity(sent1, sent2):
+def compute_similarity(sent1: str, sent2: str) -> float:
"""
Compute the similarity
"""
@@ -35,7 +41,7 @@ def compute_similarity(sent1, sent2):
return 0.0
-def remove_almost_alike_sentences(sentences, threshold=0.7):
+def remove_almost_alike_sentences(sentences: List[str], threshold=0.7) -> List[str]:
"""
Filter sentences that are similar beyond a set threshold
:param sentences:
@@ -60,18 +66,18 @@ def remove_almost_alike_sentences(sentences, threshold=0.7):
sentence1 = preprocess_sentence(sentences[i])
sentence2 = preprocess_sentence(sentences[j])
if len(sentence1) != 0 and len(sentence2) != 0:
-similarity = compute_similarity(sentence1,
-sentence2)
+similarity = compute_similarity(sentence1, sentence2)
if similarity >= threshold:
removed_indices.add(max(i, j))
-filtered_sentences = [sentences[i] for i in range(num_sentences)
-if i not in removed_indices]
+filtered_sentences = [
+sentences[i] for i in range(num_sentences) if i not in removed_indices
+]
return filtered_sentences
-def remove_outright_duplicate_sentences_from_chunk(chunk):
+def remove_outright_duplicate_sentences_from_chunk(chunk: str) -> List[str]:
"""
Remove repetitive sentences
:param chunk:
@@ -83,7 +89,9 @@ def remove_outright_duplicate_sentences_from_chunk(chunk):
return nonduplicate_sentences
-def remove_whisper_repetitive_hallucination(nonduplicate_sentences):
+def remove_whisper_repetitive_hallucination(
+nonduplicate_sentences: List[str],
+) -> List[str]:
"""
Remove sentences that are repeated as a result of Whisper
hallucinations
@@ -98,20 +106,23 @@ def remove_whisper_repetitive_hallucination(nonduplicate_sentences):
words = nltk.word_tokenize(sent)
n_gram_filter = 3
for i in range(len(words)):
-if str(words[i:i + n_gram_filter]) in seen and \
-seen[str(words[i:i + n_gram_filter])] == \
-words[i + 1:i + n_gram_filter + 2]:
+if (
+str(words[i : i + n_gram_filter]) in seen
+and seen[str(words[i : i + n_gram_filter])]
+== words[i + 1 : i + n_gram_filter + 2]
+):
pass
else:
-seen[str(words[i:i + n_gram_filter])] = \
-words[i + 1:i + n_gram_filter + 2]
+seen[str(words[i : i + n_gram_filter])] = words[
+i + 1 : i + n_gram_filter + 2
+]
temp_result += words[i]
temp_result += " "
chunk_sentences.append(temp_result)
return chunk_sentences
-def post_process_transcription(whisper_result):
+def post_process_transcription(whisper_result: dict) -> dict:
"""
Parent function to perform post-processing on the transcription result
:param whisper_result:
@@ -119,19 +130,18 @@ def post_process_transcription(whisper_result):
"""
transcript_text = ""
for chunk in whisper_result["chunks"]:
-nonduplicate_sentences = \
-remove_outright_duplicate_sentences_from_chunk(chunk)
-chunk_sentences = \
-remove_whisper_repetitive_hallucination(nonduplicate_sentences)
-similarity_matched_sentences = \
-remove_almost_alike_sentences(chunk_sentences)
+nonduplicate_sentences = remove_outright_duplicate_sentences_from_chunk(chunk)
+chunk_sentences = remove_whisper_repetitive_hallucination(
+nonduplicate_sentences
+)
+similarity_matched_sentences = remove_almost_alike_sentences(chunk_sentences)
chunk["text"] = " ".join(similarity_matched_sentences)
transcript_text += chunk["text"]
whisper_result["text"] = transcript_text
return whisper_result
-def summarize_chunks(chunks, tokenizer, model):
+def summarize_chunks(chunks: List[str], tokenizer, model) -> List[str]:
"""
Summarize each chunk using a summarizer model
:param chunks:
@@ -142,23 +152,24 @@ def summarize_chunks(chunks, tokenizer, model):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summaries = []
for c in chunks:
-input_ids = tokenizer.encode(c, return_tensors='pt')
+input_ids = tokenizer.encode(c, return_tensors="pt")
input_ids = input_ids.to(device)
with torch.no_grad():
-summary_ids = \
-model.generate(input_ids,
-num_beams=int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]),
-length_penalty=2.0,
-max_length=int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]),
-early_stopping=True)
-summary = tokenizer.decode(summary_ids[0],
-skip_special_tokens=True)
+summary_ids = model.generate(
+input_ids,
+num_beams=int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]),
+length_penalty=2.0,
+max_length=int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]),
+early_stopping=True,
+)
+summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
summaries.append(summary)
return summaries
-def chunk_text(text,
-max_chunk_length=int(CONFIG["SUMMARIZER"]["MAX_CHUNK_LENGTH"])):
+def chunk_text(
+text: str, max_chunk_length: int = int(CONFIG["SUMMARIZER"]["MAX_CHUNK_LENGTH"])
+) -> List[str]:
"""
Split text into smaller chunks.
:param text: Text to be chunked
@@ -178,9 +189,12 @@ def chunk_text(text,
return chunks
-def summarize(transcript_text, timestamp,
-real_time=False,
-chunk_summarize=CONFIG["SUMMARIZER"]["SUMMARIZE_USING_CHUNKS"]):
+def summarize(
+transcript_text: str,
+timestamp: datetime.datetime.timestamp,
+real_time: bool = False,
+chunk_summarize: str = CONFIG["SUMMARIZER"]["SUMMARIZE_USING_CHUNKS"],
+):
"""
Summarize the given text either as a whole or as chunks as needed
:param transcript_text:
@@ -206,39 +220,45 @@ def summarize(transcript_text, timestamp,
if chunk_summarize != "YES":
max_length = int(CONFIG["SUMMARIZER"]["INPUT_ENCODING_MAX_LENGTH"])
-inputs = tokenizer. \
-batch_encode_plus([transcript_text], truncation=True,
-padding='longest',
-max_length=max_length,
-return_tensors='pt')
+inputs = tokenizer.batch_encode_plus(
+[transcript_text],
+truncation=True,
+padding="longest",
+max_length=max_length,
+return_tensors="pt",
+)
inputs = inputs.to(device)
with torch.no_grad():
num_beans = int(CONFIG["SUMMARIZER"]["BEAM_SIZE"])
max_length = int(CONFIG["SUMMARIZER"]["MAX_LENGTH"])
-summaries = model.generate(inputs['input_ids'],
-num_beams=num_beans,
-length_penalty=2.0,
-max_length=max_length,
-early_stopping=True)
+summaries = model.generate(
+inputs["input_ids"],
+num_beams=num_beans,
+length_penalty=2.0,
+max_length=max_length,
+early_stopping=True,
+)
-decoded_summaries = \
-[tokenizer.decode(summary,
-skip_special_tokens=True,
-clean_up_tokenization_spaces=False)
-for summary in summaries]
+decoded_summaries = [
+tokenizer.decode(
+summary, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)
+for summary in summaries
+]
summary = " ".join(decoded_summaries)
with open("./artefacts/" + output_file, 'w', encoding="utf-8") as file:
with open("./artefacts/" + output_file, "w", encoding="utf-8") as file:
file.write(summary.strip() + "\n")
else:
LOGGER.info("Breaking transcript into smaller chunks")
chunks = chunk_text(transcript_text)
LOGGER.info(f"Transcript broken into {len(chunks)} "
f"chunks of at most 500 words")
LOGGER.info(
f"Transcript broken into {len(chunks)} " f"chunks of at most 500 words"
)
LOGGER.info(f"Writing summary text to: {output_file}")
-with open(output_file, 'w') as f:
+with open(output_file, "w") as f:
summaries = summarize_chunks(chunks, tokenizer, model)
for summary in summaries:
f.write(summary.strip() + " ")
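Taken together, the utilities above form a cleanup-then-summarize pipeline; a minimal usage sketch, assuming a Whisper-style result dict with a `chunks` list and the `config.ini` sections referenced by `CONFIG`:
```
import datetime

# Hypothetical Whisper output with a repeated sentence to clean up.
whisper_result = {"chunks": [{"text": "Hello everyone. Hello everyone. Welcome."}]}

cleaned = post_process_transcription(whisper_result)
summarize(cleaned["text"], timestamp=datetime.datetime.now())
```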


@@ -4,8 +4,10 @@ Utility file for all visualization related functions
import ast
import collections
import datetime
+import os
import pickle
+from typing import NoReturn
import matplotlib.pyplot as plt
import pandas as pd
@@ -14,22 +16,30 @@ import spacy
from nltk.corpus import stopwords
from wordcloud import STOPWORDS, WordCloud
-en = spacy.load('en_core_web_md')
+en = spacy.load("en_core_web_md")
spacy_stopwords = en.Defaults.stop_words
-STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))). \
-union(set(spacy_stopwords))
+STOPWORDS = (
+set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords))
+)
-def create_wordcloud(timestamp, real_time=False):
+def create_wordcloud(
+timestamp: datetime.datetime.timestamp, real_time: bool = False
+) -> NoReturn:
"""
Create a basic word cloud visualization of transcribed text
:return: None. The wordcloud image is saved locally
"""
filename = "transcript"
if real_time:
filename = "real_time_" + filename + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
filename = (
"real_time_"
+ filename
+ "_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".txt"
)
else:
filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -38,10 +48,13 @@ def create_wordcloud(timestamp, real_time=False):
# python_mask = np.array(PIL.Image.open("download1.png"))
-wordcloud = WordCloud(height=800, width=800,
-background_color='white',
-stopwords=STOPWORDS,
-min_font_size=8).generate(transcription_text)
+wordcloud = WordCloud(
+height=800,
+width=800,
+background_color="white",
+stopwords=STOPWORDS,
+min_font_size=8,
+).generate(transcription_text)
# Plot wordcloud and save image
plt.figure(facecolor=None)
@@ -51,15 +64,22 @@ def create_wordcloud(timestamp, real_time=False):
wordcloud = "wordcloud"
if real_time:
wordcloud = "real_time_" + wordcloud + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
wordcloud = (
"real_time_"
+ wordcloud
+ "_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".png"
)
else:
wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
plt.savefig("./artefacts/" + wordcloud)
-def create_talk_diff_scatter_viz(timestamp, real_time=False):
+def create_talk_diff_scatter_viz(
+timestamp: datetime.datetime.timestamp, real_time: bool = False
+) -> NoReturn:
"""
Perform agenda vs transcription diff to see covered topics.
Create a scatter plot of words in topics.
@@ -67,7 +87,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
"""
spacy_model = "en_core_web_md"
nlp = spacy.load(spacy_model)
-nlp.add_pipe('sentencizer')
+nlp.add_pipe("sentencizer")
agenda_topics = []
agenda = []
@@ -80,11 +100,17 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
# Load the transcription with timestamp
if real_time:
filename = "./artefacts/real_time_transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
filename = (
"./artefacts/real_time_transcript_with_timestamp_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".txt"
)
else:
filename = "./artefacts/transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
filename = (
"./artefacts/transcript_with_timestamp_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".txt"
)
with open(filename) as file:
transcription_timestamp_text = file.read()
@@ -124,23 +150,33 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
covered_items[agenda[topic_similarities[i][0]]] = True
# top1 match
if i == 0:
ts_to_topic_mapping_top_1[c["timestamp"]] = agenda_topics[topic_similarities[i][0]]
topic_to_ts_mapping_top_1[agenda_topics[topic_similarities[i][0]]].append(c["timestamp"])
ts_to_topic_mapping_top_1[c["timestamp"]] = agenda_topics[
topic_similarities[i][0]
]
topic_to_ts_mapping_top_1[
agenda_topics[topic_similarities[i][0]]
].append(c["timestamp"])
# top2 match
else:
ts_to_topic_mapping_top_2[c["timestamp"]] = agenda_topics[topic_similarities[i][0]]
topic_to_ts_mapping_top_2[agenda_topics[topic_similarities[i][0]]].append(c["timestamp"])
ts_to_topic_mapping_top_2[c["timestamp"]] = agenda_topics[
topic_similarities[i][0]
]
topic_to_ts_mapping_top_2[
agenda_topics[topic_similarities[i][0]]
].append(c["timestamp"])
-def create_new_columns(record):
+def create_new_columns(record: dict) -> dict:
"""
Accumulate the mapping information into the df
:param record:
:return:
"""
-record["ts_to_topic_mapping_top_1"] = \
-ts_to_topic_mapping_top_1[record["timestamp"]]
-record["ts_to_topic_mapping_top_2"] = \
-ts_to_topic_mapping_top_2[record["timestamp"]]
+record["ts_to_topic_mapping_top_1"] = ts_to_topic_mapping_top_1[
+record["timestamp"]
+]
+record["ts_to_topic_mapping_top_2"] = ts_to_topic_mapping_top_2[
+record["timestamp"]
+]
return record
df = df.apply(create_new_columns, axis=1)
@@ -161,19 +197,33 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
# Save df, mappings for further experimentation
df_name = "df"
if real_time:
df_name = "real_time_" + df_name + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
df_name = (
"real_time_"
+ df_name
+ "_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".pkl"
)
else:
df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
df.to_pickle("./artefacts/" + df_name)
my_mappings = [ts_to_topic_mapping_top_1, ts_to_topic_mapping_top_2,
topic_to_ts_mapping_top_1, topic_to_ts_mapping_top_2]
my_mappings = [
ts_to_topic_mapping_top_1,
ts_to_topic_mapping_top_2,
topic_to_ts_mapping_top_1,
topic_to_ts_mapping_top_2,
]
mappings_name = "mappings"
if real_time:
mappings_name = "real_time_" + mappings_name + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
mappings_name = (
"real_time_"
+ mappings_name
+ "_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".pkl"
)
else:
mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
pickle.dump(my_mappings, open("./artefacts/" + mappings_name, "wb"))
@@ -197,21 +247,37 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
# Scatter plot of topics
df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
corpus = st.CorpusFromParsedDocuments(
df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
corpus = (
st.CorpusFromParsedDocuments(
df, category_col="ts_to_topic_mapping_top_1", parsed_col="parse"
)
.build()
.get_unigram_corpus()
.compact(st.AssociationCompactor(2000))
)
html = st.produce_scattertext_explorer(
corpus,
category=cat_1,
category_name=cat_1_name,
not_category_name=cat_2_name,
minimum_term_frequency=0, pmi_threshold_coefficient=0,
width_in_pixels=1000,
transform=st.Scalers.dense_rank
corpus,
category=cat_1,
category_name=cat_1_name,
not_category_name=cat_2_name,
minimum_term_frequency=0,
pmi_threshold_coefficient=0,
width_in_pixels=1000,
transform=st.Scalers.dense_rank,
)
if real_time:
open('./artefacts/real_time_scatter_' +
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
with open(
"./artefacts/real_time_scatter_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".html",
"w",
) as file:
file.write(html)
else:
open('./artefacts/scatter_' +
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
with open(
"./artefacts/scatter_"
+ timestamp.strftime("%m-%d-%Y_%H:%M:%S")
+ ".html",
"w",
) as file:
file.write(html)
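
The timestamped artefact-naming pattern (`real_time_` prefix plus a `%m-%d-%Y_%H:%M:%S` suffix) is rebuilt by hand for the wordcloud, dataframe, mappings, and scatter outputs above. A minimal sketch of factoring it into a helper; the `artefact_path` name is hypothetical and not part of this commit:

```python
import datetime


def artefact_path(stem: str, timestamp: datetime.datetime,
                  real_time: bool = False, ext: str = ".png") -> str:
    """Build './artefacts/[real_time_]<stem>_<MM-DD-YYYY_HH:MM:SS><ext>'."""
    prefix = "real_time_" if real_time else ""
    suffix = timestamp.strftime("%m-%d-%Y_%H:%M:%S")
    return f"./artefacts/{prefix}{stem}_{suffix}{ext}"


# e.g. artefact_path("wordcloud", datetime.datetime.now())
# -> ./artefacts/wordcloud_07-27-2023_15:25:15.png
```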


@@ -1,50 +0,0 @@
aiohttp==3.8.5
aiohttp-cors==0.7.0
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1
anyio==3.7.1
async-timeout==4.0.2
attrs==23.1.0
av==10.0.0
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==3.2.0
coloredlogs==15.0.1
cryptography==41.0.2
ctranslate2==3.17.1
dnspython==2.4.0
faster-whisper==0.7.1
filelock==3.12.2
flatbuffers==23.5.26
frozenlist==1.4.0
fsspec==2023.6.0
google-crc32c==1.5.0
h11==0.14.0
httpcore==0.17.3
huggingface-hub==0.16.4
humanfriendly==10.0
idna==3.4
ifaddr==0.2.0
loguru==0.7.0
mpmath==1.3.0
multidict==6.0.4
numpy==1.25.1
onnxruntime==1.15.1
packaging==23.1
protobuf==4.23.4
pycparser==2.21
pyee==11.0.0
pylibsrtp==0.8.0
pyOpenSSL==23.2.0
PyYAML==6.0.1
requests==2.31.0
sniffio==1.3.0
sortedcontainers==2.4.0
sympy==1.12
tokenizers==0.13.3
tqdm==4.65.0
typing_extensions==4.7.1
urllib3==2.0.4
yarl==1.9.2
wave==0.0.2


@@ -1,324 +0,0 @@
import argparse
import asyncio
import datetime
import json
import os
import uuid
import wave
from concurrent.futures import ThreadPoolExecutor
from typing import Union, NoReturn
import aiohttp_cors
import av
import requests
from aiohttp import web
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaRelay
from faster_whisper import WhisperModel
from sortedcontainers import SortedDict
from reflector_dataclasses import FinalSummaryResult, ParseLLMResult,\
TitleSummaryInput, TitleSummaryOutput, TranscriptionInput,\
TranscriptionOutput, BlackListedMessages
from utils.run_utils import CONFIG, run_in_executor
from utils.log_utils import LOGGER
pcs = set()
relay = MediaRelay()
data_channel = None
model = WhisperModel("tiny", device="cpu",
compute_type="float32",
num_workers=12)
CHANNELS = 2
RATE = 48000
audio_buffer = av.AudioFifo()
executor = ThreadPoolExecutor()
transcription_text = ""
last_transcribed_time = 0.0
LLM_MACHINE_IP = CONFIG["LLM"]["LLM_MACHINE_IP"]
LLM_MACHINE_PORT = CONFIG["LLM"]["LLM_MACHINE_PORT"]
LLM_URL = f"http://{LLM_MACHINE_IP}:{LLM_MACHINE_PORT}/api/v1/generate"
incremental_responses = []
sorted_transcripts = SortedDict()
def parse_llm_output(param: TitleSummaryInput, response: requests.Response) -> Union[None, ParseLLMResult]:
try:
output = json.loads(response.json()["results"][0]["text"])
return ParseLLMResult(param, output)
except Exception as e:
LOGGER.info("Exception" + str(e))
return None
def get_title_and_summary(param: TitleSummaryInput) -> Union[None, TitleSummaryOutput]:
LOGGER.info("Generating title and summary")
# TODO : Handle unexpected output formats from the model
try:
response = requests.post(LLM_URL,
headers=param.headers,
json=param.data)
output = parse_llm_output(param, response)
if output:
result = output.get_result()
incremental_responses.append(result)
return TitleSummaryOutput(incremental_responses)
except Exception as e:
LOGGER.info("Exception" + str(e))
return None
def channel_log(channel, t: str, message: str) -> NoReturn:
LOGGER.info("channel(%s) %s %s" % (channel.label, t, message))
def channel_send(channel, message: str) -> NoReturn:
if channel:
channel.send(message)
def channel_send_increment(channel, param: Union[FinalSummaryResult, TitleSummaryOutput]) -> NoReturn:
if channel and param:
message = param.get_result()
channel.send(json.dumps(message))
def channel_send_transcript(channel) -> NoReturn:
# channel_log(channel, ">", message)
if channel:
try:
least_time = next(iter(sorted_transcripts))
message = sorted_transcripts[least_time].get_result()
if message:
del sorted_transcripts[least_time]
if message["text"] not in BlackListedMessages.messages:
channel.send(json.dumps(message))
# Due to exceptions if one of the earlier batches can't return
# a transcript, we don't want to be stuck waiting for the result
# With the threshold size of 3, we pop the first(lost) element
else:
if len(sorted_transcripts) >= 3:
del sorted_transcripts[least_time]
except Exception as exception:
LOGGER.info("Exception", str(exception))
def get_transcription(input_frames: TranscriptionInput) -> Union[None, TranscriptionOutput]:
LOGGER.info("Transcribing..")
sorted_transcripts[input_frames.frames[0].time] = None
# TODO: Find cleaner way, watch "no transcription" issue below
# Passing IO objects instead of temporary files throws an error
# Passing ndarray (type casted with float) does not give any
# transcription. Refer issue,
# https://github.com/guillaumekln/faster-whisper/issues/369
audio_file = "test" + str(datetime.datetime.now())
wf = wave.open(audio_file, "wb")
wf.setnchannels(CHANNELS)
wf.setframerate(RATE)
wf.setsampwidth(2)
for frame in input_frames.frames:
wf.writeframes(b"".join(frame.to_ndarray()))
wf.close()
result_text = ""
try:
segments, _ = \
model.transcribe(audio_file,
language="en",
beam_size=5,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500})
os.remove(audio_file)
segments = list(segments)
result_text = ""
duration = 0.0
for segment in segments:
result_text += segment.text
start_time = segment.start
end_time = segment.end
if not segment.start:
start_time = 0.0
if not segment.end:
end_time = 5.5
duration += (end_time - start_time)
global last_transcribed_time, transcription_text
last_transcribed_time += duration
transcription_text += result_text
except Exception as exception:
LOGGER.info("Exception" + str(exception))
result = TranscriptionOutput(result_text)
sorted_transcripts[input_frames.frames[0].time] = result
return result
def get_final_summary_response() -> FinalSummaryResult:
"""
Collate the incremental summaries generated so far and return as the final
summary
:return:
"""
final_summary = ""
# Collate inc summaries
for topic in incremental_responses:
final_summary += topic["description"]
response = FinalSummaryResult(final_summary, last_transcribed_time)
with open("./artefacts/meeting_titles_and_summaries.txt", "a",
encoding="utf-8") as file:
file.write(json.dumps(incremental_responses))
return response
class AudioStreamTrack(MediaStreamTrack):
"""
An audio stream track.
"""
kind = "audio"
def __init__(self, track):
super().__init__()
self.track = track
async def recv(self) -> av.audio.frame.AudioFrame:
global transcription_text
frame = await self.track.recv()
audio_buffer.write(frame)
if local_frames := audio_buffer.read_many(256 * 960, partial=False):
whisper_result = run_in_executor(
get_transcription,
TranscriptionInput(local_frames),
executor=executor
)
whisper_result.add_done_callback(
lambda f: channel_send_transcript(data_channel)
if f.result()
else None
)
if len(transcription_text) > 25:
llm_input_text = transcription_text
transcription_text = ""
param = TitleSummaryInput(input_text=llm_input_text,
transcribed_time=last_transcribed_time)
llm_result = run_in_executor(get_title_and_summary,
param,
executor=executor)
llm_result.add_done_callback(
lambda f: channel_send_increment(data_channel,
llm_result.result())
if f.result()
else None
)
return frame
async def offer(request: requests.Request) -> web.Response:
"""
Establish the WebRTC connection with the client
:param request:
:return:
"""
params = await request.json()
offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
pc = RTCPeerConnection()
pc_id = "PeerConnection(%s)" % uuid.uuid4()
pcs.add(pc)
def log_info(msg, *args) -> NoReturn:
LOGGER.info(pc_id + " " + msg, *args)
log_info("Created for " + request.remote)
@pc.on("datachannel")
def on_datachannel(channel) -> NoReturn:
global data_channel
data_channel = channel
channel_log(channel, "-", "created by remote party")
@channel.on("message")
def on_message(message: str) -> NoReturn:
channel_log(channel, "<", message)
if json.loads(message)["cmd"] == "STOP":
# Placeholder final summary
response = get_final_summary_response()
channel_send_increment(data_channel, response)
# To-do Add code to stop connection from server side here
# But have to handshake with client once
if isinstance(message, str) and message.startswith("ping"):
channel_send(channel, "pong" + message[4:])
@pc.on("connectionstatechange")
async def on_connectionstatechange() -> NoReturn:
log_info("Connection state is " + pc.connectionState)
if pc.connectionState == "failed":
await pc.close()
pcs.discard(pc)
@pc.on("track")
def on_track(track) -> NoReturn:
log_info("Track " + track.kind + " received")
pc.addTrack(AudioStreamTrack(relay.subscribe(track)))
await pc.setRemoteDescription(offer)
answer = await pc.createAnswer()
await pc.setLocalDescription(answer)
return web.Response(
content_type="application/json",
text=json.dumps(
{
"sdp": pc.localDescription.sdp,
"type": pc.localDescription.type
}
),
)
async def on_shutdown(application: web.Application) -> NoReturn:
coroutines = [pc.close() for pc in pcs]
await asyncio.gather(*coroutines)
pcs.clear()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="WebRTC based server for Reflector"
)
parser.add_argument(
"--host", default="0.0.0.0", help="Server host IP (def: 0.0.0.0)"
)
parser.add_argument(
"--port", type=int, default=1250, help="Server port (def: 1250)"
)
args = parser.parse_args()
app = web.Application()
cors = aiohttp_cors.setup(
app,
defaults={
"*": aiohttp_cors.ResourceOptions(
allow_credentials=True,
expose_headers="*",
allow_headers="*"
)
},
)
offer_resource = cors.add(app.router.add_resource("/offer"))
cors.add(offer_resource.add_route("POST", offer))
app.on_shutdown.append(on_shutdown)
web.run_app(app, access_log=None, host=args.host, port=args.port)
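
The `sorted_transcripts` bookkeeping above is the subtle part of this (now removed) server: transcription batches run on a thread pool and can finish out of order, so results are keyed by the first frame's time in a `SortedDict` and delivered oldest-first, with a threshold of three pending batches before a lost result is abandoned. A self-contained sketch of the same idea, assuming `sortedcontainers` is installed (names are illustrative, not the commit's API):

```python
from sortedcontainers import SortedDict

pending = SortedDict()  # frame_time -> transcript, or None while in flight


def deliver_oldest(send, max_stalled: int = 3) -> None:
    """Send the oldest finished transcript; drop it if it stalled too long."""
    if not pending:
        return
    oldest = next(iter(pending))  # smallest key, i.e. earliest frame time
    result = pending[oldest]
    if result is not None:
        del pending[oldest]
        send(result)
    elif len(pending) >= max_stalled:
        # An earlier batch failed and will never complete; stop blocking
        # the queue behind it.
        del pending[oldest]
```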



@@ -0,0 +1,63 @@
import pytest
from unittest.mock import patch
@pytest.mark.asyncio
async def test_basic_rtc_server(aiohttp_server, event_loop):
# goal is to start the server, and send rtc audio to it
# validate the events received
import argparse
import json
from pathlib import Path
from reflector.server import create_app
from reflector.stream_client import StreamClient
from reflector.models import TitleSummaryOutput
from aiortc.contrib.signaling import add_signaling_arguments, create_signaling
# customize settings to have a mock LLM server
with patch("reflector.server.get_title_and_summary") as mock_llm:
# any response from mock_llm will be test topic
mock_llm.return_value = TitleSummaryOutput(["topic_test"])
# create the server
app = create_app()
server = await aiohttp_server(app)
url = f"http://{server.host}:{server.port}/offer"
# create signaling
parser = argparse.ArgumentParser()
add_signaling_arguments(parser)
args = parser.parse_args(["-s", "tcp-socket"])
signaling = create_signaling(args)
# create the client
path = Path(__file__).parent / "records" / "test_mathieu_hello.wav"
client = StreamClient(signaling, url=url, play_from=path.as_posix())
await client.start()
# we just want the first transcription
# and topic update messages
marks = {
"SHOW_TRANSCRIPTION": False,
"UPDATE_TOPICS": False,
}
async for rawmsg in client.get_reader():
msg = json.loads(rawmsg)
cmd = msg["cmd"]
if cmd == "SHOW_TRANSCRIPTION":
assert "text" in msg
assert "want to share my incredible experience" in msg["text"]
elif cmd == "UPDATE_TOPICS":
assert "topics" in msg
assert "topic_test" in msg["topics"]
marks[cmd] = True
# break if we have all the events we need
if all(marks.values()):
break
# stop the server
await server.close()
await client.stop()
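
Assuming the file layout matches the imports above (a `server/tests/` directory alongside the `reflector` package), this test can presumably be run on its own with pytest's name selection: `poetry run python -m pytest -v tests -k test_basic_rtc_server` from the `server` directory.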


@@ -1,26 +0,0 @@
"""
Utility file for logging
"""
import loguru
class SingletonLogger:
"""
Use Singleton design pattern to create a logger object and share it
across the entire project
"""
__instance = None
@staticmethod
def get_logger():
"""
Create or return the singleton instance for the SingletonLogger class
:return: SingletonLogger instance
"""
if not SingletonLogger.__instance:
SingletonLogger.__instance = loguru.logger
return SingletonLogger.__instance
LOGGER = SingletonLogger.get_logger()
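
A singleton wrapper is redundant here: loguru already exposes a single, process-wide `logger` object, so the same shared behaviour comes from importing it directly:

```python
from loguru import logger

logger.info("same shared logger instance, no singleton wrapper needed")
```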

61
www/Dockerfile Normal file

@@ -0,0 +1,61 @@
#syntax=docker/dockerfile:1.4
FROM node:18-alpine AS base
# Install dependencies only when needed
FROM base AS deps
# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
RUN apk add --no-cache libc6-compat
WORKDIR /app
# Install dependencies based on the preferred package manager
COPY --link package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
elif [ -f package-lock.json ]; then npm ci; \
elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \
else echo "Lockfile not found." && exit 1; \
fi
# Rebuild the source code only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps --link /app/node_modules ./node_modules
COPY --link . .
# Next.js collects completely anonymous telemetry data about general usage.
# Learn more here: https://nextjs.org/telemetry
# Uncomment the following line in case you want to disable telemetry during the build.
ENV NEXT_TELEMETRY_DISABLED 1
# If using npm comment out above and use below instead
RUN yarn build
# RUN npm run build
# Production image, copy all the files and run next
FROM base AS runner
WORKDIR /app
ENV NODE_ENV production
# Uncomment the following line in case you want to disable telemetry during runtime.
# ENV NEXT_TELEMETRY_DISABLED 1
RUN \
addgroup --system --gid 1001 nodejs; \
adduser --system --uid 1001 nextjs
COPY --from=builder --link /app/public ./public
# Automatically leverage output traces to reduce image size
# https://nextjs.org/docs/advanced-features/output-file-tracing
COPY --from=builder --link --chown=1001:1001 /app/.next/standalone ./
COPY --from=builder --link --chown=1001:1001 /app/.next/static ./.next/static
USER nextjs
EXPOSE 3000
ENV PORT 3000
ENV HOSTNAME localhost
CMD ["node", "server.js"]


@@ -21,7 +21,7 @@ Reflector is a React application that uses WebRTC to stream audio from the browser
To install the application, run:
```bash
npm install
yarn install
```
## Run the Application
@@ -29,7 +29,7 @@ npm install
To run the application in development mode, run:
```bash
npm run dev
yarn run dev
```
Then open [http://localhost:3000](http://localhost:3000) to view it in the browser.

11
www/docker-compose.yml Normal file

@@ -0,0 +1,11 @@
version: "3.9"
services:
server:
build:
context: .
ports:
- 80:80
environment:
LLM_URL: "${LLM_URL}"
volumes:
- model-cache:/root/.cache


@@ -1,4 +1,6 @@
/** @type {import('next').NextConfig} */
const nextConfig = {};
const nextConfig = {
output: 'standalone',
};
module.exports = nextConfig;

1725
www/package-lock.json generated

File diff suppressed because it is too large


@@ -15,6 +15,7 @@
"@fortawesome/react-fontawesome": "^0.2.0",
"autoprefixer": "10.4.14",
"fontawesome": "^5.6.3",
"jest-worker": "^29.6.2",
"next": "^13.4.9",
"postcss": "8.4.25",
"react": "^18.2.0",


@@ -12,7 +12,7 @@
resolved "https://registry.npmjs.org/@fortawesome/fontawesome-common-types/-/fontawesome-common-types-6.4.0.tgz"
integrity sha512-HNii132xfomg5QVZw0HwXXpN22s7VBHQBv9CeOu9tfJnhsWQNd2lmTNi8CSrnw5B+5YOmzu1UoPAyxaXsJ6RgQ==
"@fortawesome/fontawesome-svg-core@^6.4.0", "@fortawesome/fontawesome-svg-core@~1 || ~6":
"@fortawesome/fontawesome-svg-core@^6.4.0":
version "6.4.0"
resolved "https://registry.npmjs.org/@fortawesome/fontawesome-svg-core/-/fontawesome-svg-core-6.4.0.tgz"
integrity sha512-Bertv8xOiVELz5raB2FlXDPKt+m94MQ3JgDfsVbrqNpLU9+UE2E18GKjLKw+d3XbeYPqg1pzyQKGsrzbw+pPaw==
@@ -33,6 +33,25 @@
dependencies:
prop-types "^15.8.1"
"@jest/schemas@^29.6.0":
version "29.6.0"
resolved "https://registry.yarnpkg.com/@jest/schemas/-/schemas-29.6.0.tgz#0f4cb2c8e3dca80c135507ba5635a4fd755b0040"
integrity sha512-rxLjXyJBTL4LQeJW3aKo0M/+GkCOXsO+8i9Iu7eDb6KwtP65ayoDsitrdPBtujxQ88k4wI2FNYfa6TOGwSn6cQ==
dependencies:
"@sinclair/typebox" "^0.27.8"
"@jest/types@^29.6.1":
version "29.6.1"
resolved "https://registry.yarnpkg.com/@jest/types/-/types-29.6.1.tgz#ae79080278acff0a6af5eb49d063385aaa897bf2"
integrity sha512-tPKQNMPuXgvdOn2/Lg9HNfUvjYVGolt04Hp03f5hAk878uwOLikN+JzeLY0HcVgKgFl9Hs3EIqpu3WX27XNhnw==
dependencies:
"@jest/schemas" "^29.6.0"
"@types/istanbul-lib-coverage" "^2.0.0"
"@types/istanbul-reports" "^3.0.0"
"@types/node" "*"
"@types/yargs" "^17.0.8"
chalk "^4.0.0"
"@jridgewell/gen-mapping@^0.3.2":
version "0.3.3"
resolved "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz"
@@ -52,16 +71,16 @@
resolved "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz"
integrity sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==
"@jridgewell/sourcemap-codec@^1.4.10":
version "1.4.15"
resolved "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz"
integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==
"@jridgewell/sourcemap-codec@1.4.14":
version "1.4.14"
resolved "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz"
integrity sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==
"@jridgewell/sourcemap-codec@^1.4.10":
version "1.4.15"
resolved "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz"
integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==
"@jridgewell/trace-mapping@^0.3.9":
version "0.3.18"
resolved "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.18.tgz"
@@ -80,6 +99,46 @@
resolved "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-13.4.9.tgz"
integrity sha512-TVzGHpZoVBk3iDsTOQA/R6MGmFp0+17SWXMEWd6zG30AfuELmSSMe2SdPqxwXU0gbpWkJL1KgfLzy5ReN0crqQ==
"@next/swc-darwin-x64@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-darwin-x64/-/swc-darwin-x64-13.4.9.tgz#a08fccdee68201522fe6618ec81f832084b222f8"
integrity sha512-aSfF1fhv28N2e7vrDZ6zOQ+IIthocfaxuMWGReB5GDriF0caTqtHttAvzOMgJgXQtQx6XhyaJMozLTSEXeNN+A==
"@next/swc-linux-arm64-gnu@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-13.4.9.tgz#1798c2341bb841e96521433eed00892fb24abbd1"
integrity sha512-JhKoX5ECzYoTVyIy/7KykeO4Z2lVKq7HGQqvAH+Ip9UFn1MOJkOnkPRB7v4nmzqAoY+Je05Aj5wNABR1N18DMg==
"@next/swc-linux-arm64-musl@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-13.4.9.tgz#cee04c51610eddd3638ce2499205083656531ea0"
integrity sha512-OOn6zZBIVkm/4j5gkPdGn4yqQt+gmXaLaSjRSO434WplV8vo2YaBNbSHaTM9wJpZTHVDYyjzuIYVEzy9/5RVZw==
"@next/swc-linux-x64-gnu@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-13.4.9.tgz#1932d0367916adbc6844b244cda1d4182bd11f7a"
integrity sha512-iA+fJXFPpW0SwGmx/pivVU+2t4zQHNOOAr5T378PfxPHY6JtjV6/0s1vlAJUdIHeVpX98CLp9k5VuKgxiRHUpg==
"@next/swc-linux-x64-musl@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-13.4.9.tgz#a66aa8c1383b16299b72482f6360facd5cde3c7a"
integrity sha512-rlNf2WUtMM+GAQrZ9gMNdSapkVi3koSW3a+dmBVp42lfugWVvnyzca/xJlN48/7AGx8qu62WyO0ya1ikgOxh6A==
"@next/swc-win32-arm64-msvc@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-13.4.9.tgz#39482ee856c867177a612a30b6861c75e0736a4a"
integrity sha512-5T9ybSugXP77nw03vlgKZxD99AFTHaX8eT1ayKYYnGO9nmYhJjRPxcjU5FyYI+TdkQgEpIcH7p/guPLPR0EbKA==
"@next/swc-win32-ia32-msvc@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-13.4.9.tgz#29db85e34b597ade1a918235d16a760a9213c190"
integrity sha512-ojZTCt1lP2ucgpoiFgrFj07uq4CZsq4crVXpLGgQfoFq00jPKRPgesuGPaz8lg1yLfvafkU3Jd1i8snKwYR3LA==
"@next/swc-win32-x64-msvc@13.4.9":
version "13.4.9"
resolved "https://registry.yarnpkg.com/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-13.4.9.tgz#0c2758164cccd61bc5a1c6cd8284fe66173e4a2b"
integrity sha512-QbT03FXRNdpuL+e9pLnu+XajZdm/TtIXVYY4lA9t+9l0fLZbHXDYEKitAqxrOj37o3Vx5ufxiRAniaIebYDCgw==
"@nodelib/fs.scandir@2.1.5":
version "2.1.5"
resolved "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz"
@@ -88,7 +147,7 @@
"@nodelib/fs.stat" "2.0.5"
run-parallel "^1.1.9"
"@nodelib/fs.stat@^2.0.2", "@nodelib/fs.stat@2.0.5":
"@nodelib/fs.stat@2.0.5", "@nodelib/fs.stat@^2.0.2":
version "2.0.5"
resolved "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz"
integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==
@@ -101,6 +160,11 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"
"@sinclair/typebox@^0.27.8":
version "0.27.8"
resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.27.8.tgz#6667fac16c436b5434a387a34dedb013198f6e6e"
integrity sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==
"@swc/helpers@0.5.1":
version "0.5.1"
resolved "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.1.tgz"
@@ -108,6 +172,49 @@
dependencies:
tslib "^2.4.0"
"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0":
version "2.0.4"
resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.4.tgz#8467d4b3c087805d63580480890791277ce35c44"
integrity sha512-z/QT1XN4K4KYuslS23k62yDIDLwLFkzxOuMplDtObz0+y7VqJCaO2o+SPwHCvLFZh7xazvvoor2tA/hPz9ee7g==
"@types/istanbul-lib-report@*":
version "3.0.0"
resolved "https://registry.yarnpkg.com/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#c14c24f18ea8190c118ee7562b7ff99a36552686"
integrity sha512-plGgXAPfVKFoYfa9NpYDAkseG+g6Jr294RqeqcqDixSbU34MZVJRi/P+7Y8GDpzkEwLaGZZOpKIEmeVZNtKsrg==
dependencies:
"@types/istanbul-lib-coverage" "*"
"@types/istanbul-reports@^3.0.0":
version "3.0.1"
resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.1.tgz#9153fe98bba2bd565a63add9436d6f0d7f8468ff"
integrity sha512-c3mAZEuK0lvBp8tmuL74XRKn1+y2dcwOUpH7x4WrF6gk1GIgiluDRgMYQtw2OFcBvAJWlt6ASU3tSqxp0Uu0Aw==
dependencies:
"@types/istanbul-lib-report" "*"
"@types/node@*":
version "20.4.5"
resolved "https://registry.yarnpkg.com/@types/node/-/node-20.4.5.tgz#9dc0a5cb1ccce4f7a731660935ab70b9c00a5d69"
integrity sha512-rt40Nk13II9JwQBdeYqmbn2Q6IVTA5uPhvSO+JVqdXw/6/4glI6oR9ezty/A9Hg5u7JH4OmYmuQ+XvjKm0Datg==
"@types/yargs-parser@*":
version "21.0.0"
resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-21.0.0.tgz#0c60e537fa790f5f9472ed2776c2b71ec117351b"
integrity sha512-iO9ZQHkZxHn4mSakYV0vFHAVDyEOIJQrV2uZ06HxEPcx+mt8swXoZHIbaaJ2crJYFfErySgktuTZ3BeLz+XmFA==
"@types/yargs@^17.0.8":
version "17.0.24"
resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.24.tgz#b3ef8d50ad4aa6aecf6ddc97c580a00f5aa11902"
integrity sha512-6i0aC7jV6QzQB8ne1joVZ0eSFIstHsCrobmOtghM11yGlH0j43FKL2UhWdELkyps0zuf7qVTUVCCR+tgSlyLLw==
dependencies:
"@types/yargs-parser" "*"
ansi-styles@^4.1.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
dependencies:
color-convert "^2.0.1"
any-promise@^1.0.0:
version "1.3.0"
resolved "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz"
@@ -168,7 +275,7 @@ braces@^3.0.2, braces@~3.0.2:
dependencies:
fill-range "^7.0.1"
browserslist@^4.21.5, "browserslist@>= 4.21.0":
browserslist@^4.21.5:
version "4.21.9"
resolved "https://registry.npmjs.org/browserslist/-/browserslist-4.21.9.tgz"
integrity sha512-M0MFoZzbUrRU4KNfCrDLnvyE7gub+peetoTid3TBIqtunaDJyXlwhakT+/VkvSXcfIzFfK/nkCs4nmyTmxdNSg==
@@ -203,7 +310,15 @@ caniuse-lite@^1.0.30001406, caniuse-lite@^1.0.30001464, caniuse-lite@^1.0.300015
resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001515.tgz"
integrity sha512-eEFDwUOZbE24sb+Ecsx3+OvNETqjWIdabMy52oOkIgcUtAsQifjUG9q4U9dgTHJM2mfk4uEPxc0+xuFdJ629QA==
chokidar@^3.5.3, "chokidar@>=3.0.0 <4.0.0":
chalk@^4.0.0:
version "4.1.2"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01"
integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==
dependencies:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
"chokidar@>=3.0.0 <4.0.0", chokidar@^3.5.3:
version "3.5.3"
resolved "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz"
integrity sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==
@@ -218,6 +333,11 @@ chokidar@^3.5.3, "chokidar@>=3.0.0 <4.0.0":
optionalDependencies:
fsevents "~2.3.2"
ci-info@^3.2.0:
version "3.8.0"
resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.8.0.tgz#81408265a5380c929f0bc665d62256628ce9ef91"
integrity sha512-eXTggHWSooYhq49F2opQhuHWgzucfF2YgODK4e1566GQs5BIfP30B0oenwBJHfWxAs2fyPB1s7Mg949zLf61Yw==
classnames@^2.2.3:
version "2.3.2"
resolved "https://registry.npmjs.org/classnames/-/classnames-2.3.2.tgz"
@@ -228,6 +348,18 @@ client-only@0.0.1:
resolved "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz"
integrity sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==
color-convert@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
dependencies:
color-name "~1.1.4"
color-name@~1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
commander@^4.0.0:
version "4.1.1"
resolved "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz"
@@ -330,7 +462,7 @@ get-browser-rtc@^1.1.0:
resolved "https://registry.npmjs.org/get-browser-rtc/-/get-browser-rtc-1.1.0.tgz"
integrity sha512-MghbMJ61EJrRsDe7w1Bvqt3ZsBuqhce5nrn/XAwgwOXhcsz53/ltdxOse1h/8eKXj5slzxdsz56g5rzOFSGwfQ==
glob-parent@^5.1.2:
glob-parent@^5.1.2, glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@@ -344,13 +476,6 @@ glob-parent@^6.0.2:
dependencies:
is-glob "^4.0.3"
glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
dependencies:
is-glob "^4.0.1"
glob-to-regexp@^0.4.1:
version "0.4.1"
resolved "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz"
@@ -368,11 +493,16 @@ glob@7.1.6:
once "^1.3.0"
path-is-absolute "^1.0.0"
graceful-fs@^4.1.2:
graceful-fs@^4.1.2, graceful-fs@^4.2.9:
version "4.2.11"
resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz"
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"
integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==
has-flag@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
has@^1.0.3:
version "1.0.3"
resolved "https://registry.npmjs.org/has/-/has-1.0.3.tgz"
@@ -398,7 +528,7 @@ inflight@^1.0.4:
once "^1.3.0"
wrappy "1"
inherits@^2.0.3, inherits@2:
inherits@2, inherits@^2.0.3:
version "2.0.4"
resolved "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
@@ -434,6 +564,28 @@ is-number@^7.0.0:
resolved "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz"
integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
jest-util@^29.6.2:
version "29.6.2"
resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-29.6.2.tgz#8a052df8fff2eebe446769fd88814521a517664d"
integrity sha512-3eX1qb6L88lJNCFlEADKOkjpXJQyZRiavX1INZ4tRnrBVr2COd3RgcTLyUiEXMNBlDU/cgYq6taUS0fExrWW4w==
dependencies:
"@jest/types" "^29.6.1"
"@types/node" "*"
chalk "^4.0.0"
ci-info "^3.2.0"
graceful-fs "^4.2.9"
picomatch "^2.2.3"
jest-worker@^29.6.2:
version "29.6.2"
resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-29.6.2.tgz#682fbc4b6856ad0aa122a5403c6d048b83f3fb44"
integrity sha512-l3ccBOabTdkng8I/ORCkADz4eSMKejTYv1vB/Z83UiubqhC1oQ5Li6dWCyqOIvSifGjUBxuvxvlm6KGK2DtuAQ==
dependencies:
"@types/node" "*"
jest-util "^29.6.2"
merge-stream "^2.0.0"
supports-color "^8.0.0"
jiti@^1.18.2:
version "1.19.1"
resolved "https://registry.npmjs.org/jiti/-/jiti-1.19.1.tgz"
@@ -461,6 +613,11 @@ loose-envify@^1.1.0, loose-envify@^1.4.0:
dependencies:
js-tokens "^3.0.0 || ^4.0.0"
merge-stream@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
merge2@^1.3.0:
version "1.4.1"
resolved "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz"
@@ -571,9 +728,9 @@ picocolors@^1.0.0:
resolved "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz"
integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==
picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.3.1:
picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.2.3, picomatch@^2.3.1:
version "2.3.1"
resolved "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz"
resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42"
integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==
pify@^2.3.0:
@@ -630,15 +787,6 @@ postcss-value-parser@^4.0.0, postcss-value-parser@^4.2.0:
resolved "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz"
integrity sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==
postcss@^8.0.0, postcss@^8.1.0, postcss@^8.2.14, postcss@^8.4.21, postcss@^8.4.23, postcss@>=8.0.9, postcss@8.4.25:
version "8.4.25"
resolved "https://registry.npmjs.org/postcss/-/postcss-8.4.25.tgz"
integrity sha512-7taJ/8t2av0Z+sQEvNzCkpDynl0tX3uJMCODi6nT3PfASC7dYCWV9aQ+uiCf+KBD4SEFcu+GvJdGdwzQ6OSjCw==
dependencies:
nanoid "^3.3.6"
picocolors "^1.0.0"
source-map-js "^1.0.2"
postcss@8.4.14:
version "8.4.14"
resolved "https://registry.npmjs.org/postcss/-/postcss-8.4.14.tgz"
@@ -648,6 +796,15 @@ postcss@8.4.14:
picocolors "^1.0.0"
source-map-js "^1.0.2"
postcss@8.4.25, postcss@^8.4.23:
version "8.4.25"
resolved "https://registry.npmjs.org/postcss/-/postcss-8.4.25.tgz"
integrity sha512-7taJ/8t2av0Z+sQEvNzCkpDynl0tX3uJMCODi6nT3PfASC7dYCWV9aQ+uiCf+KBD4SEFcu+GvJdGdwzQ6OSjCw==
dependencies:
nanoid "^3.3.6"
picocolors "^1.0.0"
source-map-js "^1.0.2"
prettier@^3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/prettier/-/prettier-3.0.0.tgz"
@@ -674,7 +831,7 @@ randombytes@^2.1.0:
dependencies:
safe-buffer "^5.1.0"
"react-dom@^0.14.7 || ^15.0.0-0 || ^16.0.0 || ^17.0.0|| ^18.0.0", react-dom@^18.2.0:
react-dom@^18.2.0:
version "18.2.0"
resolved "https://registry.npmjs.org/react-dom/-/react-dom-18.2.0.tgz"
integrity sha512-6IMTriUmvsjHUjNtEDudZfuDQUoWXVxKHhlEGSk81n4YFS+r/Kl99wXiwlVXtPBtJenozv2P+hxDsw9eA7Xo6g==
@@ -694,7 +851,7 @@ react-is@^16.13.1:
resolved "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz"
integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
"react@^0.14.7 || ^15.0.0-0 || ^16.0.0 || ^17.0.0 || ^18.0.0", react@^18.2.0, "react@>= 16.8.0 || 17.x.x || ^18.0.0-0", react@>=16.3:
react@^18.2.0:
version "18.2.0"
resolved "https://registry.npmjs.org/react/-/react-18.2.0.tgz"
integrity sha512-/3IjMdb2L9QbBdWiW5e3P2/npwMBaU9mHCSCUzNln0ZCYbcfTsGbTJrU/kGemdH2IWmB2ioZ+zkxtmq6g09fGQ==
@@ -750,7 +907,7 @@ safe-buffer@^5.1.0, safe-buffer@~5.2.0:
resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz"
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
sass@^1.3.0, sass@^1.63.6:
sass@^1.63.6:
version "1.63.6"
resolved "https://registry.npmjs.org/sass/-/sass-1.63.6.tgz"
integrity sha512-MJuxGMHzaOW7ipp+1KdELtqKbfAWbH7OLIdoSMnVe3EXPMTmxTmlaZDCTsgIpPCs3w99lLo9/zDKkOrJuT5byw==
@@ -779,7 +936,7 @@ simple-peer@^9.11.1:
randombytes "^2.1.0"
readable-stream "^3.6.0"
source-map-js@^1.0.2, "source-map-js@>=0.6.2 <2.0.0":
"source-map-js@>=0.6.2 <2.0.0", source-map-js@^1.0.2:
version "1.0.2"
resolved "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz"
integrity sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==
@@ -816,6 +973,20 @@ sucrase@^3.32.0:
pirates "^4.0.1"
ts-interface-checker "^0.1.9"
supports-color@^7.1.0:
version "7.2.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
dependencies:
has-flag "^4.0.0"
supports-color@^8.0.0:
version "8.1.1"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c"
integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==
dependencies:
has-flag "^4.0.0"
supports-color@^9.4.0:
version "9.4.0"
resolved "https://registry.npmjs.org/supports-color/-/supports-color-9.4.0.tgz"