Compare commits

...

6 Commits

Author SHA1 Message Date
Igor Loskutov
d428f9fa93 file upload is file 2025-08-22 11:48:21 -04:00
52f9f533d7 chore(main): release 0.7.2 (#559) 2025-08-21 21:00:05 -06:00
0c3878ac3c fix: docker image not loading libgomp.so.1 for torch (#560)
On ARM64, the docker image crashes because torch cannot load libgomp.so.1
-- it looks like PyTorch does not install the same packages depending on the
platform.

AMD64:

/app/.venv/lib/python3.12/site-packages/torch/lib/libgomp.so.1
/app/.venv/lib/python3.12/site-packages/ctranslate2.libs/libgomp-a34b3233.so.1.0.0
/app/.venv/lib/python3.12/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0

ARM64:

/app/.venv/lib/python3.12/site-packages/ctranslate2.libs/libgomp-d22c30c5.so.1.0.0
/app/.venv/lib/python3.12/site-packages/scikit_learn.libs/libgomp-947d5fa1.so.1.0.0
/app/.venv/lib/python3.12/site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0
2025-08-21 16:41:35 -06:00
Igor Loskutov
d70beee51b fix: include shared rooms to search (#558)
* include shared rooms to search

* tests vibe

* tests vibe

* tests vibe

* tests vibe

* tests vibe

* tests vibe

* tests vibe

* remove tests, that's too much
2025-08-21 14:52:29 -04:00
bc5b351d2b chore(main): release 0.7.1 (#557) 2025-08-20 23:23:27 -06:00
Igor Loskutov
07981e8090 fix: webvtt db null expectation mismatch (#556) 2025-08-20 23:22:41 -06:00
6 changed files with 56 additions and 13 deletions

View File

@@ -1,5 +1,20 @@
# Changelog
## [0.7.2](https://github.com/Monadical-SAS/reflector/compare/v0.7.1...v0.7.2) (2025-08-21)
### Bug Fixes
* docker image not loading libgomp.so.1 for torch ([#560](https://github.com/Monadical-SAS/reflector/issues/560)) ([773fccd](https://github.com/Monadical-SAS/reflector/commit/773fccd93e887c3493abc2e4a4864dddce610177))
* include shared rooms to search ([#558](https://github.com/Monadical-SAS/reflector/issues/558)) ([499eced](https://github.com/Monadical-SAS/reflector/commit/499eced3360b84fb3a90e1c8a3b554290d21adc2))
## [0.7.1](https://github.com/Monadical-SAS/reflector/compare/v0.7.0...v0.7.1) (2025-08-21)
### Bug Fixes
* webvtt db null expectation mismatch ([#556](https://github.com/Monadical-SAS/reflector/issues/556)) ([e67ad1a](https://github.com/Monadical-SAS/reflector/commit/e67ad1a4a2054467bfeb1e0258fbac5868aaaf21))
## [0.7.0](https://github.com/Monadical-SAS/reflector/compare/v0.6.1...v0.7.0) (2025-08-21)

View File

@@ -27,4 +27,15 @@ COPY migrations /app/migrations
COPY reflector /app/reflector
WORKDIR /app
# Create symlink for libgomp if it doesn't exist (for ARM64 compatibility)
RUN if [ "$(uname -m)" = "aarch64" ] && [ ! -f /usr/lib/libgomp.so.1 ]; then \
LIBGOMP_PATH=$(find /app/.venv/lib -path "*/torch.libs/libgomp*.so.*" 2>/dev/null | head -n1); \
if [ -n "$LIBGOMP_PATH" ]; then \
ln -sf "$LIBGOMP_PATH" /usr/lib/libgomp.so.1; \
fi \
fi
# Pre-check just to make sure the image will not fail
RUN uv run python -c "import silero_vad.model"
CMD ["./runserver.sh"]

View File

@@ -45,7 +45,7 @@ SearchTotal = Annotated[
SearchTotalBase, Field(description="Total number of search results")
]
WEBVTT_SPEC_HEADER = "WEBVTT\n\n"
WEBVTT_SPEC_HEADER = "WEBVTT"
WebVTTContent = Annotated[
str,
@@ -379,7 +379,13 @@ class SearchController:
)
if params.user_id:
base_query = base_query.where(transcripts.c.user_id == params.user_id)
base_query = base_query.where(
sqlalchemy.or_(
transcripts.c.user_id == params.user_id, rooms.c.is_shared
)
)
else:
base_query = base_query.where(rooms.c.is_shared)
if params.room_id:
base_query = base_query.where(transcripts.c.room_id == params.room_id)
if params.source_kind:

View File

@@ -201,7 +201,7 @@ async def transcripts_create(
user_id = user["sub"] if user else None
return await transcripts_controller.add(
info.name,
source_kind=SourceKind.LIVE,
source_kind=SourceKind.FILE,
source_language=info.source_language,
target_language=info.target_language,
user_id=user_id,

View File

@@ -74,11 +74,12 @@ async def test_empty_transcript_title_only_match():
"share_mode": "private",
"source_kind": "room",
"webvtt": None,
"user_id": "test-user-1",
}
await get_database().execute(transcripts.insert().values(**test_data))
params = SearchParameters(query_text="empty")
params = SearchParameters(query_text="empty", user_id="test-user-1")
results, total = await search_controller.search_transcripts(params)
assert total >= 1
@@ -127,11 +128,12 @@ async def test_search_with_long_summary():
00:00:00.000 --> 00:00:10.000
Basic meeting content without special keywords.""",
"user_id": "test-user-2",
}
await get_database().execute(transcripts.insert().values(**test_data))
params = SearchParameters(query_text="quantum computing")
params = SearchParameters(query_text="quantum computing", user_id="test-user-2")
results, total = await search_controller.search_transcripts(params)
assert total >= 1
@@ -191,23 +193,26 @@ The search feature should support complex queries with ranking.
00:00:30.000 --> 00:00:40.000
We need to implement PostgreSQL tsvector for better performance.""",
"user_id": "test-user-3",
}
await get_database().execute(transcripts.insert().values(**test_data))
params = SearchParameters(query_text="planning")
params = SearchParameters(query_text="planning", user_id="test-user-3")
results, total = await search_controller.search_transcripts(params)
assert total >= 1
found = any(r.id == test_id for r in results)
assert found, "Should find test transcript by title word"
params = SearchParameters(query_text="tsvector")
params = SearchParameters(query_text="tsvector", user_id="test-user-3")
results, total = await search_controller.search_transcripts(params)
assert total >= 1
found = any(r.id == test_id for r in results)
assert found, "Should find test transcript by webvtt content"
params = SearchParameters(query_text="engineering planning")
params = SearchParameters(
query_text="engineering planning", user_id="test-user-3"
)
results, total = await search_controller.search_transcripts(params)
assert total >= 1
found = any(r.id == test_id for r in results)
@@ -220,13 +225,17 @@ We need to implement PostgreSQL tsvector for better performance.""",
assert test_result.duration == 1800.0
assert 0 <= test_result.rank <= 1, "Rank should be normalized to 0-1"
params = SearchParameters(query_text="tsvector OR nosuchword")
params = SearchParameters(
query_text="tsvector OR nosuchword", user_id="test-user-3"
)
results, total = await search_controller.search_transcripts(params)
assert total >= 1
found = any(r.id == test_id for r in results)
assert found, "Should find test transcript with OR query"
params = SearchParameters(query_text='"full-text search"')
params = SearchParameters(
query_text='"full-text search"', user_id="test-user-3"
)
results, total = await search_controller.search_transcripts(params)
assert total >= 1
found = any(r.id == test_id for r in results)

View File

@@ -54,12 +54,13 @@ The robotics project is making good progress.
00:00:20.000 --> 00:00:30.000
We need to consider various implementation approaches.""",
"user_id": "test-user-priority",
}
await get_database().execute(transcripts.insert().values(**test_data))
# Search for "robotics" which appears in both long_summary and webvtt
params = SearchParameters(query_text="robotics")
params = SearchParameters(query_text="robotics", user_id="test-user-priority")
results, total = await search_controller.search_transcripts(params)
assert total >= 1
@@ -131,12 +132,13 @@ Team meeting about general project updates.
00:00:10.000 --> 00:00:20.000
Discussion of timeline and deliverables.""",
"user_id": "test-user-long",
}
await get_database().execute(transcripts.insert().values(**test_data))
# Search for terms only in long_summary
params = SearchParameters(query_text="cryptocurrency")
params = SearchParameters(query_text="cryptocurrency", user_id="test-user-long")
results, total = await search_controller.search_transcripts(params)
found = any(r.id == test_id for r in results)
@@ -151,7 +153,7 @@ Discussion of timeline and deliverables.""",
assert "cryptocurrency" in snippet, "Snippet should contain the search term"
# Search for "yield farming" - a more specific term
params2 = SearchParameters(query_text="yield farming")
params2 = SearchParameters(query_text="yield farming", user_id="test-user-long")
results2, total2 = await search_controller.search_transcripts(params2)
found2 = any(r.id == test_id for r in results2)