feat(rooms): add webhook for transcript completion (#578)

* feat(rooms): add webhook notifications for transcript completion

- Add webhook_url and webhook_secret fields to rooms table
- Create Celery task with 24-hour retry window using exponential backoff
- Send transcript metadata, diarized text, topics, and summaries via webhook
- Add HMAC signature verification for webhook security
- Add test endpoint POST /v1/rooms/{room_id}/webhook/test
- Update frontend with webhook configuration UI and test button
- Auto-generate webhook secret if not provided
- Trigger webhook after successful file pipeline processing for room recordings

* style: linting

* fix: remove unwanted files

* fix: update openapi gen

* fix: self-review

* docs: add comprehensive webhook documentation

- Document webhook configuration, events, and payloads
- Include transcript.completed and test event examples
- Add security considerations and best practices
- Provide example webhook receiver implementation
- Document retry policy and signature verification

* fix: remove audio_mp3_url from webhook payload

- Remove audio download URL generation from webhook
- Update documentation to reflect the change
- Keep only frontend_url for accessing transcripts

* docs: remove unwanted section

* fix: correct API method name and type imports for rooms

- Fix v1RoomsRetrieve to v1RoomsGet
- Update Room type to RoomDetails throughout frontend
- Fix type imports in useRoomList, RoomList, RoomTable, and RoomCards

* feat: add show/hide toggle for webhook secret field

- Add eye icon button to reveal/hide webhook secret when editing
- Show password dots when webhook secret is hidden
- Reset visibility state when opening/closing dialog
- Only show toggle button when editing existing room with secret

* fix: resolve event loop conflict in webhook test endpoint

- Extract webhook test logic into shared async function
- Call async function directly from FastAPI endpoint
- Keep Celery task wrapper for background processing
- Fixes RuntimeError: event loop already running

* refactor: remove unnecessary Celery task for webhook testing

- Webhook testing is synchronous and provides immediate feedback
- No need for background processing via Celery
- Keep only the async function called directly from API endpoint

* feat: improve webhook test error messages and display

- Show HTTP status code in error messages
- Parse JSON error responses to extract meaningful messages
- Improved UI layout for webhook test results
- Added colored background for success/error states
- Better text wrapping for long error messages

* docs: adjust doc

* fix: review

* fix: update attempts to match close 24h

* fix: add event_id

* fix: changed to uuid, to have new event_id when reprocess.

* style: linting

* fix: alembic revision
This commit is contained in:
2025-08-29 10:07:49 -06:00
committed by GitHub
parent 6f0c7c1a5e
commit 88ed7cfa78
14 changed files with 1102 additions and 42 deletions

View File

@@ -1,3 +1,4 @@
import secrets
from datetime import datetime, timezone
from sqlite3 import IntegrityError
from typing import Literal
@@ -40,6 +41,8 @@ rooms = sqlalchemy.Table(
sqlalchemy.Column(
"is_shared", sqlalchemy.Boolean, nullable=False, server_default=false()
),
sqlalchemy.Column("webhook_url", sqlalchemy.String),
sqlalchemy.Column("webhook_secret", sqlalchemy.String),
sqlalchemy.Index("idx_room_is_shared", "is_shared"),
)
@@ -59,6 +62,8 @@ class Room(BaseModel):
"none", "prompt", "automatic", "automatic-2nd-participant"
] = "automatic-2nd-participant"
is_shared: bool = False
webhook_url: str = ""
webhook_secret: str = ""
class RoomController:
@@ -107,10 +112,15 @@ class RoomController:
recording_type: str,
recording_trigger: str,
is_shared: bool,
webhook_url: str = "",
webhook_secret: str = "",
):
"""
Add a new room
"""
if webhook_url and not webhook_secret:
webhook_secret = secrets.token_urlsafe(32)
room = Room(
name=name,
user_id=user_id,
@@ -122,6 +132,8 @@ class RoomController:
recording_type=recording_type,
recording_trigger=recording_trigger,
is_shared=is_shared,
webhook_url=webhook_url,
webhook_secret=webhook_secret,
)
query = rooms.insert().values(**room.model_dump())
try:
@@ -134,6 +146,9 @@ class RoomController:
"""
Update a room fields with key/values in values
"""
if values.get("webhook_url") and not values.get("webhook_secret"):
values["webhook_secret"] = secrets.token_urlsafe(32)
query = rooms.update().where(rooms.c.id == room.id).values(**values)
try:
await get_database().execute(query)

View File

@@ -7,6 +7,7 @@ Uses parallel processing for transcription, diarization, and waveform generation
"""
import asyncio
import uuid
from pathlib import Path
import av
@@ -14,7 +15,9 @@ import structlog
from celery import shared_task
from reflector.asynctask import asynctask
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import (
SourceKind,
Transcript,
TranscriptStatus,
transcripts_controller,
@@ -48,6 +51,7 @@ from reflector.processors.types import (
)
from reflector.settings import settings
from reflector.storage import get_transcripts_storage
from reflector.worker.webhook import send_transcript_webhook
class EmptyPipeline:
@@ -385,7 +389,6 @@ async def task_pipeline_file_process(*, transcript_id: str):
raise Exception(f"Transcript {transcript_id} not found")
pipeline = PipelineMainFile(transcript_id=transcript_id)
try:
await pipeline.set_status(transcript_id, "processing")
@@ -402,3 +405,17 @@ async def task_pipeline_file_process(*, transcript_id: str):
except Exception:
await pipeline.set_status(transcript_id, "error")
raise
# Trigger webhook if this is a room recording with webhook configured
if transcript.source_kind == SourceKind.ROOM and transcript.room_id:
room = await rooms_controller.get_by_id(transcript.room_id)
if room and room.webhook_url:
logger.info(
"Dispatching webhook task",
transcript_id=transcript_id,
room_id=room.id,
webhook_url=room.webhook_url,
)
send_transcript_webhook.delay(
transcript_id, room.id, event_id=uuid.uuid4().hex
)

View File

@@ -15,6 +15,7 @@ from reflector.db.meetings import meetings_controller
from reflector.db.rooms import rooms_controller
from reflector.settings import settings
from reflector.whereby import create_meeting, upload_logo
from reflector.worker.webhook import test_webhook
logger = logging.getLogger(__name__)
@@ -44,6 +45,11 @@ class Room(BaseModel):
is_shared: bool
class RoomDetails(Room):
webhook_url: str
webhook_secret: str
class Meeting(BaseModel):
id: str
room_name: str
@@ -64,6 +70,8 @@ class CreateRoom(BaseModel):
recording_type: str
recording_trigger: str
is_shared: bool
webhook_url: str
webhook_secret: str
class UpdateRoom(BaseModel):
@@ -76,16 +84,26 @@ class UpdateRoom(BaseModel):
recording_type: str
recording_trigger: str
is_shared: bool
webhook_url: str
webhook_secret: str
class DeletionStatus(BaseModel):
status: str
@router.get("/rooms", response_model=Page[Room])
class WebhookTestResult(BaseModel):
success: bool
message: str = ""
error: str = ""
status_code: int | None = None
response_preview: str | None = None
@router.get("/rooms", response_model=Page[RoomDetails])
async def rooms_list(
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
) -> list[Room]:
) -> list[RoomDetails]:
if not user and not settings.PUBLIC_MODE:
raise HTTPException(status_code=401, detail="Not authenticated")
@@ -99,6 +117,18 @@ async def rooms_list(
)
@router.get("/rooms/{room_id}", response_model=RoomDetails)
async def rooms_get(
room_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
user_id = user["sub"] if user else None
room = await rooms_controller.get_by_id_for_http(room_id, user_id=user_id)
if not room:
raise HTTPException(status_code=404, detail="Room not found")
return room
@router.post("/rooms", response_model=Room)
async def rooms_create(
room: CreateRoom,
@@ -117,10 +147,12 @@ async def rooms_create(
recording_type=room.recording_type,
recording_trigger=room.recording_trigger,
is_shared=room.is_shared,
webhook_url=room.webhook_url,
webhook_secret=room.webhook_secret,
)
@router.patch("/rooms/{room_id}", response_model=Room)
@router.patch("/rooms/{room_id}", response_model=RoomDetails)
async def rooms_update(
room_id: str,
info: UpdateRoom,
@@ -209,3 +241,24 @@ async def rooms_create_meeting(
meeting.host_room_url = ""
return meeting
@router.post("/rooms/{room_id}/webhook/test", response_model=WebhookTestResult)
async def rooms_test_webhook(
room_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
"""Test webhook configuration by sending a sample payload."""
user_id = user["sub"] if user else None
room = await rooms_controller.get_by_id(room_id)
if not room:
raise HTTPException(status_code=404, detail="Room not found")
if user_id and room.user_id != user_id:
raise HTTPException(
status_code=403, detail="Not authorized to test this room's webhook"
)
result = await test_webhook(room_id)
return WebhookTestResult(**result)

View File

@@ -0,0 +1,258 @@
"""Webhook task for sending transcript notifications."""
import hashlib
import hmac
import json
import uuid
from datetime import datetime, timezone
import httpx
import structlog
from celery import shared_task
from celery.utils.log import get_task_logger
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import transcripts_controller
from reflector.pipelines.main_live_pipeline import asynctask
from reflector.settings import settings
from reflector.utils.webvtt import topics_to_webvtt
logger = structlog.wrap_logger(get_task_logger(__name__))
def generate_webhook_signature(payload: bytes, secret: str, timestamp: str) -> str:
"""Generate HMAC signature for webhook payload."""
signed_payload = f"{timestamp}.{payload.decode('utf-8')}"
hmac_obj = hmac.new(
secret.encode("utf-8"),
signed_payload.encode("utf-8"),
hashlib.sha256,
)
return hmac_obj.hexdigest()
@shared_task(
bind=True,
max_retries=30,
default_retry_delay=60,
retry_backoff=True,
retry_backoff_max=3600, # Max 1 hour between retries
)
@asynctask
async def send_transcript_webhook(
self,
transcript_id: str,
room_id: str,
event_id: str,
):
log = logger.bind(
transcript_id=transcript_id,
room_id=room_id,
retry_count=self.request.retries,
)
try:
# Fetch transcript and room
transcript = await transcripts_controller.get_by_id(transcript_id)
if not transcript:
log.error("Transcript not found, skipping webhook")
return
room = await rooms_controller.get_by_id(room_id)
if not room:
log.error("Room not found, skipping webhook")
return
if not room.webhook_url:
log.info("No webhook URL configured for room, skipping")
return
# Generate WebVTT content from topics
topics_data = []
if transcript.topics:
# Build topics data with diarized content per topic
for topic in transcript.topics:
topic_webvtt = topics_to_webvtt([topic]) if topic.words else ""
topics_data.append(
{
"title": topic.title,
"summary": topic.summary,
"timestamp": topic.timestamp,
"duration": topic.duration,
"webvtt": topic_webvtt,
}
)
# Build webhook payload
frontend_url = f"{settings.UI_BASE_URL}/transcripts/{transcript.id}"
participants = [
{"id": p.id, "name": p.name, "speaker": p.speaker}
for p in (transcript.participants or [])
]
payload_data = {
"event": "transcript.completed",
"event_id": event_id,
"timestamp": datetime.now(timezone.utc).isoformat(),
"transcript": {
"id": transcript.id,
"room_id": transcript.room_id,
"created_at": transcript.created_at.isoformat(),
"duration": transcript.duration,
"title": transcript.title,
"short_summary": transcript.short_summary,
"long_summary": transcript.long_summary,
"webvtt": transcript.webvtt,
"topics": topics_data,
"participants": participants,
"source_language": transcript.source_language,
"target_language": transcript.target_language,
"status": transcript.status,
"frontend_url": frontend_url,
},
"room": {
"id": room.id,
"name": room.name,
},
}
# Convert to JSON
payload_json = json.dumps(payload_data, separators=(",", ":"))
payload_bytes = payload_json.encode("utf-8")
# Generate signature if secret is configured
headers = {
"Content-Type": "application/json",
"User-Agent": "Reflector-Webhook/1.0",
"X-Webhook-Event": "transcript.completed",
"X-Webhook-Retry": str(self.request.retries),
}
if room.webhook_secret:
timestamp = str(int(datetime.now(timezone.utc).timestamp()))
signature = generate_webhook_signature(
payload_bytes, room.webhook_secret, timestamp
)
headers["X-Webhook-Signature"] = f"t={timestamp},v1={signature}"
# Send webhook with timeout
async with httpx.AsyncClient(timeout=30.0) as client:
log.info(
"Sending webhook",
url=room.webhook_url,
payload_size=len(payload_bytes),
)
response = await client.post(
room.webhook_url,
content=payload_bytes,
headers=headers,
)
response.raise_for_status()
log.info(
"Webhook sent successfully",
status_code=response.status_code,
response_size=len(response.content),
)
except httpx.HTTPStatusError as e:
log.error(
"Webhook failed with HTTP error",
status_code=e.response.status_code,
response_text=e.response.text[:500], # First 500 chars
)
# Don't retry on client errors (4xx)
if 400 <= e.response.status_code < 500:
log.error("Client error, not retrying")
return
# Retry on server errors (5xx)
raise self.retry(exc=e)
except (httpx.ConnectError, httpx.TimeoutException) as e:
# Retry on network errors
log.error("Webhook failed with connection error", error=str(e))
raise self.retry(exc=e)
except Exception as e:
# Retry on unexpected errors
log.exception("Unexpected error in webhook task", error=str(e))
raise self.retry(exc=e)
async def test_webhook(room_id: str) -> dict:
"""
Test webhook configuration by sending a sample payload.
Returns immediately with success/failure status.
This is the shared implementation used by both the API endpoint and Celery task.
"""
try:
room = await rooms_controller.get_by_id(room_id)
if not room:
return {"success": False, "error": "Room not found"}
if not room.webhook_url:
return {"success": False, "error": "No webhook URL configured"}
now = (datetime.now(timezone.utc).isoformat(),)
payload_data = {
"event": "test",
"event_id": uuid.uuid4().hex,
"timestamp": now,
"message": "This is a test webhook from Reflector",
"room": {
"id": room.id,
"name": room.name,
},
}
payload_json = json.dumps(payload_data, separators=(",", ":"))
payload_bytes = payload_json.encode("utf-8")
# Generate headers with signature
headers = {
"Content-Type": "application/json",
"User-Agent": "Reflector-Webhook/1.0",
"X-Webhook-Event": "test",
}
if room.webhook_secret:
timestamp = str(int(datetime.now(timezone.utc).timestamp()))
signature = generate_webhook_signature(
payload_bytes, room.webhook_secret, timestamp
)
headers["X-Webhook-Signature"] = f"t={timestamp},v1={signature}"
# Send test webhook with short timeout
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
room.webhook_url,
content=payload_bytes,
headers=headers,
)
return {
"success": response.is_success,
"status_code": response.status_code,
"message": f"Webhook test {'successful' if response.is_success else 'failed'}",
"response_preview": response.text if response.text else None,
}
except httpx.TimeoutException:
return {
"success": False,
"error": "Webhook request timed out (10 seconds)",
}
except httpx.ConnectError as e:
return {
"success": False,
"error": f"Could not connect to webhook URL: {str(e)}",
}
except Exception as e:
return {
"success": False,
"error": f"Unexpected error: {str(e)}",
}