diff --git a/server/reflector/processors/audio_transcript.py b/server/reflector/processors/audio_transcript.py
index f029b587..3f9dc85b 100644
--- a/server/reflector/processors/audio_transcript.py
+++ b/server/reflector/processors/audio_transcript.py
@@ -1,6 +1,4 @@
-from profanityfilter import ProfanityFilter
 from prometheus_client import Counter, Histogram
-
 from reflector.processors.base import Processor
 from reflector.processors.types import AudioFile, Transcript
 
@@ -40,8 +38,6 @@ class AudioTranscriptProcessor(Processor):
         self.m_transcript_call = self.m_transcript_call.labels(name)
         self.m_transcript_success = self.m_transcript_success.labels(name)
         self.m_transcript_failure = self.m_transcript_failure.labels(name)
-        self.profanity_filter = ProfanityFilter()
-        self.profanity_filter.set_censor("*")
         super().__init__(*args, **kwargs)
 
     async def _push(self, data: AudioFile):
@@ -60,9 +56,3 @@ class AudioTranscriptProcessor(Processor):
 
     async def _transcript(self, data: AudioFile):
         raise NotImplementedError
-
-    def filter_profanity(self, text: str) -> str:
-        """
-        Remove censored words from the transcript
-        """
-        return self.profanity_filter.censor(text)
diff --git a/server/reflector/processors/audio_transcript_modal.py b/server/reflector/processors/audio_transcript_modal.py
index 201ed9d4..23c9d74e 100644
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -48,10 +48,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
         )
         response.raise_for_status()
         result = response.json()
-        text = result["text"][source_language]
-        text = self.filter_profanity(text)
         transcript = Transcript(
-            text=text,
             words=[
                 Word(
                     text=word["text"],
diff --git a/server/reflector/processors/audio_transcript_whisper.py b/server/reflector/processors/audio_transcript_whisper.py
index e3bd595b..cd96e01a 100644
--- a/server/reflector/processors/audio_transcript_whisper.py
+++ b/server/reflector/processors/audio_transcript_whisper.py
@@ -30,7 +30,6 @@ class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
         ts = data.timestamp
 
         for segment in segments:
-            transcript.text += segment.text
             for word in segment.words:
                 transcript.words.append(
                     Word(
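With the hunks above, processors no longer assemble `transcript.text` by hand or filter it themselves; censoring moves into the `Transcript` type (see the types.py hunk below). A minimal sketch of the resulting behavior, with invented word data:

from reflector.processors.types import Transcript, Word

# Build a transcript the way a processor now does: words only, no text field.
transcript = Transcript(
    words=[
        Word(text=" hello", start=0.0, end=0.4),
        Word(text=" world.", start=0.5, end=0.9),
    ]
)
print(transcript.raw_text)  # " hello world." - plain join of the word texts
print(transcript.text)      # same text run through the profanity filter, stripped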
diff --git a/server/reflector/processors/transcript_liner.py b/server/reflector/processors/transcript_liner.py
index c1aa14a0..b4e7b5e3 100644
--- a/server/reflector/processors/transcript_liner.py
+++ b/server/reflector/processors/transcript_liner.py
@@ -36,7 +36,6 @@ class TranscriptLinerProcessor(Processor):
         # cut to the next .
         partial = Transcript(words=[])
         for word in self.transcript.words[:]:
-            partial.text += word.text
             partial.words.append(word)
             if not self.is_sentence_terminated(word.text):
                 continue
diff --git a/server/reflector/processors/types.py b/server/reflector/processors/types.py
index e867becf..686c5785 100644
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -2,8 +2,12 @@ import io
 import tempfile
 from pathlib import Path
 
+from profanityfilter import ProfanityFilter
 from pydantic import BaseModel, PrivateAttr
 
+profanity_filter = ProfanityFilter()
+profanity_filter.set_censor("*")
+
 
 class AudioFile(BaseModel):
     name: str
@@ -43,13 +47,29 @@ class Word(BaseModel):
     text: str
     start: float
    end: float
+    speaker: int = 0
+
+
+class TranscriptSegment(BaseModel):
+    text: str
+    start: float
+    speaker: int = 0
 
 
 class Transcript(BaseModel):
-    text: str = ""
     translation: str | None = None
     words: list[Word] = None
 
+    @property
+    def raw_text(self):
+        # Uncensored text
+        return "".join([word.text for word in self.words])
+
+    @property
+    def text(self):
+        # Censored text
+        return profanity_filter.censor(self.raw_text).strip()
+
     @property
     def human_timestamp(self):
         minutes = int(self.timestamp / 60)
@@ -74,7 +94,6 @@
             self.words = other.words
         else:
             self.words.extend(other.words)
-        self.text += other.text
 
     def add_offset(self, offset: float):
         for word in self.words:
@@ -87,6 +106,49 @@
         ]
-        return Transcript(text=self.text, translation=self.translation, words=words)
+        return Transcript(translation=self.translation, words=words)
 
+    def as_segments(self):
+        # From the list of words, build a list of segments: join words that
+        # are less than 2 seconds apart, but split on a speaker change, on a
+        # long pause, or at . ; : ? ! … once the segment exceeds 80 chars
+        segments = []
+        current_segment = None
+        last_word = None
+        BLANK_TIME_SECS = 2
+        MAX_SEGMENT_LENGTH = 80
+        for word in self.words:
+            if current_segment is None:
+                current_segment = TranscriptSegment(
+                    text=word.text,
+                    start=word.start,
+                    speaker=word.speaker,
+                )
+                last_word = word
+                continue
+            is_blank = False
+            if last_word:
+                is_blank = word.start - last_word.end > BLANK_TIME_SECS
+            if (
+                word.speaker != current_segment.speaker
+                or (
+                    word.text in ".;:?!…"
+                    and len(current_segment.text) > MAX_SEGMENT_LENGTH
+                )
+                or is_blank
+            ):
+                # close the current segment and start a new one
+                segments.append(current_segment)
+                current_segment = TranscriptSegment(
+                    text=word.text,
+                    start=word.start,
+                    speaker=word.speaker,
+                )
+            else:
+                current_segment.text += word.text
+            last_word = word
+        if current_segment:
+            segments.append(current_segment)
+        return segments
+
 
 class TitleSummary(BaseModel):
     title: str
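The new `as_segments()` feeds the per-speaker segments used in the views change below. A quick sketch of the splitting rules, with invented words and timings: a speaker change or a gap longer than BLANK_TIME_SECS starts a new segment:

from reflector.processors.types import Transcript, Word

transcript = Transcript(
    words=[
        Word(text=" hi", start=0.0, end=0.3, speaker=0),
        Word(text=" there", start=0.4, end=0.8, speaker=0),
        Word(text=" hello", start=1.0, end=1.4, speaker=1),  # speaker change
        Word(text=" anyway", start=4.0, end=4.5, speaker=1), # >2 s gap
    ]
)
for segment in transcript.as_segments():
    print(segment.speaker, segment.start, repr(segment.text))
# 0 0.0 ' hi there'
# 1 1.0 ' hello'
# 1 4.0 ' anyway'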
diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py
index a7e01b8c..0a068c17 100644
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -49,12 +49,18 @@ class TranscriptText(BaseModel):
     translation: str | None
 
 
+class TranscriptSegmentTopic(BaseModel):
+    speaker: int
+    text: str
+    timestamp: float
+
+
 class TranscriptTopic(BaseModel):
     id: str = Field(default_factory=generate_uuid4)
     title: str
     summary: str
-    transcript: str | None = None
     timestamp: float
+    segments: list[TranscriptSegmentTopic] = []
 
 
 class TranscriptFinalShortSummary(BaseModel):
@@ -523,8 +529,15 @@ async def handle_rtc_event(event: PipelineEvent, args, data):
             topic = TranscriptTopic(
                 title=data.title,
                 summary=data.summary,
-                transcript=data.transcript.text,
                 timestamp=data.timestamp,
+                segments=[
+                    TranscriptSegmentTopic(
+                        speaker=segment.speaker,
+                        text=segment.text,
+                        timestamp=segment.start,
+                    )
+                    for segment in data.transcript.as_segments()
+                ],
             )
             resp = transcript.add_event(event=event, data=topic)
             transcript.upsert_topic(topic)
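For reference, a sketch of the topic payload handle_rtc_event now emits, per the models above (title, summary, and timings invented; `.dict()` assumes pydantic v1, use `.model_dump()` on v2):

from reflector.views.transcripts import TranscriptSegmentTopic, TranscriptTopic

topic = TranscriptTopic(
    title="Weekly sync",
    summary="Status updates from the team.",
    timestamp=12.5,
    segments=[
        TranscriptSegmentTopic(speaker=0, text=" hi there", timestamp=12.5),
        TranscriptSegmentTopic(speaker=1, text=" hello", timestamp=14.0),
    ],
)
# The stored event carries a generated "id" plus per-speaker segments
# instead of the old flat transcript string.
print(topic.dict())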