# Visualization Experiments

Let's load the data artefacts into local memory. The pipeline automatically uploads these files to the pre-configured S3 bucket, so they have to be downloaded from S3 first.

In [24]:
from file_util import download_files
import pickle

# Fetch the pipeline artefacts from the S3 bucket.
# download_files takes a list of names, so several files can be fetched in one call.
files_to_download = [
    "df.pkl",
    "mappings.pkl",
]
download_files(files_to_download)



2023-06-14 18:01:52.751 | INFO     | file_util:download_files:36 - Downloading file df.pkl
2023-06-14 18:01:56.546 | INFO     | file_util:download_files:36 - Downloading file mappings.pkl


In [None]:
# Download spacy model for the first time
!spacy download en_core_web_md


In [16]:
import spacy

# Name of the spaCy pipeline package to load.
spaCy_model = "en_core_web_md"
nlp = spacy.load(name=spaCy_model)

# Stop words taken from the loaded pipeline's language defaults.
stopwords = nlp.Defaults.stop_words


## Example template 1

## Scatter plot of transcription with Topic modelling

Change the values of `cat_1` and `cat_1_name` (passed to scattertext as "category" and "category_name") to one agenda topic, and change the value of `cat_2_name` (passed as "not_category_name"), to see different plots.

In [17]:
import pandas as pd
import scattertext as st


def plot_topic_modelling_and_word_to_sentence_search(
    df,
    cat_1,
    cat_1_name,
    cat_2_name,
    category_col='ts_to_topic_mapping_top_1',
    output_path='./demo_compact.html',
):
    """Build a scattertext explorer for one topic and write it to an HTML file.

    The ``text`` column of ``df`` is parsed with
    ``st.whitespace_nlp_with_sentences``, a unigram corpus is built with
    ``category_col`` as the document category, the module-level ``stopwords``
    are removed, and the corpus is compacted with
    ``st.AssociationCompactor(2000)`` before the explorer HTML is produced.

    Relies on the module-level names ``st`` (scattertext) and ``stopwords``.

    Args:
        df: DataFrame with a ``text`` column and the ``category_col`` column.
        cat_1: Value passed to scattertext as ``category``
            (presumably one of the labels found in ``category_col``).
        cat_1_name: Display name passed as ``category_name``.
        cat_2_name: Display name passed as ``not_category_name``.
        category_col: Column holding each row's category label. Defaults to
            the previously hard-coded ``'ts_to_topic_mapping_top_1'``.
        output_path: Path of the HTML file to write. Defaults to the
            previously hard-coded ``'./demo_compact.html'``.

    Returns:
        None. The explorer is written to ``output_path``.
    """
    # Work on a copy carrying the parsed documents; the caller's frame is untouched.
    parsed_df = df.assign(
        parse=lambda frame: frame.text.apply(st.whitespace_nlp_with_sentences)
    )

    corpus = (
        st.CorpusFromParsedDocuments(
            parsed_df, category_col=category_col, parsed_col='parse'
        )
        .build()
        .get_unigram_corpus()
        # ignore_absences=True: stop words missing from the corpus are skipped.
        .remove_terms(stopwords, ignore_absences=True)
        .compact(st.AssociationCompactor(2000))
    )

    html = st.produce_scattertext_explorer(
        corpus,
        category=cat_1, category_name=cat_1_name, not_category_name=cat_2_name,
        minimum_term_frequency=0, pmi_threshold_coefficient=0,
        width_in_pixels=1000,
        transform=st.Scalers.dense_rank
    )

    # Close the file deterministically and write UTF-8 explicitly so non-ASCII
    # terms do not depend on the platform's default encoding.
    with open(output_path, 'w', encoding='utf-8') as html_file:
        html_file.write(html)

# `df` is not defined by any cell above, so this call fails on a fresh kernel.
# Load the artefact downloaded in the first cell before plotting.
# NOTE(review): assumes df.pkl is a pickled DataFrame with the `text` and
# `ts_to_topic_mapping_top_1` columns the plotting function reads — confirm.
df = pd.read_pickle("df.pkl")

plot_topic_modelling_and_word_to_sentence_search(
    df,
    cat_1="TAM",
    cat_1_name="TAM",
    cat_2_name="Churn",
)

# Once the cell has finished, open the generated HTML file (./demo_compact.html) in a browser.


## Timeline visualizer

In [25]:
# Load the mappings artefact from the working directory.
# The context manager closes the file handle, which a bare open() call leaves dangling.
# NOTE: pickle can execute arbitrary code on load; only unpickle artefacts
# that come from the trusted pipeline bucket.
with open("mappings.pkl", "rb") as mappings_file:
    mappings = pickle.load(mappings_file)

In [26]:
# Display the loaded mappings object as this cell's output.
mappings

[{(0.0, 46.16): 'TAM',
  (46.16, 52.32): 'Founders',
  (52.32, 58.0): 'Founders',
  (59.92, 69.92): 'Founders',
  (69.92, 75.6): 'Founders',
  (75.6, 80.48): 'AGENDA',
  (80.48, 86.24): 'Founders',
  (86.24, 91.6): 'Founders',
  (92.32, 98.64): 'Founders',
  (99.36, 103.12): 'Founders',
  (103.12, 110.0): 'Founders',
  (110.0, 114.0): 'Founders',
  (114.0, 125.92): 'Founders',
  (125.92, 129.6): 'Founders',
  (129.6, 134.64): 'TAM',
  (134.64, 140.32): 'TAM',
  (140.32, 144.24): 'Founders',
  (144.24, 148.64): 'Founders',
  (148.64, 156.08): 'Founders',
  (156.08, 167.0): 'Founders',
  (167.0, 186.14): 'Founders',
  (186.14, 188.18): 'Founders',
  (188.18, 191.46): 'Founders',
  (191.46, 193.14): 'TAM',
  (193.14, 196.38): 'Founders',
  (196.38, 200.08): 'TAM',
  (200.08, 202.54): 'Founders',
  (202.54, 226.0): 'Founders',
  (226.0, 228.0): 'Founders',
  (228.0, 230.0): 'TAM',
  (230.0, 236.0): 'TAM',
  (236.0, 240.0): 'Product market fit',
  (240.0, 244.0): 'TAM',
  (244.0, 246.96): '