mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 12:19:06 +00:00
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f8209ec",
|
||||
"id": "a5ace857",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Visualization Experiments"
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f20e9fa1",
|
||||
"id": "9bfc569d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Lets load the data artefacts to local memory. These files are to be downloaded from S3 as the pipeline automatically uploads them to the pre-configured S3 bucket."
|
||||
@@ -18,17 +18,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"id": "16178ad6",
|
||||
"execution_count": 5,
|
||||
"id": "edc584b2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2023-06-14 19:24:02.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file df.pkl\u001b[0m\n",
|
||||
"\u001b[32m2023-06-14 19:24:04.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file mappings.pkl\u001b[0m\n",
|
||||
"\u001b[32m2023-06-14 19:24:05.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file transcript_timestamps.txt\u001b[0m\n"
|
||||
"\u001b[32m2023-06-14 22:52:15.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file df.pkl\u001b[0m\n",
|
||||
"\u001b[32m2023-06-14 22:52:19.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file mappings.pkl\u001b[0m\n",
|
||||
"\u001b[32m2023-06-14 22:52:19.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mfile_util\u001b[0m:\u001b[36mdownload_files\u001b[0m:\u001b[36m36\u001b[0m - \u001b[1mDownloading file transcript_timestamps.txt\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -38,59 +38,15 @@
|
||||
"\n",
|
||||
"# Download files from S3 bucket. You can download multiple files at a time by passing a list of names\n",
|
||||
"files_to_download = [\"df.pkl\", \"mappings.pkl\", 'transcript_timestamps.txt']\n",
|
||||
"download_files(files_to_download)\n",
|
||||
"\n"
|
||||
"download_files(files_to_download)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"id": "b03033e1",
|
||||
"execution_count": null,
|
||||
"id": "5027fe25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mDEPRECATION: https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0-py3-none-any.whl#egg=en_core_web_md==3.2.0 contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mCollecting en-core-web-md==3.2.0\n",
|
||||
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0-py3-none-any.whl (45.7 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
||||
"\u001b[?25hRequirement already satisfied: spacy<3.3.0,>=3.2.0 in /opt/anaconda3/lib/python3.8/site-packages (from en-core-web-md==3.2.0) (3.2.3)\n",
|
||||
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.8 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (3.0.9)\n",
|
||||
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (1.0.1)\n",
|
||||
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (1.0.6)\n",
|
||||
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.0.6)\n",
|
||||
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (3.0.6)\n",
|
||||
"Requirement already satisfied: thinc<8.1.0,>=8.0.12 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (8.0.15)\n",
|
||||
"Requirement already satisfied: blis<0.8.0,>=0.4.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (0.7.7)\n",
|
||||
"Requirement already satisfied: wasabi<1.1.0,>=0.8.1 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (0.9.0)\n",
|
||||
"Requirement already satisfied: srsly<3.0.0,>=2.4.1 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.4.2)\n",
|
||||
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.0.7)\n",
|
||||
"Requirement already satisfied: typer<0.5.0,>=0.3.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (0.3.2)\n",
|
||||
"Requirement already satisfied: pathy>=0.3.5 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (0.6.1)\n",
|
||||
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (4.64.1)\n",
|
||||
"Requirement already satisfied: numpy>=1.15.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (1.23.5)\n",
|
||||
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.27.1)\n",
|
||||
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (1.8.2)\n",
|
||||
"Requirement already satisfied: jinja2 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.11.3)\n",
|
||||
"Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (67.7.2)\n",
|
||||
"Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (21.3)\n",
|
||||
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /opt/anaconda3/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (3.3.0)\n",
|
||||
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/anaconda3/lib/python3.8/site-packages (from packaging>=20.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.4.7)\n",
|
||||
"Requirement already satisfied: smart-open<6.0.0,>=5.0.0 in /opt/anaconda3/lib/python3.8/site-packages (from pathy>=0.3.5->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (5.2.1)\n",
|
||||
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/anaconda3/lib/python3.8/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (4.5.0)\n",
|
||||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/anaconda3/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (1.26.4)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2020.12.5)\n",
|
||||
"Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/anaconda3/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.0.10)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.10)\n",
|
||||
"Requirement already satisfied: click<7.2.0,>=7.1.1 in /opt/anaconda3/lib/python3.8/site-packages (from typer<0.5.0,>=0.3.0->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (7.1.2)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=0.23 in /opt/anaconda3/lib/python3.8/site-packages (from jinja2->spacy<3.3.0,>=3.2.0->en-core-web-md==3.2.0) (2.0.1)\n",
|
||||
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
|
||||
"You can now load the package via spacy.load('en_core_web_md')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download spacy model for the first time\n",
|
||||
"!spacy download en_core_web_md\n"
|
||||
@@ -98,8 +54,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"id": "ee3a7ad9",
|
||||
"execution_count": 6,
|
||||
"id": "a1fc2846",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -112,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9ba072d9",
|
||||
"id": "8abc435d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example template 1"
|
||||
@@ -120,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21d5dcd5",
|
||||
"id": "2b1a4834",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scatter plot of transcription with Topic modelling"
|
||||
@@ -128,7 +84,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff6acd05",
|
||||
"id": "a795137e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Change the values of \"category\", \"category_name\" to one agenda topic and change the value of \"not_category_name\" and see different plots."
|
||||
@@ -136,7 +92,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 21,
|
||||
"id": "43e01074",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -144,6 +100,7 @@
|
||||
"import pandas as pd\n",
|
||||
"import scattertext as st\n",
|
||||
"\n",
|
||||
"df = pd.read_pickle(\"df.pkl\")\n",
|
||||
"\n",
|
||||
"def plot_topic_modelling_and_word_to_sentence_search(df, cat_1, cat_1_name, cat_2_name):\n",
|
||||
" df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))\n",
|
||||
@@ -162,16 +119,16 @@
|
||||
" open('./demo_compact.html', 'w').write(html)\n",
|
||||
"\n",
|
||||
"plot_topic_modelling_and_word_to_sentence_search(df,\n",
|
||||
" cat_1=\"TAM\",\n",
|
||||
" cat_1_name=\"TAM\",\n",
|
||||
" cat_2_name=\"Churn\")\n",
|
||||
" cat_1=\"Founders\",\n",
|
||||
" cat_1_name=\"Founders\",\n",
|
||||
" cat_2_name=\"TAM\")\n",
|
||||
"\n",
|
||||
"# once you are done, check the generated HTML file\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0610165",
|
||||
"id": "e9994c87",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example template 2"
|
||||
@@ -179,7 +136,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8b1684df",
|
||||
"id": "35c4f7fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Time driven Insights"
|
||||
@@ -187,8 +144,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"id": "68eb04f7",
|
||||
"execution_count": 9,
|
||||
"id": "7cdcd66f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -201,8 +158,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"id": "eaf9c5ed",
|
||||
"execution_count": 10,
|
||||
"id": "11221022",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -252,9 +209,64 @@
|
||||
"plot_time_spent_for_topic(timestamp_to_topic_second_match, \"second\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "a691664f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import collections\n",
|
||||
"topic_times = collections.defaultdict(int)\n",
|
||||
"for key in timestamp_to_topic_first_match.keys():\n",
|
||||
" duration = key[1] - key[0]\n",
|
||||
" topic_times[timestamp_to_topic_first_match[key]] += duration\n",
|
||||
"\n",
|
||||
"topic_times = sorted(topic_times.items(), key=lambda x:x[1], reverse=True)\n",
|
||||
"cat_1 = topic_times[0][0]\n",
|
||||
"cat_1_name = topic_times[0][0]\n",
|
||||
"cat_2_name = topic_times[1][0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "6451e86d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import scattertext as st\n",
|
||||
"\n",
|
||||
"df = pd.read_pickle(\"df.pkl\")\n",
|
||||
"\n",
|
||||
"def plot_topic_modelling_and_word_to_sentence_search(df, cat_1, cat_1_name, cat_2_name):\n",
|
||||
" df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))\n",
|
||||
"\n",
|
||||
" corpus = st.CorpusFromParsedDocuments(\n",
|
||||
" df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'\n",
|
||||
" ).build().get_unigram_corpus().remove_terms(stopwords, ignore_absences=True).compact(st.AssociationCompactor(2000))\n",
|
||||
" \n",
|
||||
" html = st.produce_scattertext_explorer(\n",
|
||||
" corpus,\n",
|
||||
" category=cat_1, category_name=cat_1_name, not_category_name=cat_2_name,\n",
|
||||
" minimum_term_frequency=0, pmi_threshold_coefficient=0,\n",
|
||||
" width_in_pixels=1000,\n",
|
||||
" transform=st.Scalers.dense_rank\n",
|
||||
" )\n",
|
||||
" open('./demo_compact.html', 'w').write(html)\n",
|
||||
"\n",
|
||||
"plot_topic_modelling_and_word_to_sentence_search(df,\n",
|
||||
" cat_1=cat_1,\n",
|
||||
" cat_1_name=cat_1_name,\n",
|
||||
" cat_2_name=cat_2_name)\n",
|
||||
"\n",
|
||||
"# once you are done, check the generated HTML file\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60bda970",
|
||||
"id": "e9ae6e25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example template 3"
|
||||
@@ -262,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1707621",
|
||||
"id": "69be38ce",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Enhanced search for timelines"
|
||||
@@ -270,7 +282,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2d574de",
|
||||
"id": "f8a47348",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can already search for a particular word in the interactive HTML document from example 1 to see a list of all transcribed sentences having an occurence of the word (in the context of the chosen topic). \n",
|
||||
@@ -288,8 +300,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"id": "a5d1ea29",
|
||||
"execution_count": 13,
|
||||
"id": "69d814c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -325,7 +337,7 @@
|
||||
" (2472.44, 2474.96)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 86,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -342,7 +354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "10478204",
|
||||
"id": "b587da79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Selective segregation of content"
|
||||
@@ -351,7 +363,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"id": "9c0ee0a2",
|
||||
"id": "5dc2014f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -374,7 +386,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"id": "2501c721",
|
||||
"id": "caeff7f1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -394,7 +406,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a61a12e",
|
||||
"id": "a20896b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Selective topic summarization"
|
||||
@@ -402,7 +414,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "490da9a0",
|
||||
"id": "6f8ab415",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use this selective content to now summarize using the already available pipeline !"
|
||||
@@ -410,19 +422,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "53b525e3",
|
||||
"id": "06f009d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# And Much More !!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46b4730a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -9,8 +9,3 @@ AWS_ACCESS_KEY=***REMOVED***
|
||||
AWS_SECRET_KEY=***REMOVED***
|
||||
BUCKET_NAME='reflector-bucket'
|
||||
|
||||
# For the topic modelling viz chart
|
||||
CATEGORY_1=TAM
|
||||
CATEGORY_1_NAME=TAM
|
||||
CATEGORY_2_NAME=Churn
|
||||
|
||||
|
||||
18
whisjax.py
18
whisjax.py
@@ -222,6 +222,18 @@ def create_talk_diff_scatter_viz():
|
||||
|
||||
# to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
|
||||
|
||||
# pick the 2 most matched topic to be used for plotting
|
||||
topic_times = collections.defaultdict(int)
|
||||
for key in ts_to_topic_mapping_top_1.keys():
|
||||
duration = key[1] - key[0]
|
||||
topic_times[ts_to_topic_mapping_top_1[key]] += duration
|
||||
|
||||
topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
|
||||
|
||||
cat_1 = topic_times[0][0]
|
||||
cat_1_name = topic_times[0][0]
|
||||
cat_2_name = topic_times[1][0]
|
||||
|
||||
# Scatter plot of topics
|
||||
df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
|
||||
corpus = st.CorpusFromParsedDocuments(
|
||||
@@ -229,9 +241,9 @@ def create_talk_diff_scatter_viz():
|
||||
).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
|
||||
html = st.produce_scattertext_explorer(
|
||||
corpus,
|
||||
category=config["DEFAULT"]["CATEGORY_1"],
|
||||
category_name=config["DEFAULT"]["CATEGORY_1_NAME"],
|
||||
not_category_name=config["DEFAULT"]["CATEGORY_2_NAME"],
|
||||
category=cat_1,
|
||||
category_name=cat_1_name,
|
||||
not_category_name=cat_2_name,
|
||||
minimum_term_frequency=0, pmi_threshold_coefficient=0,
|
||||
width_in_pixels=1000,
|
||||
transform=st.Scalers.dense_rank
|
||||
|
||||
Reference in New Issue
Block a user