mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
Push new features
This commit is contained in:
147
Viz-experiments.ipynb
Normal file
147
Viz-experiments.ipynb
Normal file
@@ -0,0 +1,147 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f604fe38",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Visualization Experiments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cad594ed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Lets load the data artefacts to local memory. These files are to be downloaded from S3 as the pipeline automatically uploads them to the pre-configured S3 bucket."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dbd7b93d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from file_util import download_files\n",
|
||||
"import pickle\n",
|
||||
"\n",
|
||||
"# Download files from S3 bucket. You can download multiple files at a time by passing a list of names\n",
|
||||
"files_to_download = [\"df.pkl\", \"mapping.pkl\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f59ff46b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download spacy model for the first time\n",
|
||||
"!spacy download en_core_web_md\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "61aee352",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"\n",
|
||||
"spaCy_model = \"en_core_web_md\"\n",
|
||||
"nlp = spacy.load(spaCy_model)\n",
|
||||
"stopwords = nlp.Defaults.stop_words\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5584c887",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scatter plot of transcription with Topic modelling"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5fae1776",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Change the values of \"category\", \"category_name\" to one agenda topic and change the value of \"not_category_name\" and see different plots."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "43e01074",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import scattertext as st\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_topic_modelling_and_word_to_sentence_search(df, cat_1, cat_1_name, cat_2_name):\n",
|
||||
" df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))\n",
|
||||
"\n",
|
||||
" corpus = st.CorpusFromParsedDocuments(\n",
|
||||
" df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'\n",
|
||||
" ).build().get_unigram_corpus().remove_terms(stopwords, ignore_absences=True).compact(st.AssociationCompactor(2000))\n",
|
||||
" \n",
|
||||
" html = st.produce_scattertext_explorer(\n",
|
||||
" corpus,\n",
|
||||
" category=cat_1, category_name=cat_1_name, not_category_name=cat_2_name,\n",
|
||||
" minimum_term_frequency=0, pmi_threshold_coefficient=0,\n",
|
||||
" width_in_pixels=1000,\n",
|
||||
" transform=st.Scalers.dense_rank\n",
|
||||
" )\n",
|
||||
" open('./demo_compact.html', 'w').write(html)\n",
|
||||
"\n",
|
||||
"plot_topic_modelling_and_word_to_sentence_search(df,\n",
|
||||
" cat_1=\"TAM\",\n",
|
||||
" cat_1_name=\"TAM\",\n",
|
||||
" cat_2_name=\"Churn\")\n",
|
||||
"\n",
|
||||
"# once you are done, check the generated HTML file\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e2d6ec49",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Timeline visualizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08e83128",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user