deslop
This commit is contained in:
131
comparison.py
131
comparison.py
@@ -3,7 +3,6 @@
|
|||||||
# dependencies = [
|
# dependencies = [
|
||||||
# "marimo",
|
# "marimo",
|
||||||
# "httpx",
|
# "httpx",
|
||||||
# "polars",
|
|
||||||
# ]
|
# ]
|
||||||
# ///
|
# ///
|
||||||
|
|
||||||
@@ -25,9 +24,7 @@ def _():
|
|||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import polars as pl
|
return Path, csv, hashlib, httpx, io, json, mo, os, time
|
||||||
|
|
||||||
return Path, csv, hashlib, httpx, io, json, mo, os, pl, time
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
@@ -138,71 +135,6 @@ def _(
|
|||||||
return (people,)
|
return (people,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
|
||||||
def _():
|
|
||||||
def extract_apollo(data):
|
|
||||||
p = data.get("person") or {}
|
|
||||||
org = p.get("organization") or {}
|
|
||||||
loc = [p.get("city"), p.get("state"), p.get("country")]
|
|
||||||
return {
|
|
||||||
"name": p.get("name") or "",
|
|
||||||
"title": p.get("title") or "",
|
|
||||||
"company": org.get("name") or "",
|
|
||||||
"industry": org.get("industry") or "",
|
|
||||||
"location": ", ".join(x for x in loc if x),
|
|
||||||
"linkedin": p.get("linkedin_url") or "",
|
|
||||||
"phones": ", ".join(p.get("phone_numbers") or []),
|
|
||||||
"found_emails": p.get("email") or "",
|
|
||||||
}
|
|
||||||
|
|
||||||
def extract_pdl(data):
|
|
||||||
d = data.get("data") or data
|
|
||||||
phones = d.get("mobile_phone") or ""
|
|
||||||
if not phones and d.get("phone_numbers"):
|
|
||||||
phones = ", ".join(d["phone_numbers"][:3])
|
|
||||||
emails_parts = []
|
|
||||||
if d.get("work_email"):
|
|
||||||
emails_parts.append(d["work_email"])
|
|
||||||
if d.get("personal_emails"):
|
|
||||||
emails_parts.extend(d["personal_emails"][:2])
|
|
||||||
return {
|
|
||||||
"name": d.get("full_name") or "",
|
|
||||||
"title": d.get("job_title") or "",
|
|
||||||
"company": d.get("job_company_name") or "",
|
|
||||||
"industry": d.get("job_company_industry") or "",
|
|
||||||
"location": d.get("location_name") or "",
|
|
||||||
"linkedin": d.get("linkedin_url") or "",
|
|
||||||
"phones": phones,
|
|
||||||
"found_emails": ", ".join(emails_parts),
|
|
||||||
}
|
|
||||||
|
|
||||||
def extract_fullenrich(data):
|
|
||||||
ci = data.get("contact_info") or {}
|
|
||||||
prof = data.get("profile") or {}
|
|
||||||
inp = data.get("input") or {}
|
|
||||||
emails_parts = []
|
|
||||||
if ci.get("most_probable_work_email"):
|
|
||||||
emails_parts.append(ci["most_probable_work_email"])
|
|
||||||
if ci.get("work_emails"):
|
|
||||||
for e in ci["work_emails"]:
|
|
||||||
if e not in emails_parts:
|
|
||||||
emails_parts.append(e)
|
|
||||||
if ci.get("personal_email"):
|
|
||||||
emails_parts.append(ci["personal_email"])
|
|
||||||
return {
|
|
||||||
"name": prof.get("full_name") or "",
|
|
||||||
"title": prof.get("headline") or "",
|
|
||||||
"company": inp.get("company_name") or "",
|
|
||||||
"industry": "",
|
|
||||||
"location": prof.get("location") or "",
|
|
||||||
"linkedin": inp.get("linkedin_url") or "",
|
|
||||||
"phones": ", ".join(ci.get("phones") or []),
|
|
||||||
"found_emails": ", ".join(emails_parts),
|
|
||||||
}
|
|
||||||
|
|
||||||
return extract_apollo, extract_fullenrich, extract_pdl
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, people, apollo_key, httpx, read_cache, write_cache):
|
def _(mo, people, apollo_key, httpx, read_cache, write_cache):
|
||||||
apollo_results = {}
|
apollo_results = {}
|
||||||
@@ -257,10 +189,19 @@ def _(mo, people, pdl_key, httpx, read_cache, write_cache):
|
|||||||
pdl_results[_email] = _cached
|
pdl_results[_email] = _cached
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
|
_params = {"email": _email, "min_likelihood": 5}
|
||||||
|
if _person.get("first_name"):
|
||||||
|
_params["first_name"] = _person["first_name"]
|
||||||
|
if _person.get("last_name"):
|
||||||
|
_params["last_name"] = _person["last_name"]
|
||||||
|
if _person.get("linkedin_url"):
|
||||||
|
_params["profile"] = _person["linkedin_url"]
|
||||||
|
if _person.get("domain"):
|
||||||
|
_params["website"] = _person["domain"]
|
||||||
_r = httpx.get(
|
_r = httpx.get(
|
||||||
"https://api.peopledatalabs.com/v5/person/enrich",
|
"https://api.peopledatalabs.com/v5/person/enrich",
|
||||||
headers={"X-Api-Key": pdl_key.value},
|
headers={"X-Api-Key": pdl_key.value},
|
||||||
params={"email": _email, "min_likelihood": 5},
|
params=_params,
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
_data = _r.json()
|
_data = _r.json()
|
||||||
@@ -302,10 +243,10 @@ def _(mo, people, fullenrich_key, httpx, read_cache, write_cache, time):
|
|||||||
"custom": {"email": _person["email"]},
|
"custom": {"email": _person["email"]},
|
||||||
}
|
}
|
||||||
_has_id = False
|
_has_id = False
|
||||||
if _person.get("first_name") and _person.get("last_name"):
|
if _person.get("first_name") and _person.get("last_name") and _person.get("domain"):
|
||||||
_entry["first_name"] = _person["first_name"]
|
_entry["first_name"] = _person["first_name"]
|
||||||
_entry["last_name"] = _person["last_name"]
|
_entry["last_name"] = _person["last_name"]
|
||||||
_entry["domain"] = _person.get("domain") or _person["email"].split("@")[1]
|
_entry["domain"] = _person["domain"]
|
||||||
_has_id = True
|
_has_id = True
|
||||||
if _person.get("linkedin_url"):
|
if _person.get("linkedin_url"):
|
||||||
_entry["linkedin_url"] = _person["linkedin_url"]
|
_entry["linkedin_url"] = _person["linkedin_url"]
|
||||||
@@ -366,40 +307,13 @@ def _(mo, people, fullenrich_key, httpx, read_cache, write_cache, time):
|
|||||||
return (fullenrich_results,)
|
return (fullenrich_results,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
|
||||||
def _(
|
|
||||||
mo, people, apollo_results, pdl_results, fullenrich_results,
|
|
||||||
extract_apollo, extract_pdl, extract_fullenrich, pl,
|
|
||||||
):
|
|
||||||
_rows = []
|
|
||||||
for _person in people:
|
|
||||||
_email = _person["email"]
|
|
||||||
for _provider, _results, _extractor in [
|
|
||||||
("Apollo", apollo_results, extract_apollo),
|
|
||||||
("PDL", pdl_results, extract_pdl),
|
|
||||||
("FullEnrich", fullenrich_results, extract_fullenrich),
|
|
||||||
]:
|
|
||||||
if _email in _results and "error" not in _results[_email]:
|
|
||||||
_extracted = _extractor(_results[_email])
|
|
||||||
_rows.append({"email": _email, "provider": _provider, **_extracted})
|
|
||||||
|
|
||||||
comparison_df = pl.DataFrame(_rows) if _rows else None
|
|
||||||
|
|
||||||
if comparison_df is not None:
|
|
||||||
mo.vstack([mo.md("## Comparison"), mo.ui.table(comparison_df)])
|
|
||||||
else:
|
|
||||||
mo.md("## Comparison\n\n*No results yet*")
|
|
||||||
|
|
||||||
return (comparison_df,)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
||||||
def _fmt(d):
|
def _fmt(d):
|
||||||
return mo.md(f"```json\n{json.dumps(d, indent=2, default=str)}\n```")
|
return mo.md(f"```json\n{json.dumps(d, indent=2, default=str)}\n```")
|
||||||
|
|
||||||
mo.vstack([
|
mo.vstack([
|
||||||
mo.md("## Raw Results"),
|
mo.md("## Results"),
|
||||||
mo.ui.tabs({
|
mo.ui.tabs({
|
||||||
"Apollo": _fmt(apollo_results),
|
"Apollo": _fmt(apollo_results),
|
||||||
"PDL": _fmt(pdl_results),
|
"PDL": _fmt(pdl_results),
|
||||||
@@ -410,24 +324,17 @@ def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, comparison_df, apollo_results, pdl_results, fullenrich_results, json):
|
def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
||||||
_items = []
|
|
||||||
if comparison_df is not None:
|
|
||||||
_csv_bytes = comparison_df.write_csv().encode()
|
|
||||||
_items.append(
|
|
||||||
mo.download(_csv_bytes, filename="enrichment_comparison.csv", label="Download CSV")
|
|
||||||
)
|
|
||||||
|
|
||||||
_raw = json.dumps(
|
_raw = json.dumps(
|
||||||
{"apollo": apollo_results, "pdl": pdl_results, "fullenrich": fullenrich_results},
|
{"apollo": apollo_results, "pdl": pdl_results, "fullenrich": fullenrich_results},
|
||||||
indent=2,
|
indent=2,
|
||||||
default=str,
|
default=str,
|
||||||
).encode()
|
).encode()
|
||||||
_items.append(
|
|
||||||
mo.download(_raw, filename="enrichment_raw.json", label="Download Raw JSON")
|
|
||||||
)
|
|
||||||
|
|
||||||
mo.vstack([mo.md("## Export"), mo.hstack(_items)])
|
mo.vstack([
|
||||||
|
mo.md("## Export"),
|
||||||
|
mo.download(_raw, filename="enrichment_raw.json", label="Download Raw JSON"),
|
||||||
|
])
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user