deslop
This commit is contained in:
131
comparison.py
131
comparison.py
@@ -3,7 +3,6 @@
|
||||
# dependencies = [
|
||||
# "marimo",
|
||||
# "httpx",
|
||||
# "polars",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
@@ -25,9 +24,7 @@ def _():
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
|
||||
return Path, csv, hashlib, httpx, io, json, mo, os, pl, time
|
||||
return Path, csv, hashlib, httpx, io, json, mo, os, time
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
@@ -138,71 +135,6 @@ def _(
|
||||
return (people,)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
def extract_apollo(data):
    """Flatten an Apollo response into the shared comparison-row layout.

    Args:
        data: Decoded Apollo response. The person record is read from its
            ``"person"`` key; ``None`` or a missing/empty ``"person"`` is
            treated as "no data".

    Returns:
        A dict with the keys ``name``, ``title``, ``company``, ``industry``,
        ``location``, ``linkedin``, ``phones`` and ``found_emails``.
        Missing fields become ``""``.
    """
    person = (data or {}).get("person") or {}
    organization = person.get("organization") or {}
    location_parts = (
        person.get("city"),
        person.get("state"),
        person.get("country"),
    )

    # ``str.join`` raises TypeError on anything but strings, so every phone
    # entry is normalised to text before joining.
    # NOTE(review): Apollo is understood to return phone objects carrying
    # ``sanitized_number`` / ``raw_number`` -- confirm against the API docs.
    phones = []
    for entry in person.get("phone_numbers") or []:
        if isinstance(entry, dict):
            entry = entry.get("sanitized_number") or entry.get("raw_number")
        if entry:
            phones.append(str(entry))

    return {
        "name": person.get("name") or "",
        "title": person.get("title") or "",
        "company": organization.get("name") or "",
        "industry": organization.get("industry") or "",
        # Only the location components that are present are joined.
        "location": ", ".join(part for part in location_parts if part),
        "linkedin": person.get("linkedin_url") or "",
        "phones": ", ".join(phones),
        "found_emails": person.get("email") or "",
    }
|
||||
|
||||
def extract_pdl(data):
    """Flatten a People Data Labs response into the shared comparison row.

    Args:
        data: Decoded PDL response. The record is read from its ``"data"``
            key when that is present and truthy, otherwise ``data`` itself
            is treated as the record. ``None`` is treated as "no data".

    Returns:
        A dict with the keys ``name``, ``title``, ``company``, ``industry``,
        ``location``, ``linkedin``, ``phones`` and ``found_emails``. Every
        value is a string; missing or non-string fields become ``""``.
    """

    def _text(value):
        # Only genuine strings are usable as cell text.
        # NOTE(review): PDL is understood to return booleans for fields the
        # API plan does not expose -- confirm against the PDL schema docs.
        return value if isinstance(value, str) else ""

    def _texts(value, limit):
        # Keep at most ``limit`` non-empty strings from a list-like value.
        if not isinstance(value, (list, tuple)):
            return []
        return [item for item in value if isinstance(item, str) and item][:limit]

    data = data or {}
    record = data.get("data") or data

    # Prefer the single mobile number; otherwise fall back to the first
    # three listed phone numbers.
    phones = _text(record.get("mobile_phone"))
    if not phones:
        phones = ", ".join(_texts(record.get("phone_numbers"), 3))

    # Work e-mail first, then up to two personal addresses.
    emails = []
    work_email = _text(record.get("work_email"))
    if work_email:
        emails.append(work_email)
    emails.extend(_texts(record.get("personal_emails"), 2))

    return {
        "name": _text(record.get("full_name")),
        "title": _text(record.get("job_title")),
        "company": _text(record.get("job_company_name")),
        "industry": _text(record.get("job_company_industry")),
        "location": _text(record.get("location_name")),
        "linkedin": _text(record.get("linkedin_url")),
        "phones": phones,
        "found_emails": ", ".join(emails),
    }
|
||||
|
||||
def extract_fullenrich(data):
    """Flatten a FullEnrich result into the shared comparison-row layout.

    Args:
        data: Decoded FullEnrich result. Contact details are read from
            ``"contact_info"``, profile fields from ``"profile"`` and the
            echoed request fields from ``"input"``. ``None`` is treated as
            "no data".

    Returns:
        A dict with the keys ``name``, ``title``, ``company``, ``industry``,
        ``location``, ``linkedin``, ``phones`` and ``found_emails``.
        ``industry`` is always ``""`` because no industry field is read from
        this provider. E-mail addresses are listed once each, in the order
        most-probable work e-mail, other work e-mails, personal e-mail.
    """

    def _as_text(entry, key):
        # Accept a plain string, or an object holding the string under
        # ``key``; anything else yields "" so ``str.join`` cannot fail.
        if isinstance(entry, dict):
            entry = entry.get(key)
        return entry if isinstance(entry, str) else ""

    data = data or {}
    contact = data.get("contact_info") or {}
    profile = data.get("profile") or {}
    request = data.get("input") or {}

    candidates = [contact.get("most_probable_work_email")]
    candidates.extend(contact.get("work_emails") or [])
    candidates.append(contact.get("personal_email"))

    # De-duplicate across all three sources, including the personal e-mail,
    # while keeping first-seen order.
    # NOTE(review): the "email" key for object entries is an assumption --
    # confirm against the FullEnrich response schema.
    emails = []
    for candidate in candidates:
        address = _as_text(candidate, "email")
        if address and address not in emails:
            emails.append(address)

    # NOTE(review): the "number" key for object entries is an assumption --
    # confirm against the FullEnrich response schema.
    phones = []
    for entry in contact.get("phones") or []:
        number = _as_text(entry, "number")
        if number:
            phones.append(number)

    return {
        "name": profile.get("full_name") or "",
        "title": profile.get("headline") or "",
        "company": request.get("company_name") or "",
        "industry": "",
        "location": profile.get("location") or "",
        "linkedin": request.get("linkedin_url") or "",
        "phones": ", ".join(phones),
        "found_emails": ", ".join(emails),
    }
|
||||
|
||||
return extract_apollo, extract_fullenrich, extract_pdl
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo, people, apollo_key, httpx, read_cache, write_cache):
|
||||
apollo_results = {}
|
||||
@@ -257,10 +189,19 @@ def _(mo, people, pdl_key, httpx, read_cache, write_cache):
|
||||
pdl_results[_email] = _cached
|
||||
continue
|
||||
try:
|
||||
_params = {"email": _email, "min_likelihood": 5}
|
||||
if _person.get("first_name"):
|
||||
_params["first_name"] = _person["first_name"]
|
||||
if _person.get("last_name"):
|
||||
_params["last_name"] = _person["last_name"]
|
||||
if _person.get("linkedin_url"):
|
||||
_params["profile"] = _person["linkedin_url"]
|
||||
if _person.get("domain"):
|
||||
_params["website"] = _person["domain"]
|
||||
_r = httpx.get(
|
||||
"https://api.peopledatalabs.com/v5/person/enrich",
|
||||
headers={"X-Api-Key": pdl_key.value},
|
||||
params={"email": _email, "min_likelihood": 5},
|
||||
params=_params,
|
||||
timeout=30,
|
||||
)
|
||||
_data = _r.json()
|
||||
@@ -302,10 +243,10 @@ def _(mo, people, fullenrich_key, httpx, read_cache, write_cache, time):
|
||||
"custom": {"email": _person["email"]},
|
||||
}
|
||||
_has_id = False
|
||||
if _person.get("first_name") and _person.get("last_name"):
|
||||
if _person.get("first_name") and _person.get("last_name") and _person.get("domain"):
|
||||
_entry["first_name"] = _person["first_name"]
|
||||
_entry["last_name"] = _person["last_name"]
|
||||
_entry["domain"] = _person.get("domain") or _person["email"].split("@")[1]
|
||||
_entry["domain"] = _person["domain"]
|
||||
_has_id = True
|
||||
if _person.get("linkedin_url"):
|
||||
_entry["linkedin_url"] = _person["linkedin_url"]
|
||||
@@ -366,40 +307,13 @@ def _(mo, people, fullenrich_key, httpx, read_cache, write_cache, time):
|
||||
return (fullenrich_results,)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(
    mo, people, apollo_results, pdl_results, fullenrich_results,
    extract_apollo, extract_pdl, extract_fullenrich, pl,
):
    # Comparison cell: builds one row per (person, provider) pair that has a
    # non-error result, using that provider's extractor, and exposes the
    # rows as `comparison_df` (None when there are no rows).
    _providers = (
        ("Apollo", apollo_results, extract_apollo),
        ("PDL", pdl_results, extract_pdl),
        ("FullEnrich", fullenrich_results, extract_fullenrich),
    )

    _rows = []
    for _person in people:
        _email = _person["email"]
        for _provider, _results, _extractor in _providers:
            # Skip people a provider has no entry for, and entries that
            # carry an "error" key.
            if _email in _results and "error" not in _results[_email]:
                _extracted = _extractor(_results[_email])
                _rows.append({"email": _email, "provider": _provider, **_extracted})

    comparison_df = pl.DataFrame(_rows) if _rows else None

    # The view must be the cell's final top-level expression: calls placed
    # inside an `if`/`else` statement are evaluated and discarded, so
    # nothing would be displayed.
    (
        mo.vstack([mo.md("## Comparison"), mo.ui.table(comparison_df)])
        if comparison_df is not None
        else mo.md("## Comparison\n\n*No results yet*")
    )
    return (comparison_df,)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
||||
def _fmt(d):
|
||||
return mo.md(f"```json\n{json.dumps(d, indent=2, default=str)}\n```")
|
||||
|
||||
mo.vstack([
|
||||
mo.md("## Raw Results"),
|
||||
mo.md("## Results"),
|
||||
mo.ui.tabs({
|
||||
"Apollo": _fmt(apollo_results),
|
||||
"PDL": _fmt(pdl_results),
|
||||
@@ -410,24 +324,17 @@ def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo, comparison_df, apollo_results, pdl_results, fullenrich_results, json):
|
||||
_items = []
|
||||
if comparison_df is not None:
|
||||
_csv_bytes = comparison_df.write_csv().encode()
|
||||
_items.append(
|
||||
mo.download(_csv_bytes, filename="enrichment_comparison.csv", label="Download CSV")
|
||||
)
|
||||
|
||||
def _(mo, apollo_results, pdl_results, fullenrich_results, json):
|
||||
_raw = json.dumps(
|
||||
{"apollo": apollo_results, "pdl": pdl_results, "fullenrich": fullenrich_results},
|
||||
indent=2,
|
||||
default=str,
|
||||
).encode()
|
||||
_items.append(
|
||||
mo.download(_raw, filename="enrichment_raw.json", label="Download Raw JSON")
|
||||
)
|
||||
|
||||
mo.vstack([mo.md("## Export"), mo.hstack(_items)])
|
||||
mo.vstack([
|
||||
mo.md("## Export"),
|
||||
mo.download(_raw, filename="enrichment_raw.json", label="Download Raw JSON"),
|
||||
])
|
||||
return
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user