I think this one was for the quote description field, but we got it wrong, so we need to search for and find the correct field.
This commit is contained in:
@@ -144,6 +144,21 @@ def init_db(db_path: Path) -> None:
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_metadata (
|
||||
job_uuid TEXT PRIMARY KEY,
|
||||
generated_job_id TEXT,
|
||||
job_address TEXT,
|
||||
company_name TEXT,
|
||||
raw_json TEXT NOT NULL,
|
||||
first_seen_at TEXT NOT NULL,
|
||||
last_seen_at TEXT NOT NULL,
|
||||
source TEXT NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_job_metadata_generated_job_id ON job_metadata(generated_job_id)")
|
||||
conn.commit()
|
||||
|
||||
|
||||
@@ -205,6 +220,109 @@ def fetch_form_responses(
|
||||
return response.status_code, data, filter_expr
|
||||
|
||||
|
||||
def retrieve_job(
    *,
    api_key: str,
    base_url: str,
    job_uuid: str,
    timeout: int,
) -> Dict[str, Any]:
    """Fetch a single job record and return it as a decoded JSON object.

    Sends ``GET {base_url}/job/{job_uuid}.json`` with the API key in the
    ``X-Api-Key`` header.

    Args:
        api_key: Value sent in the ``X-Api-Key`` request header.
        base_url: API root; any trailing slash is removed before joining.
        job_uuid: Identifier of the job to retrieve.
        timeout: Timeout passed through to ``requests.get``.

    Returns:
        The decoded response body, guaranteed to be a ``dict``.

    Raises:
        RuntimeError: If the response status is not OK, the body is not
            valid JSON, or the decoded JSON is not an object.

    Exceptions raised by ``requests.get`` itself are not caught here and
    propagate to the caller unchanged.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    # Percent-encode the identifier so an unexpected character (slash, "?",
    # "#", whitespace) cannot alter the request path. Well-formed UUIDs are
    # left untouched by quote().
    url = f"{base_url.rstrip('/')}/job/{quote(job_uuid, safe='')}.json"
    response = requests.get(
        url,
        headers={"X-Api-Key": api_key, "Accept": "application/json"},
        timeout=timeout,
    )
    if not response.ok:
        raise RuntimeError(f"Job retrieve failed for {job_uuid}: HTTP {response.status_code}: {response.text[:1000]}")
    try:
        data = response.json()
    except ValueError as exc:
        # A 2xx response with a non-JSON body (e.g. an HTML error page) would
        # otherwise surface as a bare decode error with no job context.
        raise RuntimeError(
            f"Job retrieve returned invalid JSON for {job_uuid}: {response.text[:1000]}"
        ) from exc
    if not isinstance(data, dict):
        raise RuntimeError(f"Job retrieve expected object response, got {type(data).__name__}")
    return data
|
||||
|
||||
|
||||
def clean_text(value: Any) -> str:
    """Convert *value* to stripped text with ``\\n``-only line endings.

    ``None`` becomes the empty string. Any other value is passed through
    ``str()``, has ``\\r\\n`` and lone ``\\r`` rewritten to ``\\n``, and is
    stripped of leading and trailing whitespace.
    """
    if value is not None:
        text = str(value)
        # Order matters: collapse CRLF first so it does not become two
        # newlines when the lone-CR pass runs.
        for line_ending in ("\r\n", "\r"):
            text = text.replace(line_ending, "\n")
        return text.strip()
    return ""
|
||||
|
||||
|
||||
def first_text(*values: Any) -> str:
    """Return the first argument that is non-empty after ``clean_text``.

    Arguments are examined in order and cleaning stops at the first one that
    yields non-empty text. If none does (or no arguments are given), the
    empty string is returned.
    """
    cleaned_candidates = (clean_text(candidate) for candidate in values)
    return next((text for text in cleaned_candidates if text), "")
|
||||
|
||||
|
||||
def format_job_address(job: Dict[str, Any]) -> str:
    """Build a single-line address string from a job record.

    A ready-made address field is preferred: the first non-empty value among
    ``job_address``, ``site_address``, ``address``, ``location_address`` and
    ``billing_address`` is returned as-is (after cleaning). Otherwise the
    address is assembled from street, suburb/city, state and postcode
    components, joined by single spaces, skipping any that are empty.
    """
    whole_address_keys = (
        "job_address",
        "site_address",
        "address",
        "location_address",
        "billing_address",
    )
    direct = first_text(*(job.get(key) for key in whole_address_keys))
    if direct:
        return direct

    # Each tuple lists alternative keys for one address component, in
    # priority order; the components are emitted in this order.
    component_keys = (
        ("street", "street_address", "address_1", "address1"),
        ("suburb", "city"),
        ("state",),
        ("postcode", "postal_code", "zip"),
    )
    components = [
        first_text(*(job.get(key) for key in alternatives))
        for alternatives in component_keys
    ]
    return " ".join(filter(None, components))
|
||||
|
||||
|
||||
def extract_company_name(job: Dict[str, Any]) -> str:
    """Return the company name for a job record, or ``""`` if none is found.

    Lookup order:
      1. ``job["related"]["company"]["name"]``
      2. ``job["company"]["name"]``
      3. the first non-empty of ``job["company_name"]`` / ``job["customer_name"]``

    The nested lookups are only attempted when the intermediate values are
    dicts; a name that is empty after cleaning falls through to the next
    source.
    """
    related = job.get("related")
    related_company = related.get("company") if isinstance(related, dict) else None

    for candidate in (related_company, job.get("company")):
        if not isinstance(candidate, dict):
            continue
        name = clean_text(candidate.get("name"))
        if name:
            return name

    return first_text(job.get("company_name"), job.get("customer_name"))
|
||||
|
||||
|
||||
def upsert_job_metadata(conn: sqlite3.Connection, *, job_uuid: str, job: Dict[str, Any], now: str, source: str) -> None:
    """Insert or refresh the ``job_metadata`` row for one job.

    The row key is ``job_uuid`` if given, otherwise ``job["uuid"]``; when
    neither yields non-empty text the call does nothing. On first insert
    both ``first_seen_at`` and ``last_seen_at`` are set to *now*. On
    conflict every column except ``job_uuid`` and ``first_seen_at`` is
    overwritten with the new values.

    This function does not commit; it only executes the statement on *conn*.
    """
    resolved_uuid = clean_text(job_uuid or job.get("uuid"))
    if resolved_uuid:
        generated_job_id = clean_text(job.get("generated_job_id"))
        job_address = format_job_address(job)
        company_name = extract_company_name(job)
        # sort_keys gives a stable serialisation for the stored snapshot.
        raw_json = json.dumps(job, ensure_ascii=False, sort_keys=True)

        conn.execute(
            """
            INSERT INTO job_metadata (
                job_uuid, generated_job_id, job_address, company_name, raw_json,
                first_seen_at, last_seen_at, source
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(job_uuid) DO UPDATE SET
                generated_job_id = excluded.generated_job_id,
                job_address = excluded.job_address,
                company_name = excluded.company_name,
                raw_json = excluded.raw_json,
                last_seen_at = excluded.last_seen_at,
                source = excluded.source
            """,
            (
                resolved_uuid,
                generated_job_id,
                job_address,
                company_name,
                raw_json,
                now,  # first_seen_at (ignored when the row already exists)
                now,  # last_seen_at
                source,
            ),
        )
|
||||
|
||||
|
||||
def insert_or_update_raw(
|
||||
conn: sqlite3.Connection,
|
||||
row: Dict[str, Any],
|
||||
@@ -457,6 +575,7 @@ def main() -> int:
|
||||
|
||||
inserted = updated = quote_matches = newly_queued = 0
|
||||
now = utc_now()
|
||||
fetched_job_uuids = set()
|
||||
if conn is not None:
|
||||
for row in rows:
|
||||
was_inserted, is_quote = insert_or_update_raw(
|
||||
@@ -469,6 +588,26 @@ def main() -> int:
|
||||
updated += 0 if was_inserted else 1
|
||||
if is_quote:
|
||||
quote_matches += 1
|
||||
job_uuid = clean_text(row.get("regarding_object_uuid"))
|
||||
if job_uuid and job_uuid not in fetched_job_uuids:
|
||||
try:
|
||||
job = retrieve_job(
|
||||
api_key=api_key,
|
||||
base_url=args.base_url,
|
||||
job_uuid=job_uuid,
|
||||
timeout=args.timeout,
|
||||
)
|
||||
upsert_job_metadata(conn, job_uuid=job_uuid, job=job, now=now, source="formresponse_poll")
|
||||
fetched_job_uuids.add(job_uuid)
|
||||
except Exception as exc:
|
||||
# Polling/parsing should still proceed if job metadata enrichment fails.
|
||||
print(
|
||||
json.dumps(
|
||||
{"warning": "job_metadata_fetch_failed", "job_uuid": job_uuid, "error": str(exc)},
|
||||
ensure_ascii=False,
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
if parse_and_store_quote_response(
|
||||
conn,
|
||||
row,
|
||||
|
||||
Reference in New Issue
Block a user