I think this one was for the quote description field, but we got it wrong, so we need to search and find the correct field.

This commit is contained in:
2026-05-18 11:54:57 +09:30
parent 069bf11ec2
commit 9dde6b28a1
3 changed files with 283 additions and 14 deletions
+139
View File
@@ -144,6 +144,21 @@ def init_db(db_path: Path) -> None:
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS job_metadata (
job_uuid TEXT PRIMARY KEY,
generated_job_id TEXT,
job_address TEXT,
company_name TEXT,
raw_json TEXT NOT NULL,
first_seen_at TEXT NOT NULL,
last_seen_at TEXT NOT NULL,
source TEXT NOT NULL
)
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_job_metadata_generated_job_id ON job_metadata(generated_job_id)")
conn.commit()
@@ -205,6 +220,109 @@ def fetch_form_responses(
return response.status_code, data, filter_expr
def retrieve_job(
    *,
    api_key: str,
    base_url: str,
    job_uuid: str,
    timeout: int,
) -> Dict[str, Any]:
    """Fetch a single job record as a JSON object.

    Issues ``GET {base_url}/job/{job_uuid}.json`` with the API key in the
    ``X-Api-Key`` header and returns the decoded JSON body.

    Args:
        api_key: Value sent in the ``X-Api-Key`` request header.
        base_url: API root; a trailing slash is tolerated.
        job_uuid: Identifier interpolated into the request path.
        timeout: Passed through to ``requests.get`` as ``timeout``.

    Returns:
        The decoded response body, guaranteed to be a ``dict``.

    Raises:
        RuntimeError: If the response status is not OK, the body is not
            valid JSON, or the decoded JSON is not an object.

    Exceptions raised by ``requests.get`` itself are not caught here.
    """
    response = requests.get(
        f"{base_url.rstrip('/')}/job/{job_uuid}.json",
        headers={"X-Api-Key": api_key, "Accept": "application/json"},
        timeout=timeout,
    )
    if not response.ok:
        raise RuntimeError(f"Job retrieve failed for {job_uuid}: HTTP {response.status_code}: {response.text[:1000]}")
    try:
        data = response.json()
    except ValueError as exc:
        # JSON decode errors are ValueError subclasses; re-raise as the same
        # RuntimeError family as the other failure modes, with the job
        # identified so the caller's warning log is actionable.
        raise RuntimeError(
            f"Job retrieve returned invalid JSON for {job_uuid}: {response.text[:1000]}"
        ) from exc
    if not isinstance(data, dict):
        raise RuntimeError(f"Job retrieve expected object response, got {type(data).__name__}")
    return data
def clean_text(value: Any) -> str:
    """Return *value* as a stripped string with newlines normalized to ``\\n``.

    ``None`` becomes the empty string; every other value is passed through
    ``str()`` before normalization.
    """
    if value is None:
        return ""
    text = str(value)
    # Handle CRLF before lone CR so a Windows newline collapses to a single "\n".
    for newline in ("\r\n", "\r"):
        text = text.replace(newline, "\n")
    return text.strip()
def first_text(*values: Any) -> str:
    """Return the first of *values* that is non-empty after ``clean_text``.

    Values are cleaned lazily in argument order; if none yields text, the
    empty string is returned.
    """
    cleaned = (clean_text(candidate) for candidate in values)
    return next((text for text in cleaned if text), "")
def format_job_address(job: Dict[str, Any]) -> str:
    """Build a single-line address string from a job record.

    The first non-empty whole-address field wins. Otherwise the address is
    assembled from street, suburb/city, state and postcode components,
    joined with single spaces, skipping any component that is empty.
    """
    whole_address_keys = (
        "job_address",
        "site_address",
        "address",
        "location_address",
        "billing_address",
    )
    whole_address = first_text(*(job.get(key) for key in whole_address_keys))
    if whole_address:
        return whole_address

    # Each tuple lists alternative keys for one address component, in
    # priority order.
    component_keys = (
        ("street", "street_address", "address_1", "address1"),
        ("suburb", "city"),
        ("state",),
        ("postcode", "postal_code", "zip"),
    )
    components = (first_text(*(job.get(key) for key in keys)) for keys in component_keys)
    return " ".join(filter(None, components))
def extract_company_name(job: Dict[str, Any]) -> str:
    """Return the company name for a job record, or ``""`` if none is found.

    Lookup order: ``job["related"]["company"]["name"]``, then
    ``job["company"]["name"]``, then the flat ``company_name`` and
    ``customer_name`` fields. Nested containers that are not dicts are
    skipped.
    """
    related = job.get("related")
    nested_candidates = (
        related.get("company") if isinstance(related, dict) else None,
        job.get("company"),
    )
    for candidate in nested_candidates:
        if not isinstance(candidate, dict):
            continue
        name = clean_text(candidate.get("name"))
        if name:
            return name
    return first_text(job.get("company_name"), job.get("customer_name"))
def upsert_job_metadata(conn: sqlite3.Connection, *, job_uuid: str, job: Dict[str, Any], now: str, source: str) -> None:
    """Insert or refresh the ``job_metadata`` row for one job.

    Args:
        conn: Open SQLite connection; this function does not call
            ``commit`` itself.
        job_uuid: Primary key for the row. When it is empty after cleaning,
            ``job["uuid"]`` is used instead.
        job: Job record; stored verbatim as sorted-key JSON in ``raw_json``
            and mined for the job id, address and company name.
        now: Timestamp written to ``first_seen_at`` on insert and to
            ``last_seen_at`` on both insert and update.
        source: Label recorded in the ``source`` column.

    If no usable UUID can be determined the call is a no-op. On conflict
    every column except ``first_seen_at`` is overwritten.
    """
    # first_text cleans each candidate before testing it, so a
    # whitespace-only job_uuid falls through to job["uuid"] instead of
    # silently dropping the row.
    resolved_uuid = first_text(job_uuid, job.get("uuid"))
    if not resolved_uuid:
        return
    values = (
        resolved_uuid,
        clean_text(job.get("generated_job_id")),
        format_job_address(job),
        extract_company_name(job),
        json.dumps(job, ensure_ascii=False, sort_keys=True),
        now,  # first_seen_at (only used when the row is new)
        now,  # last_seen_at
        source,
    )
    conn.execute(
        """
        INSERT INTO job_metadata (
            job_uuid, generated_job_id, job_address, company_name, raw_json,
            first_seen_at, last_seen_at, source
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(job_uuid) DO UPDATE SET
            generated_job_id = excluded.generated_job_id,
            job_address = excluded.job_address,
            company_name = excluded.company_name,
            raw_json = excluded.raw_json,
            last_seen_at = excluded.last_seen_at,
            source = excluded.source
        """,
        values,
    )
def insert_or_update_raw(
conn: sqlite3.Connection,
row: Dict[str, Any],
@@ -457,6 +575,7 @@ def main() -> int:
inserted = updated = quote_matches = newly_queued = 0
now = utc_now()
fetched_job_uuids = set()
if conn is not None:
for row in rows:
was_inserted, is_quote = insert_or_update_raw(
@@ -469,6 +588,26 @@ def main() -> int:
updated += 0 if was_inserted else 1
if is_quote:
quote_matches += 1
job_uuid = clean_text(row.get("regarding_object_uuid"))
if job_uuid and job_uuid not in fetched_job_uuids:
try:
job = retrieve_job(
api_key=api_key,
base_url=args.base_url,
job_uuid=job_uuid,
timeout=args.timeout,
)
upsert_job_metadata(conn, job_uuid=job_uuid, job=job, now=now, source="formresponse_poll")
fetched_job_uuids.add(job_uuid)
except Exception as exc:
# Polling/parsing should still proceed if job metadata enrichment fails.
print(
json.dumps(
{"warning": "job_metadata_fetch_failed", "job_uuid": job_uuid, "error": str(exc)},
ensure_ascii=False,
),
file=sys.stderr,
)
if parse_and_store_quote_response(
conn,
row,