"""
extractor_redacted.py - redacted CA Case Data Extractor.

This is a non-operational demonstration copy.  It preserves the shape of the
hydration pipeline and the data contract, but removes:
  * internal package imports;
  * API-key lookup and model-client creation;
  * all code that sends material to an LLM provider;
  * private account or route information.

The original working extractor used a vision LLM to transcribe scanned PDFs and
return this structured object.  In this redacted version the LLM call is left as
an explicit error so readers can see exactly where private model access was cut.
"""

import json
import os
import re
from datetime import datetime
from dateutil import parser as dateparser

import openpyxl
from openpyxl.styles import Alignment, Font, PatternFill

# Absolute path of the directory that contains this script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Folder where run_pdf_pipeline() writes the generated workbook; currently the
# same directory as the script itself.
CA_FOLDER = SCRIPT_DIR

# Data contract for one extracted case.  Each key is a variable name; each
# value holds a human-readable "description" and a "required" flag.  The table
# is iterated in insertion order by build_extraction_prompt() (one prompt line
# per entry) and by write_xlsx() (one worksheet row per entry), so the order
# here is the order readers see in both outputs.
VARIABLE_DEFINITIONS = {
    "case_num": {"description": "Case number (integer)", "required": True},
    "case_year": {"description": "Year (4-digit integer)", "required": True},
    "hcal_case_num": {"description": "HCAL case number from Court of First Instance", "required": False},
    "hcal_case_year": {"description": "HCAL year from Court of First Instance", "required": False},
    "applicant_name": {"description": "Full name(s) of the applicant(s)", "required": True},
    "applicant_gender": {"description": "Gender: 'male', 'female', or 'neutral'", "required": True},
    "num_applicants": {"description": "Number of applicants", "required": False},
    "judge_name": {"description": "Full name of the CFI judge", "required": True},
    "judge_title_has_nrc": {"description": "TRUE if title includes '(Non-refoulement Claims)'", "required": True},
    "country": {"description": "Country of origin", "required": True},
    "one_line_description": {"description": "Brief description of feared harm", "required": True},
    "dismissal_order_date": {"description": "Date of the CFI dismissal order", "required": True},
    "board_decision_date_1": {"description": "Date of the first Board decision", "required": True},
    "board_decision_date_2": {"description": "Date of the second Board decision, if any", "required": False},
    "director_decision_date_1": {"description": "Date of the first Director decision", "required": False},
    "director_decision_date_2": {"description": "Date of the second Director decision, if any", "required": False},
    "notice_of_appeal_date": {"description": "Date the notice of appeal was filed", "required": True},
    "affirmation_date": {"description": "Date of affirmation/affidavit, if any", "required": False},
    "oath_type": {"description": "'affirmation' or 'affidavit'", "required": False},
    "grounds_in_affirmation": {"description": "Grounds in affirmation/affidavit", "required": False},
    "grounds_in_affirmation_summary": {"description": "Succinct summary", "required": False},
    "form1_has_hyperlink": {"description": "CALL-1 Form has hyperlink: TRUE/FALSE", "required": True},
    "form1_reference_type": {"description": "'CALL-1 Form' or 'Judgment'", "required": True},
    "hyperlink_para_num": {"description": "Paragraph containing hyperlink", "required": False},
    "hyperlink_footnote_num": {"description": "Footnote containing hyperlink", "required": False},
    "case_summary_para_begin": {"description": "Beginning paragraph of case summary", "required": False},
    "case_summary_para_end": {"description": "Ending paragraph of case summary", "required": False},
    "judge_reasons_para_begin": {"description": "Beginning paragraph of the judge's reasons", "required": True},
    "judge_reasons_para_end": {"description": "Ending paragraph of the judge's reasons", "required": True},
    "grounds_in_noa": {"description": "Grounds in notice of appeal", "required": True},
    "grounds_in_noa_summary": {"description": "Succinct NOA summary", "required": True},
    "written_submission_date": {"description": "Date of written submission, if any", "required": False},
    "written_submission_type": {"description": "Type of written submission", "required": False},
    "grounds_in_submission": {"description": "Grounds in written submission", "required": False},
    "grounds_in_submission_summary": {"description": "Succinct submission summary", "required": False},
    "registrar_direction_date": {"description": "Date of Registrar's direction", "required": False},
    "hearing_date_scheduled": {"description": "Scheduled hearing date", "required": False},
    "applicant_failed_to_raise_ground": {"description": "TRUE if no grounds were raised below", "required": False},
}


def parse_date(value):
    """Convert a date-like value to a ``datetime``, or ``None`` when it is empty.

    Args:
        value: A ``datetime`` (returned unchanged), or anything whose ``str()``
            is a date.  Falsy values and the placeholder strings ``""``,
            ``"NULL"``, ``"NONE"`` and ``"N/A"`` (any case, surrounding
            whitespace ignored) are treated as "no date".

    Returns:
        A ``datetime``, or ``None`` for empty/placeholder input.

    Raises:
        ValueError: Propagated from ``dateutil`` when the text cannot be
            interpreted as a date (dateutil's ``ParserError`` is a
            ``ValueError`` subclass).
    """
    if not value:
        return None
    if isinstance(value, datetime):
        return value

    text = str(value).strip()
    # "N/A" is included so this agrees with the placeholder set used by
    # normalize_extracted(); previously it fell through and raised.
    if text.upper() in {"", "NULL", "NONE", "N/A"}:
        return None

    # ISO dates (YYYY-MM-DD) are unambiguous, but dateutil with dayfirst=True
    # reads them as YYYY-DD-MM whenever the last field is <= 12.  Parse them
    # strictly here so "2023-01-02" is 2 January, not 1 February.
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", text):
        try:
            return datetime.strptime(text, "%Y-%m-%d")
        except ValueError:
            # Not a real calendar date (e.g. month 13); let dateutil decide
            # and raise its own error if it cannot make sense of it either.
            pass

    # Everything else is assumed to be written day-first (e.g. 01/02/2023).
    return dateparser.parse(text, dayfirst=True)


def format_date(value) -> str:
    """Render a date-like value as ``D Month YYYY`` with an unpadded day.

    Args:
        value: Anything accepted by ``parse_date``.

    Returns:
        Text such as ``"5 March 2021"``, or ``""`` when ``parse_date`` yields
        no date.  The month name comes from ``strftime('%B')`` and therefore
        follows the process locale.

    Raises:
        Whatever ``parse_date`` raises for text it cannot interpret.
    """
    dt = parse_date(value)
    if not dt:
        return ""
    # The unpadded-day flags ("%-d" on POSIX, "%#d" on Windows) are
    # platform-specific strftime extensions.  Reading the day attribute
    # directly gives the same text everywhere without an os.name branch.
    return f"{dt.day} {dt.strftime('%B %Y')}"


def build_extraction_prompt() -> str:
    """Assemble the extraction instructions that the private version sent to the LLM.

    The prompt consists of three fixed instruction lines followed by one
    bullet per entry of ``VARIABLE_DEFINITIONS``.  Each bullet names the key,
    marks it as required or optional, and gives its description.

    Returns:
        The prompt as a single newline-separated string.
    """
    preamble = (
        "Extract structured data for a routine CA non-refoulement appeal.",
        "Return JSON only.  Do not decide the appeal.",
        "Use the following keys:",
    )
    bullets = [
        f"- {name} ({'required' if meta.get('required') else 'optional'}): {meta['description']}"
        for name, meta in VARIABLE_DEFINITIONS.items()
    ]
    return "\n".join([*preamble, *bullets])


def call_llm_for_extraction(*_args, **_kwargs) -> dict:
    """Stand-in for the removed LLM extraction call; always raises.

    Any positional or keyword arguments are accepted and ignored so existing
    call sites keep working up to the point where model access was cut.

    Raises:
        RuntimeError: Always, with a message explaining the redaction.
    """
    notice = "".join(
        (
            "[REDACTED] LLM transcription/extraction call removed. ",
            "The working version connected to a private model provider and returned ",
            "a JSON object matching VARIABLE_DEFINITIONS.",
        )
    )
    raise RuntimeError(notice)


def normalize_extracted(data: dict) -> dict:
    """Return a cleaned shallow copy of an extracted-data mapping.

    Cleanup applied:
      * every string value has runs of whitespace collapsed to one space and
        is stripped; strings that are then empty or equal (case-insensitively)
        to ``NULL``, ``NONE`` or ``N/A`` become ``None``;
      * ``judge_title_has_nrc`` is canonicalised to the strings ``"TRUE"`` or
        ``"FALSE"`` when it is a boolean, the number 1/0, or a
        true/yes/false/no string in any letter case.  Any other value
        (including ``None`` or a missing key) is left as it is.

    Only ``judge_title_has_nrc`` is canonicalised; other TRUE/FALSE-style
    fields are passed through unchanged.

    Args:
        data: Mapping returned by the extraction step; ``None`` or an empty
            mapping yields ``{}``.  The input is not modified.

    Returns:
        A new ``dict`` with the cleaned values.
    """
    null_tokens = {"", "NULL", "NONE", "N/A"}
    out = dict(data or {})

    # Reassigning values for existing keys does not resize the dict, so it is
    # safe to do while iterating over it.
    for key, value in out.items():
        if isinstance(value, str):
            collapsed = re.sub(r"\s+", " ", value).strip()
            out[key] = None if collapsed.upper() in null_tokens else collapsed

    flag = out.get("judge_title_has_nrc")
    if isinstance(flag, str):
        # Compare case-insensitively so "True", "yes", "False", "no" etc. are
        # recognised, not just the few exact spellings.
        token = flag.casefold()
        if token in {"true", "yes"}:
            out["judge_title_has_nrc"] = "TRUE"
        elif token in {"false", "no"}:
            out["judge_title_has_nrc"] = "FALSE"
    elif isinstance(flag, (bool, int, float)) and flag in (0, 1):
        # Type check first: a set-membership test would raise TypeError on an
        # unhashable value such as a list.  1/0 are kept equivalent to
        # True/False, as they compared equal in the earlier membership test.
        out["judge_title_has_nrc"] = "TRUE" if flag else "FALSE"
    return out


def write_xlsx(xlsx_path: str, data: dict):
    """Create a one-sheet workbook listing every variable and save it to *xlsx_path*.

    The sheet is titled "Case Data".  Row 1 is a header (bold white text on a
    blue fill, centred).  Each following row describes one entry of
    ``VARIABLE_DEFINITIONS``: its name, its description, the value found in
    *data* under that name (``None`` when absent), and "Yes"/"No" for whether
    the variable is required.

    Args:
        xlsx_path: Destination path handed to ``Workbook.save``.
        data: Mapping of variable name to extracted value.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Case Data"

    # Header row styling.
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    column_titles = ("Variable Name", "Description", "Value", "Required")
    for col_idx, title in enumerate(column_titles, start=1):
        header_cell = sheet.cell(row=1, column=col_idx, value=title)
        header_cell.fill = header_fill
        header_cell.font = header_font
        header_cell.alignment = Alignment(horizontal="center")

    # One data row per variable, starting directly under the header.
    row_idx = 1
    for var_name, spec in VARIABLE_DEFINITIONS.items():
        row_idx += 1
        required_label = "Yes" if spec.get("required") else "No"
        row_values = (var_name, spec["description"], data.get(var_name), required_label)
        for col_idx, cell_value in enumerate(row_values, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=cell_value)

    workbook.save(xlsx_path)
    workbook.close()


def run_pdf_pipeline(pdf_paths, case_num: int, case_year: int, writer_judge: str = "chu", overrides=None):
    """
    Skeleton of the redacted pipeline:
      scanned PDFs -> [REDACTED LLM extraction] -> XLSX -> deterministic generator.

    In this copy the first step, call_llm_for_extraction(), always raises, so
    nothing after it runs.  In the private version its result was merged with
    overrides and passed to generator.py / generator_redacted.py.

    Steps, in order:
      1. extract from the PDFs and normalize the result;
      2. set case_num / case_year from the arguments;
      3. apply overrides, which may replace any key, including those two;
      4. write the workbook to CA_FOLDER as CACV<case_num>-<case_year>.xlsx,
         named from the arguments rather than from the merged data.

    Returns a dict with the workbook path ("xlsx"), the merged values
    ("data") and writer_judge passed through unchanged ("writer_judge").
    """
    raw = call_llm_for_extraction(image_pdf_paths=list(pdf_paths))
    data = normalize_extracted(raw)

    # Caller-supplied identifiers take precedence over extracted ones...
    data["case_num"] = case_num
    data["case_year"] = case_year
    # ...and explicit overrides take precedence over everything.
    data.update(overrides or {})

    workbook_name = f"CACV{case_num}-{case_year}.xlsx"
    xlsx_path = os.path.join(CA_FOLDER, workbook_name)
    write_xlsx(xlsx_path, data)

    return {"xlsx": xlsx_path, "data": data, "writer_judge": writer_judge}


if __name__ == "__main__":
    # Demo entry point: show the extraction prompt, then a status marker making
    # clear that this copy performs no real extraction.
    demo_status = {"status": "redacted demo only"}
    prompt_text = build_extraction_prompt()
    print(prompt_text)
    print(json.dumps(demo_status, indent=2))