import pandas as pd

schema_cols = {
    "claim_id": "string",
    "member_id": "string",
    "member_dob": "string",          # ISO date string
    "plan_type": "string",           # e.g. "Medicare Advantage", "Medicaid", "Commercial"
    "payer": "string",               # e.g. Humana, UHC
    "eligibility_active": "int64",   # 1 = active, 0 = inactive
    "service_category": "string",    # "Medical" | "Behavioral" | "Dental"
    "cpt_or_cdt": "string",          # CPT/HCPCS or CDT
    "modifiers": "string",           # e.g. "25", "95", "", "LT"
    "pos": "string",                 # CMS POS code e.g. "11", "22", "02", "10"
    "icd10_1": "string",
    "icd10_2": "string",
    "icd10_3": "string",
    "icd10_4": "string",
    "auth_required": "int64",        # 1/0
    "auth_obtained": "int64",        # 1/0
    "provider_npi": "string",        # billing or rendering NPI
    "provider_credentialed": "int64",# 1/0
    "in_network": "int64",           # 1/0
    "service_date": "string",        # ISO date string, e.g. "2025-01-15"
    "submit_date": "string",         # ISO date string
    "claim_frequency_code": "string",# "1" original, "7" replace, "8" void
    "charge_amount": "float64",
    "tooth": "string",               # dental detail
    "tooth_surface": "string"        # dental detail
}

claims_template = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in schema_cols.items()})
claims_template.head()

import datetime as dt

def timely_filing_limit_days(plan_type: str) -> int:
    """Simplified payer filing windows (days)."""
    if plan_type == "Medicare Advantage":
        return 365
    if plan_type == "Medicaid":
        return 240
    # Commercial often ~180 days
    return 180

def rule_eligibility_inactive(row):
    """Member not active on service date => instant denial."""
    return 1 if row.get("eligibility_active", 1) == 0 else 0

def rule_missing_auth(row):
    """Auth required but not on file => classic denial for certain procedure types."""
    return 1 if (row.get("auth_required",0) == 1 and row.get("auth_obtained",0) == 0) else 0

def rule_telehealth_mismatch(row):
    """Telehealth codes need telehealth POS (02/10) AND modifier (95/GT)."""
    telehealth_codes = {"90834","90837","90791","99213","99214","99215","H0004","H0031"}
    needs_tele = row.get("cpt_or_cdt") in telehealth_codes
    pos_ok = row.get("pos") in ["02","10"]
    mod_ok = row.get("modifiers") in ["95","GT"]
    if needs_tele and (not pos_ok or not mod_ok):
        return 1
    return 0

def rule_modifier_missing(row):
    """Some codes usually REQUIRE a modifier (25, 59, etc.)."""
    high_modifier_codes = {"99213","99214","99215","97110","96372"}
    needs_modifier = row.get("cpt_or_cdt") in high_modifier_codes
    has_modifier = (row.get("modifiers") or "").strip() != ""
    return 1 if needs_modifier and not has_modifier else 0

def rule_pos_invalid(row):
    """POS doesn't make sense for that service (ex: inpatient POS 21 for outpatient-only code)."""
    inpatient_pos = (row.get("pos") == "21")  # inpatient hospital
    outpatient_only_codes = {"99213","99214","90834","90837","D0120","D1110"}

    # inpatient POS for clearly outpatient code
    if inpatient_pos and row.get("cpt_or_cdt") in outpatient_only_codes:
        return 1

    # dental usually POS 11 (office), 22 (outpatient), 32 (nursing)
    if row.get("service_category") == "Dental" and row.get("pos") not in ["11","22","32"]:
        return 1

    return 0

def rule_noncovered_service(row):
    """Service not covered under this plan type (e.g., dental under Medicare Advantage)."""
    if row.get("service_category") == "Dental" and row.get("plan_type") != "Medicaid":
        return 1
    # Behavioral carve-outs for some Commercial plans
    if row.get("service_category") == "Behavioral" and row.get("plan_type") == "Commercial":
        return 1
    return 0

def rule_dx_proc_mismatch(row):
    """Diagnosis doesn't clinically justify the billed procedure for that domain."""
    domain_hints = {
        "Medical": {"I10","E11.9","M54.5","J06.9","K21.9","R07.9","L40.0","N39.0","Z00.00"},
        "Behavioral": {"F41.1","F32.9","F33.1","Z00.00"},
        "Dental": {"K02.9","K04.7","K05.10","K08.101","Z00.00"}
    }
    dxs = {
        row.get("icd10_1",""),
        row.get("icd10_2",""),
        row.get("icd10_3",""),
        row.get("icd10_4","")
    }
    expected = domain_hints.get(row.get("service_category"), set())
    return 1 if dxs.isdisjoint(expected) else 0

def rule_provider_not_credentialed(row):
    """Provider not credentialed with payer."""
    return 1 if row.get("provider_credentialed",1) == 0 else 0

def rule_out_of_network(row):
    """Out-of-network for NON-Commercial plan types => denial / repricing."""
    if row.get("in_network",1) == 0 and row.get("plan_type") != "Commercial":
        return 1
    return 0

def rule_invalid_npi(row):
    """NPI must be 10 digits."""
    npi = row.get("provider_npi") or ""
    return 1 if len(npi) != 10 else 0

def rule_timely_filing(row):
    """Submit date is past payer's timely filing limit."""
    svc = dt.datetime.fromisoformat(row.get("service_date","2025-01-01"))
    sub = dt.datetime.fromisoformat(row.get("submit_date","2025-01-01"))
    limit_days = timely_filing_limit_days(row.get("plan_type","Commercial"))
    return 1 if (sub - svc).days > limit_days else 0

def rule_duplicate_like(row, seen_keys):
    """Same member + same provider NPI + same date + same code already seen in batch."""
    key = (
        row.get("member_id"),
        row.get("provider_npi"),
        row.get("service_date"),
        row.get("cpt_or_cdt")
    )
    if key in seen_keys:
        return 1
    else:
        seen_keys.add(key)
        return 0

def rule_dental_fields_missing(row):
    """CDT lines like crowns/extractions require tooth/surface."""
    needs_detail = (
        row.get("service_category") == "Dental" and
        row.get("cpt_or_cdt") in {"D2740","D2392","D2950","D4341","D7140","D7210"}
    )
    if needs_detail:
        if (row.get("tooth") or "").strip() == "" or (row.get("tooth_surface") or "").strip() == "":
            return 1
    return 0

def rule_frequency_limit(row, member_history_count_for_code):
    """Example: D1110 (cleaning) > 2 times/year triggers frequency edit."""
    if row.get("cpt_or_cdt") == "D1110" and member_history_count_for_code > 2:
        return 1
    return 0

def apply_rules(claims_df: pd.DataFrame) -> pd.DataFrame:
    """Run all rule checks across a batch of claims and add flag columns."""
    df = claims_df.copy()

    seen_keys = set()          # for duplicate detection
    freq_tracker = {}          # member -> code -> count so far

    flags = {
        "r_eligibility_inactive": [],
        "r_missing_auth": [],
        "r_noncovered_service": [],
        "r_modifier_missing": [],
        "r_telehealth_mismatch": [],
        "r_pos_invalid_for_code": [],
        "r_dx_proc_mismatch": [],
        "r_provider_not_credentialed": [],
        "r_out_of_network": [],
        "r_invalid_npi": [],
        "r_timely_filing_exceeded": [],
        "r_duplicate_claim": [],
        "r_dental_tooth_surface_missing": [],
        "r_frequency_limit_exceeded": []
    }

    for _, row in df.iterrows():
        r = row.to_dict()

        flags["r_eligibility_inactive"].append(rule_eligibility_inactive(r))
        flags["r_missing_auth"].append(rule_missing_auth(r))
        flags["r_noncovered_service"].append(rule_noncovered_service(r))
        flags["r_modifier_missing"].append(rule_modifier_missing(r))
        flags["r_telehealth_mismatch"].append(rule_telehealth_mismatch(r))
        flags["r_pos_invalid_for_code"].append(rule_pos_invalid(r))
        flags["r_dx_proc_mismatch"].append(rule_dx_proc_mismatch(r))
        flags["r_provider_not_credentialed"].append(rule_provider_not_credentialed(r))
        flags["r_out_of_network"].append(rule_out_of_network(r))
        flags["r_invalid_npi"].append(rule_invalid_npi(r))
        flags["r_timely_filing_exceeded"].append(rule_timely_filing(r))
        flags["r_dental_tooth_surface_missing"].append(rule_dental_fields_missing(r))

        dup_val = rule_duplicate_like(r, seen_keys)
        flags["r_duplicate_claim"].append(dup_val)

        member = r.get("member_id")
        code = r.get("cpt_or_cdt")
        freq_tracker.setdefault(member, {})
        freq_tracker[member].setdefault(code, 0)
        freq_tracker[member][code] += 1
        freq_val = rule_frequency_limit(r, freq_tracker[member][code])
        flags["r_frequency_limit_exceeded"].append(freq_val)

    for col, values in flags.items():
        df[col] = values

    return df

claims_with_flags = apply_rules(claims_template).fillna(0)
claims_with_flags.head()

WEIGHTS = {
    "r_eligibility_inactive": 35,
    "r_missing_auth": 28,
    "r_noncovered_service": 24,
    "r_modifier_missing": 15,
    "r_telehealth_mismatch": 14,
    "r_pos_invalid_for_code": 16,
    "r_dx_proc_mismatch": 18,
    "r_provider_not_credentialed": 22,
    "r_out_of_network": 18,
    "r_invalid_npi": 14,
    "r_timely_filing_exceeded": 20,
    "r_duplicate_claim": 26,
    "r_dental_tooth_surface_missing": 16,
    "r_frequency_limit_exceeded": 12
}

def score_claims(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()

    score_raw = 0
    for col, w in WEIGHTS.items():
        if col in out.columns:
            score_raw = score_raw + w * out[col]

    out["aura_presubmit_score"] = score_raw.clip(upper=100)

    def tier_for(s):
        if s >= 70:
            return "High"
        if s >= 40:
            return "Medium"
        return "Low"

    out["aura_priority_tier"] = out["aura_presubmit_score"].apply(tier_for)
    return out

scored_claims = score_claims(claims_with_flags)
scored_claims[[
    "claim_id","cpt_or_cdt","plan_type","payer",
    "aura_presubmit_score","aura_priority_tier"
]].head()

from datetime import datetime, timedelta

REASON_LABELS = {
    "r_eligibility_inactive": "Eligibility inactive",
    "r_missing_auth": "Missing authorization",
    "r_noncovered_service": "Non-covered service",
    "r_modifier_missing": "Modifier likely missing",
    "r_telehealth_mismatch": "Telehealth POS/modifier mismatch",
    "r_pos_invalid_for_code": "Invalid POS for code",
    "r_dx_proc_mismatch": "Diagnosis–procedure mismatch",
    "r_provider_not_credentialed": "Provider not credentialed",
    "r_out_of_network": "Out-of-network plan edit",
    "r_invalid_npi": "Invalid NPI",
    "r_timely_filing_exceeded": "Timely filing exceeded",
    "r_duplicate_claim": "Potential duplicate claim",
    "r_dental_tooth_surface_missing": "Dental tooth/surface missing",
    "r_frequency_limit_exceeded": "Frequency limit exceeded"
}

def summarize_reasons(row):
    msgs = []
    for col, label in REASON_LABELS.items():
        if col in row and row[col] == 1:
            msgs.append(label)
    return "; ".join(msgs) if msgs else "No flags"

def assign_due_by(tier, submit_date_str):
    submit_dt = datetime.fromisoformat(submit_date_str) if submit_date_str else datetime.now()
    days_map = {"High": 1, "Medium": 3, "Low": 7}
    delta_days = days_map.get(tier, 7)
    return (submit_dt + timedelta(days=delta_days)).date().isoformat()

def build_worklist(df: pd.DataFrame) -> pd.DataFrame:
    wl = df.copy()
    wl["aura_reasons"] = wl.apply(summarize_reasons, axis=1)
    wl["due_by"] = [
        assign_due_by(tier, submit_date)
        for tier, submit_date in zip(wl["aura_priority_tier"], wl.get("submit_date", [""]*len(wl)))
    ]

    cols_out = [
        "claim_id",
        "payer",
        "plan_type",
        "service_date",
        "submit_date",
        "service_category",
        "cpt_or_cdt",
        "modifiers",
        "pos",
        "aura_priority_tier",
        "aura_presubmit_score",
        "aura_reasons",
        "due_by"
    ]
    cols_out = [c for c in cols_out if c in wl.columns]
    return wl[cols_out]

worklist_preview = build_worklist(scored_claims)
worklist_preview.head()

import pandas as pd

claims300 = pd.read_csv("aura_claims_synthetic_300.csv")

cols_for_demo = [
    "claim_id",
    "payer",
    "plan_type",
    "service_date",
    "submit_date",
    "service_category",
    "cpt_or_cdt",
    "modifiers",
    "pos",
    "aura_priority_tier",
    "aura_presubmit_score",
    "aura_reasons"
]

claims300[cols_for_demo].head(15)

AURA – AI-Driven Claims Optimizer¶

Module 2 v3 • Pre-Submission Claims Quality Engine¶

1. Operational Problem Statement¶

2. Data Model (What AURA needs per claim line)¶

3. Rule Engine (Pre-Submission Flags)¶

4. Applying All Flags to a Batch¶

5. Risk Scoring & Priority Tier¶

6. Explainability -> Worklist¶

7. KPI Framework (How We Prove Impact)¶

8. Pilot Plan¶

9. Ask for Feedback (Mentorship Close)¶

10. Live Preview of AURA Output (300-row Synthetic Batch)¶

	claim_id	payer	plan_type	service_date	submit_date	service_category	cpt_or_cdt	modifiers	pos	aura_priority_tier	aura_presubmit_score	aura_reasons
0	C000001	UHC	Medicaid	2025-06-22	2025-09-22	Medical	G0439	25	2	Low	0	No flags
1	C000002	Cigna	Medicare Advantage	2025-02-12	2025-03-19	Dental	D7140	25	22	Low	18	Out-of-network plan edit
2	C000003	Cigna	Medicare Advantage	2024-11-02	2024-12-01	Medical	99214	76	2	Low	14	Telehealth POS/modifier mismatch
3	C000004	Aetna	Medicare Advantage	2025-07-19	2025-11-05	Behavioral	90832	25	19	Low	0	No flags
4	C000005	Aetna	Medicare Advantage	2025-06-25	2026-01-28	Behavioral	H0031	NaN	10	Low	14	Telehealth POS/modifier mismatch
5	C000006	Medicaid	Commercial	2025-01-25	2025-03-27	Medical	J1885	59	2	Low	0	No flags
6	C000007	Aetna	Medicare Advantage	2025-09-17	2026-05-13	Behavioral	H0004	76	19	Low	0	No flags
7	C000008	UHC	Medicaid	2025-06-22	2025-10-28	Behavioral	H0031	59	23	Low	0	No flags
8	C000009	Humana	Commercial	2025-05-09	2025-11-03	Behavioral	H0031	LT	21	Low	26	Duplicate claim
9	C000010	Medicaid	Medicare Advantage	2025-02-19	2025-08-01	Dental	D7140	NaN	10	Medium	58	Invalid POS for code; Non-covered service; Out...
10	C000011	Medicaid	Medicare Advantage	2025-07-06	2025-11-05	Behavioral	H0004	25	22	Low	26	Telehealth POS/modifier mismatch; Documentatio...
11	C000012	Medicaid	Medicare Advantage	2025-07-11	2S026-01-27	Behavioral	H0031	76	2	Low	12	Documentation insufficient
12	C000013	Aetna	Commercial	2025-03-24	2025-06-28	Medical	36415	NaN	32	Low	22	Provider not credentialed
13	C000014	BCBS	Medicaid	2024-10-25	2025-04-14	Dental	D2740	NaN	31	Low	16	Invalid POS for code
14	C000015	UHC	Medicare Advantage	2025-07-24	2026-01-18	Medical	93010	95	19	Low	0	No flags