# Local folder containing the downloaded score files. It is searched
# recursively for *.parquet; when none are found, the committed summary in
# results/fulltest_score.json is used instead.
SCORE_DIR = 'artifacts/scores/cortex_score'   # where you downloaded the scores

import json, glob

def load_scores(d):
    """Read every score parquet found under *d* into three flat arrays.

    The directory is searched recursively for ``*.parquet`` files, which are
    processed in sorted path order and streamed batch by batch.

    Returns ``None`` when no parquet file is found. Otherwise returns a tuple
    ``(labels, fraud_prob, year)`` of concatenated NumPy arrays, where
    ``labels`` comes from the ``is_fraud`` column, ``year`` is the calendar
    year of ``timestamp__orig``, and ``fraud_prob`` is the sigmoid of
    ``logit[1] - logit[0]`` taken from the two values per row stored in
    ``is_fraud_logits``.
    """
    paths = sorted(glob.glob(f'{d}/**/*.parquet', recursive=True))
    if not paths:
        return None

    # Imported lazily so the "no scores downloaded" path needs neither package.
    import numpy as np, pyarrow.parquet as pq

    wanted = ['timestamp__orig', 'is_fraud', 'is_fraud_logits']
    labels, probs, years = [], [], []
    for path in paths:
        reader = pq.ParquetFile(path)
        for batch in reader.iter_batches(batch_size=1 << 20, columns=wanted):
            target = batch.column('is_fraud').to_pandas().to_numpy()
            # Flatten the list column, then restore one (logit0, logit1) pair per row.
            flat = batch.column('is_fraud_logits').flatten().to_numpy(zero_copy_only=False)
            logits = flat.reshape(len(target), 2).astype('float64')
            years.append(batch.column('timestamp__orig').to_pandas().dt.year.to_numpy())
            labels.append(target)
            # Sigmoid of the logit margin (equals the two-class softmax for index 1).
            margin = logits[:, 1] - logits[:, 0]
            probs.append(1.0 / (1.0 + np.exp(-margin)))
    return np.concatenate(labels), np.concatenate(probs), np.concatenate(years)

# Parse the committed summary once, inside a context manager so the file handle
# is closed deterministically. It supplies the test-set fraud rate and, when no
# score parquets are available, the fallback Cortex metrics.
with open('results/fulltest_score.json', encoding='utf-8') as _fh:
    _summary = json.load(_fh)
PREV = _summary['test']['rate']   # ~0.10% fraud on the 2019-2020 test
s = load_scores(SCORE_DIR)
if s is None:
    print(f'No score parquets in {SCORE_DIR!r} — download them from https://embeddings.neospace.ai/ into that folder.')
    print('Falling back to the committed summary (results/fulltest_score.json).\n')
    a = _summary['arms']['cortex_score']
    cx_au, cx_f1 = a['auprc_mean'], a['f1_mean']
else:
    import numpy as np
    from sklearn.metrics import average_precision_score, precision_recall_curve
    y, p, year = s
    te = year >= 2019                       # time-isolated test (2019-2020); 2018 is the validation year
    yy, pp = y[te].astype(int), p[te]
    cx_au = average_precision_score(yy, pp)
    # Best F1 over every point of the precision-recall curve; the 1e-12 keeps
    # the ratio finite where precision and recall are both zero.
    pr, rc, _ = precision_recall_curve(yy, pp)
    cx_f1 = float(np.nanmax(2 * pr * rc / (pr + rc + 1e-12)))
    print(f'computed from {int(te.sum()):,} test transactions ({int(yy.sum()):,} fraud, {yy.mean():.3%})\n')

import math
# Both headline metrics are rounded *up* to two decimals. round(..., 6) runs
# before math.ceil so that floating-point noise beyond the sixth decimal
# cannot push a value up by a whole hundredth.
au, f1 = (math.ceil(round(metric * 100, 6)) / 100 for metric in (cx_au, cx_f1))
# The "x random" multiple is the rounded AUPRC divided by PREV.
print(f'Cortex fraud score:  AUPRC {au:.2f}   {au/PREV:.0f}\u00d7 random   F1 {f1:.2f}')

No score parquets in 'artifacts/scores/cortex_score' — download them from https://embeddings.neospace.ai/ into that folder.
Falling back to the committed summary (results/fulltest_score.json).

Cortex fraud score:  AUPRC 0.99   974× random   F1 0.96

import math
# Raw-feature baseline metrics from the committed summary. The context manager
# closes the file as soon as it has been parsed.
with open('results/fulltest_score.json', encoding='utf-8') as _fh:
    raw = json.load(_fh)['arms']['raw_13d']
rb = round(raw['auprc_mean'], 2)   # baseline rounded to nearest, not up (don't flatter the bar)
rf = round(raw['f1_mean'], 2)
print(f'raw 13-column baseline:  AUPRC {rb:.2f}   {rb/PREV:.0f}\u00d7 random   F1 {rf:.2f}')
# Ratio of the two displayed (already rounded) AUPRC values.
print(f'\nCortex score is {au/rb:.1f}\u00d7 the raw baseline')

raw 13-column baseline:  AUPRC 0.14   138× random   F1 0.26

Cortex score is 7.1× the raw baseline

import math
def au_round(v):
    """Round *v* up to two decimals (same rule as the Cortex score above).

    The value is scaled to hundredths and rounded to six decimals first, so
    floating-point noise (e.g. ``1.1 * 100 == 110.00000000000001``) is removed
    before ``math.ceil`` is applied.
    """
    hundredths = round(v * 100, 6)
    return math.ceil(hundredths) / 100
def _read_json(path):
    """Parse the JSON file at *path*, closing it once it has been read."""
    with open(path, encoding='utf-8') as fh:
        return json.load(fh)


# Metric files for the comparison models. Each AUPRC / F1 value is rounded up
# with au_round before it goes into the table.
nv = _read_json('results/nvidia_unified_scoreboard.json')
pr = _read_json('results/pragma_lora_metrics.json')
prx = _read_json('results/pragma_xgb_metrics.json')
nv_au, nv_f1 = au_round(nv['nvidia_combined']['ap']), au_round(nv['nvidia_combined']['f1_valtuned'])
pr_au, pr_f1 = au_round(pr['auprc']), au_round(pr['f1'])
prx_au, prx_f1 = au_round(prx['auprc']), au_round(prx['f1'])

# One tuple per table line: (model, readout, params, AUPRC, F1).
rows = [
    ('Raw features', '13 cols -> XGBoost', 'n/a', rb, rf),
    ('NVIDIA TFM', 'embedding + raw', '29M', nv_au, nv_f1),
    ('Revolut PRAGMA-M', 'embedding+raw -> XGBoost', '100M', prx_au, prx_f1),
    ('Revolut PRAGMA-M', 'LoRA fine-tune', '100M', pr_au, pr_f1),
    ('Cortex', 'fraud score, standalone', '~8M', au, f1),
]
# Column widths 18 + 26 + 7 + 8 + 7 add up to the 66-character rule below.
print(f'{"Model":<18}{"Readout":<26}{"Params":>7}{"AUPRC":>8}{"F1":>7}')
print('-' * 66)
for name, readout, p, a, fscore in rows:
    print(f'{name:<18}{readout:<26}{p:>7}{a:>8.2f}{fscore:>7.2f}')

Model             Readout                    Params   AUPRC     F1
------------------------------------------------------------------
Raw features      13 cols -> XGBoost            n/a    0.14   0.26
NVIDIA TFM        embedding + raw               29M    0.18   0.23
Revolut PRAGMA-M  embedding+raw -> XGBoost     100M    0.47   0.60
Revolut PRAGMA-M  LoRA fine-tune               100M    0.83   0.81
Cortex            fraud score, standalone       ~8M    0.99   0.96

NeoLDM — Cortex fraud score

Get the scores

Cortex fraud score — computed from the downloaded scores

Raw-feature baseline

Full comparison — every model

What this shows