diff --git a/pecos/core/base.py b/pecos/core/base.py index 36752d1..8ab467e 100644 --- a/pecos/core/base.py +++ b/pecos/core/base.py @@ -2058,25 +2058,37 @@ def link_calibrator_methods(self): [c_uint64, POINTER(c_double), POINTER(c_double), POINTER(c_double)], ) - def fit_platt_transform(self, logits, tgt_prob): + def fit_platt_transform(self, logits, targets, clip_tgt_prob=True): """Python to C/C++ interface for platt transfrom fit. Ref: https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf Args: logits (ndarray): 1-d array of logit with length N. - tgt_prob (ndarray): 1-d array of target probability scores within [0, 1] with length N. + targets (ndarray): 1-d array of target probability scores within [0, 1] with length N. + clip_tgt_prob (bool): whether to clip the target probability to + [1/(prior0 + 2), 1 - 1/(prior1 + 2)] + where prior1 = sum(targets), prior0 = N - prior1 Returns: A, B: coefficients for Platt's scale. """ assert isinstance(logits, np.ndarray) - assert isinstance(tgt_prob, np.ndarray) - assert len(logits) == len(tgt_prob) - assert logits.dtype == tgt_prob.dtype + assert isinstance(targets, np.ndarray) + assert len(logits) == len(targets) + assert logits.dtype == targets.dtype - if tgt_prob.min() < 0 or tgt_prob.max() > 1.0: + if targets.min() < 0 or targets.max() > 1.0: raise ValueError("Target probability out of bound!") + min_prob, max_prob = 0.0, 1.0 + if clip_tgt_prob: + prior1 = np.sum(targets) + prior0 = len(targets) - prior1 + min_prob = 1.0 / (prior0 + 2.0) + max_prob = (prior1 + 1.0) / (prior1 + 2.0) + + tgt_prob = np.clip(targets, min_prob, max_prob) + AB = np.array([0, 0], dtype=np.float64) if tgt_prob.dtype == np.float32: diff --git a/pecos/core/utils/newton.hpp b/pecos/core/utils/newton.hpp index f6c088e..9fb7197 100644 --- a/pecos/core/utils/newton.hpp +++ b/pecos/core/utils/newton.hpp @@ -280,23 +280,29 @@ namespace pecos { template uint32_t fit_platt_transform(size_t num_samples, const value_type *logits, const value_type *tgt_probs, double& A, double& B) { - // define the return code - enum { - SUCCESS=0, - LINE_SEARCH_FAIL=1, - MAX_ITER_REACHED=2, - }; + // define the return code + enum { + SUCCESS=0, + LINE_SEARCH_FAIL=1, + MAX_ITER_REACHED=2, + }; // hyper parameters int max_iter = 100; // Maximal number of iterations double min_step = 1e-10; // Minimal step taken in line search double sigma = 1e-12; // For numerically strict PD of Hessian - double eps = 1e-6; + double eps = 1e-5; + + // calculate prior of B + double prior1 = 0; + for (size_t i = 0; i < num_samples; i++) { + prior1 += tgt_probs[i]; + } + double prior0 = double(num_samples) - prior1; - int iter; // Initial Point and Initial Fun Value - A = 0.0; B = 1.0; + A = 0.0; B = log((prior0 + 1.0) / (prior1 + 1.0)); double fval = 0.0; for (size_t i = 0; i < num_samples; i++) { @@ -307,17 +313,18 @@ namespace pecos { fval += (tgt_probs[i] - 1) * fApB + log(1 + exp(fApB)); } } + int iter; for (iter = 0; iter < max_iter; iter++) { // Update Gradient and Hessian (use H' = H + sigma I) double h11 = sigma; double h22 = sigma; // numerically ensures strict PD double h21 = 0.0; - double g1 = 0.0; - double g2 = 0.0; + double g1 = A * sigma; + double g2 = B * sigma; for (size_t i = 0; i < num_samples; i++) { double fApB = logits[i] * A + B; - double p = 0, q = 0; + double p = 0, q = 0; if (fApB >= 0) { p = exp(-fApB) / (1.0 + exp(-fApB)); q = 1.0 / (1.0 + exp(-fApB)); @@ -376,15 +383,15 @@ namespace pecos { if (stepsize < min_step) { printf("WARNING: fit_platt_transform: Line search fails\n"); - return LINE_SEARCH_FAIL; + return LINE_SEARCH_FAIL; } } if (iter >= max_iter) { printf("WARNING: fit_platt_transform: Reaching maximal iterations\n"); - return MAX_ITER_REACHED; + return MAX_ITER_REACHED; } - return SUCCESS; + return SUCCESS; } } // namespace pecos #endif diff --git a/test/pecos/core/test_clib.py b/test/pecos/core/test_clib.py index a6ee072..45ec02d 100644 --- a/test/pecos/core/test_clib.py +++ b/test/pecos/core/test_clib.py @@ -78,12 +78,12 @@ def test_platt_scale(): orig = np.arange(-15, 15, 1, dtype=np.float32) tgt = np.array([1.0 / (1 + np.exp(A * t + B)) for t in orig], dtype=np.float32) - At, Bt = clib.fit_platt_transform(orig, tgt) + At, Bt = clib.fit_platt_transform(orig, tgt, clip_tgt_prob=False) assert B == approx(Bt, abs=1e-6), f"Platt_scale B error: {B} != {Bt}" assert A == approx(At, abs=1e-6), f"Platt_scale A error: {A} != {At}" orig = np.arange(-15, 15, 1, dtype=np.float64) tgt = np.array([1.0 / (1 + np.exp(A * t + B)) for t in orig], dtype=np.float64) - At, Bt = clib.fit_platt_transform(orig, tgt) + At, Bt = clib.fit_platt_transform(orig, tgt, clip_tgt_prob=False) assert B == approx(Bt, abs=1e-6), f"Platt_scale B error: {B} != {Bt}" assert A == approx(At, abs=1e-6), f"Platt_scale A error: {A} != {At}"