
Fix for Logistic Regression loss scaling (#1908) (#1943)
* Fix for LogReg loss scaling

* Correct deselected test name

(cherry picked from commit aaad387)

Co-authored-by: Alexander Andreev <alexander.andreev@intel.com>
mergify[bot] and Alexsandruss authored Jul 18, 2024
1 parent: 52f27a5 · commit: 7f19d4b
Showing 2 changed files with 23 additions and 7 deletions.
daal4py/sklearn/linear_model/logistic_path.py (24 changes: 17 additions & 7 deletions)
@@ -426,6 +426,10 @@ def __logistic_regression_path(
         (classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype
     )
 
+    # Adoption of https://github.com/scikit-learn/scikit-learn/pull/26721
+    if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
+        sw_sum = len(X) if sample_weight is None else np.sum(sample_weight)
+
     if coef is not None:
         # it must work both giving the bias term and not
         if multi_class == "ovr":
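
The new `sw_sum` turns scikit-learn's summed log-loss objective into a (weighted) mean, following scikit-learn PR 26721. Dividing both the data term and the L2 term by the same constant leaves the minimizer unchanged; a minimal numeric sketch with illustrative values (not code from the patch):

    import numpy as np

    rng = np.random.default_rng(0)
    losses = rng.random(100)  # stand-ins for per-sample log-loss terms
    w = rng.normal(size=5)
    C, sw_sum = 1.0, 100.0    # sw_sum == n_samples when sample_weight is None

    old = losses.sum() + w @ w / (2 * C)                    # summed objective
    new = losses.sum() / sw_sum + w @ w / (2 * C * sw_sum)  # mean-scaled objective
    assert np.isclose(old / sw_sum, new)  # same objective up to a constant factor
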
@@ -592,18 +596,18 @@ def grad(x, *args):
                     X,
                     target,
                     0.0,
-                    1.0 / (2 * C * C_daal_multiplier),
+                    1.0 / (2 * C * C_daal_multiplier * sw_sum),
                     fit_intercept,
                     value=True,
                     gradient=True,
                     hessian=False,
                 )
             else:
                 if sklearn_check_version("1.1"):
-                    l2_reg_strength = 1.0 / C
+                    l2_reg_strength = 1.0 / (C * sw_sum)
                     extra_args = (X, target, sample_weight, l2_reg_strength, n_threads)
                 else:
-                    extra_args = (X, target, 1.0 / C, sample_weight)
+                    extra_args = (X, target, 1.0 / (C * sw_sum), sample_weight)
 
             iprint = [-1, 50, 1, 100, 101][
                 np.searchsorted(np.array([0, 1, 2, 3]), verbose)
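
Although the rescaled objective has the same minimizer, L-BFGS stops on gradient magnitude (`gtol`), and dividing by `sw_sum` shrinks the gradients, so the solver can halt at slightly different coefficients; this is why several upstream tests are deselected in deselected_tests.yaml below. A toy reproduction of the effect on a ridge-style stand-in (a sketch, not the patched code):

    import numpy as np
    from scipy.optimize import minimize

    rng = np.random.default_rng(0)
    X, y = rng.normal(size=(50, 3)), rng.normal(size=50)

    def f(w, scale):  # returns (value, gradient), scaled like sum vs. mean
        r = X @ w - y
        return (r @ r + w @ w) / scale, (2 * X.T @ r + 2 * w) / scale

    w_sum = minimize(f, np.zeros(3), args=(1.0,), jac=True, method="L-BFGS-B").x
    w_mean = minimize(f, np.zeros(3), args=(50.0,), jac=True, method="L-BFGS-B").x
    print(np.max(np.abs(w_sum - w_mean)))  # tiny but generally nonzero
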
@@ -614,7 +618,13 @@ def grad(x, *args):
                 method="L-BFGS-B",
                 jac=True,
                 args=extra_args,
-                options={"iprint": iprint, "gtol": tol, "maxiter": max_iter},
+                options={
+                    "maxiter": max_iter,
+                    "maxls": 50,
+                    "iprint": iprint,
+                    "gtol": tol,
+                    "ftol": 64 * np.finfo(float).eps,
+                },
             )
             n_iter_i = _check_optimize_result(
                 solver,
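
The expanded `options` dict mirrors the upstream scikit-learn solver configuration: `maxls` caps the number of line-search steps per L-BFGS-B iteration, and `ftol` adds a relative-improvement stopping test alongside the gradient-based `gtol`. All of these are standard `scipy.optimize.minimize(method="L-BFGS-B")` options; an isolated illustration on a hypothetical toy quadratic:

    import numpy as np
    from scipy.optimize import minimize

    res = minimize(
        lambda w: (w @ w, 2 * w),  # value and gradient of a toy quadratic
        np.ones(3),
        jac=True,
        method="L-BFGS-B",
        options={
            "maxiter": 100,
            "maxls": 50,
            "gtol": 1e-6,
            "ftol": 64 * np.finfo(float).eps,
        },
    )
    print(res.nit, res.x)  # converges to the zero vector in a few iterations
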
@@ -629,7 +639,7 @@
         if _dal_ready:
 
             def make_ncg_funcs(f, value=False, gradient=False, hessian=False):
-                daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier)
+                daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier * sw_sum)
                 _obj_, X_, y_, n_samples = daal_extra_args_func(
                     classes.size,
                     w0,
@@ -662,10 +672,10 @@ def _func_(x, *args):
             )
         else:
             if sklearn_check_version("1.1"):
-                l2_reg_strength = 1.0 / C
+                l2_reg_strength = 1.0 / (C * sw_sum)
                 args = (X, target, sample_weight, l2_reg_strength, n_threads)
             else:
-                args = (X, target, 1.0 / C, sample_weight)
+                args = (X, target, 1.0 / (C * sw_sum), sample_weight)
 
         w0, n_iter_i = _newton_cg(
             hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol
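
On the DAAL/oneDAL path the same scaling enters through `daal_penaltyL2`. Since that expression folds the conventional 1/2 of the L2 penalty into the constant, it should come out to half of `l2_reg_strength` whenever `C_daal_multiplier` is 1; a quick consistency check with illustrative values (an assumption for illustration, not test code from the repository):

    C, sw_sum, C_daal_multiplier = 0.5, 120.0, 1.0  # illustrative values

    l2_reg_strength = 1.0 / (C * sw_sum)                         # sklearn >= 1.1 branch
    daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier * sw_sum)  # DAAL branch

    assert daal_penaltyL2 == 0.5 * l2_reg_strength
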
deselected_tests.yaml (6 changes: 6 additions & 0 deletions)
@@ -190,6 +190,12 @@ deselected_tests:
   # margin above the test threshold, see https://github.com/scikit-learn/scikit-learn/pull/13645
   - linear_model/tests/test_logistic.py::test_dtype_match
 
+  # Logistic Regression coeffs change due to fix for loss scaling
+  # (https://github.com/scikit-learn/scikit-learn/pull/26721)
+  - feature_selection/tests/test_from_model.py::test_importance_getter[estimator0-named_steps.logisticregression.coef_]
+  - inspection/_plot/tests/test_boundary_decision_display.py::test_class_of_interest_binary[predict_proba]
+  - linear_model/tests/test_sag.py::test_sag_pobj_matches_logistic_regression
+
   # This fails on certain platforms. While weighted data does not go through DAAL,
   # unweighted does. Since convergence does not occur (comment in the test
   # suggests that) and because coefficients are slightly different,
