Update throughput example config with per-algorithm sets and templates

avolkov-intel · avolkov-intel · commit ec85b08a6af6 · 2026-05-20T07:22:32.000-07:00
diff --git a/configs/throughput/example.json b/configs/throughput/example.json
@@ -1,70 +1,230 @@
 {
     "INCLUDE": ["../common/sklearn.json"],
     "PARAMETERS_SETS": {
-        "common parameters": {
-            "data": {
-                "split_kwargs": {
-                    "train_size": 8000,
-                    "test_size": 2000,
-                    "shuffle": true,
+        "throughput settings": {
+            "bench": {
+                "num_instances": 7,
+                "cores_per_instance": 8,
+                "measurement_duration": 30
+            }
+        },
+        "linear regression": {
+            "algorithm": {
+                "estimator": "LinearRegression",
+                "estimator_params": {
+                    "fit_intercept": true,
+                    "copy_X": true
+                }
+            }
+        },
+        "random forest regressor": {
+            "algorithm": {
+                "estimator": "RandomForestRegressor",
+                "estimator_params": {
+                    "n_estimators": 100,
+                    "max_depth": 12,
                     "random_state": 42
                 }
+            }
+        },
+        "knn classifier kdtree": {
+            "algorithm": {
+                "estimator": "KNeighborsClassifier",
+                "estimator_params": {
+                    "n_neighbors": 10,
+                    "weights": "uniform",
+                    "algorithm": "kd_tree",
+                    "metric": "minkowski",
+                    "p": 2
+                }
             },
-            "algorithm": { "device": "default" }
+            "data": {
+                "preprocessing_kwargs": { "normalize": "standard" }
+            }
         },
-        "throughput settings": {
-            "bench": {
-                "num_instances": 4,
-                "cores_per_instance": 4,
-                "measurement_duration": 30
+        "kmeans": {
+            "algorithm": {
+                "estimator": "KMeans",
+                "estimator_params": {
+                    "n_clusters": 10,
+                    "n_init": 1,
+                    "max_iter": 100,
+                    "tol": 1e-4,
+                    "random_state": 42,
+                    "algorithm": "lloyd"
+                },
+                "estimator_methods": {
+                    "inference": "predict"
+                }
+            },
+            "data": {
+                "preprocessing_kwargs": { "normalize": "standard" }
             }
         },
-        "datasets": {
+        "linear regression datasets": {
             "data": [
                 {
-                    "source": "make_classification",
+                    "source": "make_regression",
                     "generation_kwargs": {
-                        "n_classes": 2,
-                        "n_samples": 10000,
-                        "n_features": 64,
-                        "n_informative": 32
+                        "n_samples": 500000,
+                        "n_features": 50,
+                        "n_informative": 10,
+                        "noise": 20.0,
+                        "random_state": 42
                     }
+                },
+                {
+                    "source": "make_regression",
+                    "generation_kwargs": {
+                        "n_samples": 100000,
+                        "n_features": 2000,
+                        "n_informative": 10,
+                        "noise": 60.0,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "dataset": "year_prediction_msd",
+                    "split_kwargs": { "train_size": 0.8, "test_size": 0.2 }
+                },
+                {
+                    "dataset": "hepmass",
+                    "split_kwargs": { "train_size": 0.8, "test_size": 0.2 }
                 }
             ]
         },
-        "algorithms": [
-            {
-                "algorithm": {
-                    "estimator": "RandomForestClassifier",
-                    "estimator_params": { "n_estimators": 50 }
+        "random forest datasets": {
+            "data": [
+                {
+                    "source": "make_regression",
+                    "generation_kwargs": {
+                        "n_samples": 100000,
+                        "n_features": 20,
+                        "n_informative": 10,
+                        "noise": 10.0,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "source": "make_regression",
+                    "generation_kwargs": {
+                        "n_samples": 50000,
+                        "n_features": 200,
+                        "n_informative": 20,
+                        "noise": 30.0,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "dataset": "year_prediction_msd",
+                    "split_kwargs": { "train_size": 0.8, "test_size": 0.2 }
+                },
+                {
+                    "dataset": "medical_charges_nominal",
+                    "split_kwargs": { "ignore": true }
                 }
-            },
-            {
-                "algorithm": {
-                    "estimator": "KMeans",
-                    "estimator_params": {
-                        "n_clusters": 10,
-                        "init": "random",
-                        "algorithm": "lloyd",
-                        "max_iter": 100
+            ]
+        },
+        "knn datasets": {
+            "data": [
+                {
+                    "source": "make_classification",
+                    "generation_kwargs": {
+                        "n_classes": 5,
+                        "n_samples": 200000,
+                        "n_features": 10,
+                        "n_informative": 8,
+                        "n_redundant": 2,
+                        "n_repeated": 0,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "source": "make_classification",
+                    "generation_kwargs": {
+                        "n_classes": 5,
+                        "n_samples": 50000,
+                        "n_features": 50,
+                        "n_informative": 20,
+                        "n_redundant": 10,
+                        "n_repeated": 0,
+                        "random_state": 42
                     }
+                },
+                {
+                    "dataset": "letters",
+                    "split_kwargs": { "ignore": true }
+                },
+                {
+                    "dataset": "codrnanorm",
+                    "split_kwargs": { "ignore": true }
                 }
-            },
-            {
-                "algorithm": {
-                    "estimator": "LinearRegression"
+            ]
+        },
+        "kmeans datasets": {
+            "data": [
+                {
+                    "source": "make_blobs",
+                    "generation_kwargs": {
+                        "centers": 10,
+                        "cluster_std": 4.0,
+                        "n_samples": 500000,
+                        "n_features": 20,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "source": "make_blobs",
+                    "generation_kwargs": {
+                        "centers": 10,
+                        "cluster_std": 8.0,
+                        "n_samples": 100000,
+                        "n_features": 200,
+                        "random_state": 42
+                    }
+                },
+                {
+                    "dataset": "creditcard",
+                    "split_kwargs": { "ignore": true }
+                },
+                {
+                    "dataset": "hepmass",
+                    "split_kwargs": { "train_size": 0.8, "test_size": 0.2 }
                 }
-            }
-        ]
+            ]
+        }
     },
     "TEMPLATES": {
-        "throughput_test": {
+        "linear_regression_throughput": {
+            "SETS": [
+                "sklearn-ex[cpu] implementations",
+                "throughput settings",
+                "linear regression",
+                "linear regression datasets"
+            ]
+        },
+        "random_forest_throughput": {
+            "SETS": [
+                "sklearn-ex[cpu] implementations",
+                "throughput settings",
+                "random forest regressor",
+                "random forest datasets"
+            ]
+        },
+        "knn_throughput": {
+            "SETS": [
+                "sklearn-ex[cpu] implementations",
+                "throughput settings",
+                "knn classifier kdtree",
+                "knn datasets"
+            ]
+        },
+        "kmeans_throughput": {
             "SETS": [
                 "sklearn-ex[cpu] implementations",
-                "common parameters",
                 "throughput settings",
-                "datasets",
-                "algorithms"
+                "kmeans",
+                "kmeans datasets"
             ]
         }
     }