RubixML · chouaibcher · Feb 15, 2026 · Feb 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,6 @@
+- 2.5.3
+    - Added K Medoids clusterer
+
 - 2.5.2
     - Fix bug in One-class SVM inferencing
 

diff --git a/benchmarks/Clusterers/KMedoidsBench.php b/benchmarks/Clusterers/KMedoidsBench.php
@@ -0,0 +1,60 @@
+<?php
+
+namespace Rubix\ML\Benchmarks\Clusterers;
+
+use Rubix\ML\Clusterers\KMedoids;
+use Rubix\ML\Datasets\Generators\Blob;
+use Rubix\ML\Datasets\Generators\Agglomerate;
+
+/**
+ * Benchmark that times training a K Medoids clusterer followed by a prediction pass.
+ *
+ * The setUp method (registered through the BeforeMethods annotation below) builds two
+ * datasets from an Agglomerate of three Blob generators keyed by Iris species names,
+ * plus a fresh estimator, so the timed subject only calls train() and predict().
+ *
+ * @Groups({"Clusterers"})
+ * @BeforeMethods({"setUp"})
+ */
+class KMedoidsBench
+{
+    protected const TRAINING_SIZE = 10000; // number of samples generated for the training set
+
+    protected const TESTING_SIZE = 10000; // number of samples generated for the testing set
+
+    /**
+     * The dataset handed to train(); generated in setUp with TRAINING_SIZE samples.
+     *
+     * @var \Rubix\ML\Datasets\Labeled
+     */
+    protected $training;
+
+    /**
+     * The dataset handed to predict(); generated in setUp with TESTING_SIZE samples.
+     *
+     * @var \Rubix\ML\Datasets\Labeled
+     */
+    protected $testing;
+
+    /**
+     * The clusterer under benchmark; constructed in setUp.
+     *
+     * @var KMedoids
+     */
+    protected $estimator;
+
+    public function setUp() : void // builds the generator, both datasets and the estimator
+    {
+        $generator = new Agglomerate([ // one Blob per label, each built from two 4-element arrays
+            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
+            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
+            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
+        ]);
+
+        $this->training = $generator->generate(self::TRAINING_SIZE);
+
+        $this->testing = $generator->generate(self::TESTING_SIZE);
+
+        $this->estimator = new KMedoids(3); // k = 3, matching the three blobs above; other arguments left at their defaults
+    }
+
+    /**
+     * Timed subject: train the estimator on the training set, then predict the
+     * testing set. The prediction result is discarded; only the elapsed time matters.
+     *
+     * @Subject
+     * @Iterations(5)
+     * @OutputTimeUnit("seconds", precision=3)
+     */
+    public function trainPredict() : void
+    {
+        $this->estimator->train($this->training);
+
+        $this->estimator->predict($this->testing);
+    }
+}
diff --git a/composer.json b/composer.json
@@ -11,7 +11,7 @@
         "data science", "data mining", "dbscan", "deep learning", "dimensionality reduction",
         "ensemble", "estimator", "etl", "feature extraction", "feature selection", "feature importance",
         "gaussian mixture", "gbm", "gmm", "gradient boost", "grid search", "image recognition",
-        "imputation", "inference", "isolation forest", "k-means", "kmeans", "k-nearest neighbors",
+        "imputation", "inference", "isolation forest", "k-means", "kmeans", "k-medoids", "kmedoids", "k-nearest neighbors",
         "knn", "linear regression", "loda", "local outlier factor", "lof", "logistic regression",
         "machine learning", "manifold learning", "mean shift", "ml", "mlp", "multilayer perceptron",
         "naive bayes", "neural network", "natural language processing", "nearest neighbors", "nlp",

diff --git a/docs/clusterers/k-medoids.md b/docs/clusterers/k-medoids.md
@@ -0,0 +1,61 @@
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/Clusterers/KMedoids.php">[source]</a></span>
+
+# K Medoids
+A robust centroid-based hard clustering algorithm that uses actual data points (medoids) as cluster centers instead of computed means. K Medoids is more resistant to outliers than K Means and is suitable for clustering with arbitrary distance metrics. The algorithm minimizes the sum of dissimilarities between samples and their nearest medoid using the Partitioning Around Medoids (PAM) approach.
+
+**Interfaces:** [Estimator](../estimator.md), [Learner](../learner.md), [Online](../online.md), [Probabilistic](../probabilistic.md), [Persistable](../persistable.md), [Verbose](../verbose.md)
+
+**Data Type Compatibility:** Continuous
+
+## Parameters
+| # | Name | Default | Type | Description |
+|---|---|---|---|---|
+| 1 | k | | int | The number of target clusters. |
+| 2 | batch size | 128 | int | The size of each mini batch in samples. |
+| 3 | epochs | 1000 | int | The maximum number of training rounds to execute. |
+| 4 | min change | 1e-4 | float | The minimum change in the inertia for training to continue. |
+| 5 | window | 5 | int | The number of epochs without improvement in the training loss to wait before considering an early stop. |
+| 6 | kernel | Euclidean | Distance | The distance kernel used to compute the distance between sample points. |
+| 7 | seeder | PlusPlus | Seeder | The seeder used to initialize the cluster medoids. |
+
+## Example
+```php
+use Rubix\ML\Clusterers\KMedoids;
+use Rubix\ML\Kernels\Distance\Euclidean;
+use Rubix\ML\Clusterers\Seeders\PlusPlus;
+
+$estimator = new KMedoids(3, 128, 300, 10.0, 10, new Euclidean(), new PlusPlus());
+```
+
+## Additional Methods
+Return the *k* computed medoids of the training set:
+```php
+public medoids() : array[]
+```
+
+Return the number of training samples that each medoid is responsible for:
+```php
+public sizes() : int[]
+```
+
+Return an iterable progress table with the steps from the last training session:
+```php
+public steps() : iterable
+```
+
+```php
+use Rubix\ML\Extractors\CSV;
+
+$extractor = new CSV('progress.csv', true);
+
+$extractor->export($estimator->steps());
+```
+
+Return the loss for each epoch from the last training session:
+```php
+public losses() : float[]|null
+```
+
+## References
+[^1]: L. Kaufman et al. (1987). Clustering by means of Medoids.
+[^2]: H. S. Park et al. (2009). A simple and fast algorithm for K-medoids clustering.
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -97,6 +97,7 @@ nav:
       - Fuzzy C Means: clusterers/fuzzy-c-means.md
       - Gaussian Mixture: clusterers/gaussian-mixture.md
       - K Means: clusterers/k-means.md
+      - K Medoids: clusterers/k-medoids.md
       - Mean Shift: clusterers/mean-shift.md
     - Anomaly Detectors:
       - Gaussian MLE: anomaly-detectors/gaussian-mle.md