diff --git a/docs/methods/linear_model/lasso_regularization.md b/docs/methods/linear_model/lasso_regularization.md new file mode 100644 index 0000000..72a19ad --- /dev/null +++ b/docs/methods/linear_model/lasso_regularization.md @@ -0,0 +1,46 @@ +# Lasso Regularization + +It stands for Least Absolute Shrinkage and Selection Operator. It adds L1 as the penalty. + +L1 is the sum of the absolute values of the beta coefficients. + +## Parameters + +| Name | Definition | Defaults | Type | +| ------------- | ------------------------------------------------------------------------------------------- | -------- | ---------| +| lambda | Constant that multiplies the L1 term, controlling regularization strength | 0.01 | `double` | + + +## Attributes + +| Name | Definition | Shape | +| ------------ | --------------------------------------------------------- | ---------- | +| Coefficients | Estimated coefficients for the lasso regularization (intercept first) | n_features + 1 | + +## Methods + +| Name | Definition | Return value | +| ------------------------------- | ----------------------------------------------------- | ----------------- | +| `lossFunction(vector> x, vector y)` | Regularized loss function | `double` | +| `gradient(vector> x,vector y)` | Find gradient | `vector` | +| `gradientDescent(vector> x, vector y, double alpha)` | Gradient descent optimization algorithm | `void` | +| `fit(vector> x, vector y,int epochs,double alpha)` | Fit linear model | `vector` | +| `predict(vector> x)` | Predict using the linear model | `vector` | +| `printCoefficients()` | Print the estimated coefficients | `void` | + +## Example + +```cpp +double alpha = 0.01; +std::vector> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +std::vector y = {2, 3, 4, 5}; +double lambda = 0.1; +int epochs = 1000; +LassoRegularization model(lambda); +model.fit(x, y, epochs, alpha); +model.printCoefficients(); +std::vector yPred = model.predict(x); +for (int i = 
0; i < y.size(); i++) + std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i]<< std::endl; + +``` diff --git a/docs/methods/linear_model/ridge_regularization.md b/docs/methods/linear_model/ridge_regularization.md new file mode 100644 index 0000000..eadd22f --- /dev/null +++ b/docs/methods/linear_model/ridge_regularization.md @@ -0,0 +1,50 @@ +# Ridge Regularization + +It adds L2 as the penalty. + +L2 is the sum of the squared magnitudes of the beta coefficients. + +## Parameters + +| Name | Definition | Defaults | Type | +| ------------- | ------------------------------------------------------------------------------------------- | -------- | ---------| +| lambda | Constant that multiplies the L2 term, controlling regularization strength | 0.01 | `double` | + + +## Attributes + +| Name | Definition | Shape | +| ------------ | --------------------------------------------------------- | ---------- | +| Coefficients | Estimated coefficients for the ridge regularization (intercept first) | n_features + 1 | + +## Methods + +| Name | Definition | Return value | +| ------------------------------- | ----------------------------------------------------- | ----------------- | +| `lossFunction(vector x, double y)` | Regularized loss function | `double` | +| `gradient(vector x,double y)` | Find gradient | `vector` | +| `gradientDescent(vector> x, vector y, double alpha,int epochs)` | Gradient descent optimization algorithm | `void` | +| `fit(vector> x, vector y,int epochs,double alpha)` | Fit linear model | `vector` | +| `predict(vector x)` | Predict using the linear model | `double` | +| `printCoefficients()` | Print the estimated coefficients | `void` | + +## Example + +```cpp +double alpha = 0.01; +std::vector> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +std::vector y = {2, 3, 4, 5}; +double lambda = 0.1; +int epochs = 1000; +RidgeRegularization model(lambda); +model.fit(x, y, epochs, alpha); +model.printCoefficients(); +std::vector yPred; +for (int i = 0; i 
< x.size(); i++) +{ + yPred.push_back(predict(x[i])); +} +for (int i = 0; i < y.size(); i++) + std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << std::endl; + +``` diff --git a/docs/methods/metrics/silhouette_score.md b/docs/methods/metrics/silhouette_score.md deleted file mode 100644 index 27d8b78..0000000 --- a/docs/methods/metrics/silhouette_score.md +++ /dev/null @@ -1,33 +0,0 @@ -# Silhouette Score - -The silhouette score is calculated as the average of all points si, where si is the difference of minimum of inter cluster distance to average of intra cluster distance divided by maximum of both. -The silhouette Score reflects how good the clusters are. - -## Parameters - -| Name | Definition | Type | -| ------------- | ------------------------------------------------------------------------------------------- | ----------------| -| numClusters | Takes a number of clusters | `int` | -| typeDist | You require euclidean/manhattan distance to compute silhoutte Score | `string` | -| x | Values of various points | `vector>` | -| y | Cluster number to which each x value belongs to | `vector` | - - -## Methods - -| Name | Definition | Return value | -| -----------------------------------------------| ----------------------------------------------------- | ----------------- | -| `silhouetteScore(vector>x,vector y,int numClusters,string typeDist)`|To find the silhoutte score| `double` | - -## Example - -```cpp - -std::vector> x{{1,2,3},{1.21,2.32,3.24},{5.56,5.45,5.23},{5.35,5.00,5.78}}; -std::vector y{0,0,1,1}; -int numClusters=2; -std::string s = "euclidean"; -double score = silhoutteScore(x,y,numClusters,s); -std::cout< int main() { diff --git a/examples/linear_model/lasso_regression.cpp b/examples/linear_model/lasso_regression.cpp new file mode 100644 index 0000000..5048129 --- /dev/null +++ b/examples/linear_model/lasso_regression.cpp @@ -0,0 +1,22 @@ +// #include "../src/slowmokit/methods/linear_model/lasso_regression.hpp" + +// int 
main() +// { +// std::vector> x = { +// {1.0, 2.0, 3.0}, {2.0, 3.0, 4.0}, {3.0, 4.0, 5.0}}; +// std::vector y = {1.0, 2.0, 3.0}; +// double alpha = 0.01; +// double lambda = 0.1; +// int epochs = 100; + +// LassoRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); + +// std::vector yPred = model.predict(x); +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } \ No newline at end of file diff --git a/examples/linear_model/linear_regression.cpp b/examples/linear_model/linear_regression.cpp new file mode 100644 index 0000000..95e8807 --- /dev/null +++ b/examples/linear_model/linear_regression.cpp @@ -0,0 +1,18 @@ +#include "../../src/slowmokit/methods/linear_model/linear_regression.hpp" +#include "../../src/slowmokit/core.hpp" + +int main() +{ + LinearRegression model; + std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; + std::vector y = {2, 3, 4, 5, 6}; + + model.fit(x, y); + model.printCoefficients(); + + std::vector yPred = model.predict(x); + for (int i = 0; i < y.size(); i++) + cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; + + return 0; +} diff --git a/examples/methods/linear_model/logistic_regression.cpp b/examples/linear_model/logistic_regression.cpp similarity index 85% rename from examples/methods/linear_model/logistic_regression.cpp rename to examples/linear_model/logistic_regression.cpp index ada7f4e..67792c5 100644 --- a/examples/methods/linear_model/logistic_regression.cpp +++ b/examples/linear_model/logistic_regression.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/core.hpp" // int main() // { diff --git a/examples/linear_model/ridge_regression.cpp b/examples/linear_model/ridge_regression.cpp new file 
mode 100644 index 0000000..c4afe1e --- /dev/null +++ b/examples/linear_model/ridge_regression.cpp @@ -0,0 +1,25 @@ +// #include "../src/slowmokit/methods/linear_model/ridge_regression.hpp" + +// int main() +// { +// double alpha = 0.01; +// std::vector> x = { +// {1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +// std::vector y = {2, 3, 4, 5}; + +// double lambda = 0.1; +// int epochs = 1000; +// RidgeRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); +// std::vector yPred; +// for (int i = 0; i < x.size(); i++) +// { +// yPred.push_back(predict(x[i])); +// } +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } diff --git a/examples/matrix.cpp b/examples/matrix.cpp index 677311c..e5a3707 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -3,9 +3,7 @@ int main() { int n = 3, m = 3; - Matrix mat(2, 2); - // std::cout << mat << " 2"; return 0; -} \ No newline at end of file +} diff --git a/examples/methods/linear_model/linear_regression.cpp b/examples/methods/linear_model/linear_regression.cpp deleted file mode 100644 index dc58d05..0000000 --- a/examples/methods/linear_model/linear_regression.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// #include "../../../src/slowmokit/methods/linear_model/linear_regression.hpp" - -// int main() -// { -// LinearRegression model; -// std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; -// std::vector y = {2, 3, 4, 5, 6}; - -// model.fit(x, y); -// model.printCoefficients(); - -// std::vector yPred = model.predict(x); -// for (int i = 0; i < y.size(); i++) -// cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; - -// return 0; -// } diff --git a/examples/methods/metrics/silhouette_score.cpp b/examples/methods/metrics/silhouette_score.cpp deleted file mode 100644 index 29120a8..0000000 --- a/examples/methods/metrics/silhouette_score.cpp +++ /dev/null @@ 
-1,17 +0,0 @@ -// #include "../../src/slowmokit/methods/cluster/silhouette_score.hpp" -// #include "../../../core.hpp" - -// signed main(){ -// std::vector> x{ -// {1,2,3}, -// {1.21,2.32,3.24}, -// {5.56,5.45,5.23}, -// {5.35,5.00,5.78} -// }; -// std::vector y{0,0,1,1}; -// int numClusters=2; -// std::string s = "euclidean"; -// double score = silhouetteScore(x,y,numClusters,s); - // std::cout< pred = {1, 0, 1, 1, 0, 1}; diff --git a/examples/methods/metrics/classification_report_eg.cpp b/examples/metrics/classification_report_eg.cpp similarity index 78% rename from examples/methods/metrics/classification_report_eg.cpp rename to examples/metrics/classification_report_eg.cpp index 5b68a3c..605da7a 100644 --- a/examples/methods/metrics/classification_report_eg.cpp +++ b/examples/metrics/classification_report_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/classification_report.hpp" +// #include "../src/slowmokit/methods/metrics/classification_report.hpp" // int main() // { diff --git a/examples/methods/metrics/f1score_eg.cpp b/examples/metrics/f1score_eg.cpp similarity index 100% rename from examples/methods/metrics/f1score_eg.cpp rename to examples/metrics/f1score_eg.cpp diff --git a/examples/methods/metrics/mean_squared_error_eg.cpp b/examples/metrics/mean_squared_error_eg.cpp similarity index 78% rename from examples/methods/metrics/mean_squared_error_eg.cpp rename to examples/metrics/mean_squared_error_eg.cpp index a427daf..3fe8489 100644 --- a/examples/methods/metrics/mean_squared_error_eg.cpp +++ b/examples/metrics/mean_squared_error_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/mean_squared_error.hpp" +// #include "../src/slowmokit/methods/metrics/mean_squared_error.hpp" // int main() { // std::vector actual = {1.0, 2.0, 3.0}; // std::vector pred = {0.5, 1.5, 2.5}; diff --git a/examples/methods/metrics/precision_eg.cpp b/examples/metrics/precision_eg.cpp similarity index 86% rename from 
examples/methods/metrics/precision_eg.cpp rename to examples/metrics/precision_eg.cpp index 33b354e..f41f005 100644 --- a/examples/methods/metrics/precision_eg.cpp +++ b/examples/metrics/precision_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/precision.hpp" +// #include "../src/slowmokit/methods/metrics/precision.hpp" // int main() // { // std::vector pred = {0, 1, 2, 1, 0, 2, 1, 0, 1, 2}; diff --git a/examples/methods/metrics/recall_eg.cpp b/examples/metrics/recall_eg.cpp similarity index 86% rename from examples/methods/metrics/recall_eg.cpp rename to examples/metrics/recall_eg.cpp index ee0c911..cb196ac 100644 --- a/examples/methods/metrics/recall_eg.cpp +++ b/examples/metrics/recall_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/recall.hpp" +// #include "../src/slowmokit/methods/metrics/recall.hpp" // int main() // { diff --git a/examples/methods/neighbors/bernoulli_nb.cpp b/examples/neighbors/bernoulli_nb.cpp similarity index 81% rename from examples/methods/neighbors/bernoulli_nb.cpp rename to examples/neighbors/bernoulli_nb.cpp index bb17dfb..8d707d2 100644 --- a/examples/methods/neighbors/bernoulli_nb.cpp +++ b/examples/neighbors/bernoulli_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> xTrain{ diff --git a/examples/methods/neighbors/gaussian_nb.cpp b/examples/neighbors/gaussian_nb.cpp similarity index 82% rename from examples/methods/neighbors/gaussian_nb.cpp rename to examples/neighbors/gaussian_nb.cpp index 79ed8a3..ead14c3 100644 --- a/examples/methods/neighbors/gaussian_nb.cpp +++ b/examples/neighbors/gaussian_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include 
"../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x_train{ diff --git a/examples/methods/neighbors/knn.cpp b/examples/neighbors/knn.cpp similarity index 78% rename from examples/methods/neighbors/knn.cpp rename to examples/neighbors/knn.cpp index 0d39c76..5eb1a00 100644 --- a/examples/methods/neighbors/knn.cpp +++ b/examples/neighbors/knn.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x{ diff --git a/examples/methods/preprocessing/label_encoder.cpp b/examples/preprocessing/label_encoder.cpp similarity index 79% rename from examples/methods/preprocessing/label_encoder.cpp rename to examples/preprocessing/label_encoder.cpp index 5e8ce5f..0a54461 100644 --- a/examples/methods/preprocessing/label_encoder.cpp +++ b/examples/preprocessing/label_encoder.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/label_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/label_encoder.hpp" //int main() { // std::vector data = {"luffy","zoro","sanji","luffy","law","zoro"}; diff --git a/examples/methods/preprocessing/normalization_eg.cpp b/examples/preprocessing/normalization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/normalization_eg.cpp rename to examples/preprocessing/normalization_eg.cpp index 255ad2b..fb28f4e 100644 --- a/examples/methods/preprocessing/normalization_eg.cpp +++ b/examples/preprocessing/normalization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/normalization.hpp" +// #include "../src/slowmokit/methods/preprocessing/normalization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; // normalize(values); diff --git a/examples/methods/preprocessing/one_hot_encoder_eg.cpp b/examples/preprocessing/one_hot_encoder_eg.cpp similarity index 84% rename from 
examples/methods/preprocessing/one_hot_encoder_eg.cpp rename to examples/preprocessing/one_hot_encoder_eg.cpp index b11846d..9fe6529 100644 --- a/examples/methods/preprocessing/one_hot_encoder_eg.cpp +++ b/examples/preprocessing/one_hot_encoder_eg.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" //int main() { // std::vector data = {"apples", "banana", "mango", "pear", "mango","apples","pear"}; diff --git a/examples/methods/preprocessing/standardization_eg.cpp b/examples/preprocessing/standardization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/standardization_eg.cpp rename to examples/preprocessing/standardization_eg.cpp index a51a622..fb9c5fc 100644 --- a/examples/methods/preprocessing/standardization_eg.cpp +++ b/examples/preprocessing/standardization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/standardization.hpp" +// #include "../src/slowmokit/methods/preprocessing/standardization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; diff --git a/src/slowmokit.cpp b/src/slowmokit.cpp deleted file mode 100644 index 0d55b17..0000000 --- a/src/slowmokit.cpp +++ /dev/null @@ -1,7 +0,0 @@ -/** - * @file slowmokit.cpp - * - * File important to read the cpp files inside the src/ - * Just a necessary include, no edits should be made to this file - * - */ \ No newline at end of file diff --git a/src/slowmokit/CMakeLists.txt b/src/slowmokit/CMakeLists.txt deleted file mode 100644 index e57d108..0000000 --- a/src/slowmokit/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# Recurse into methods/ to get the definitions of any bindings. 
-#add_subdirectory(methods) - -# At install time, we simply install the src/ directory to include/ -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/../slowmokit.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") - -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/base.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/core.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/prereqs.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit/") - -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/methods" DESTINATION - "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit") \ No newline at end of file diff --git a/src/slowmokit/core.hpp b/src/slowmokit/core.hpp index c72eec4..0684181 100644 --- a/src/slowmokit/core.hpp +++ b/src/slowmokit/core.hpp @@ -10,4 +10,7 @@ // pre-requisites of the library #include "prereqs.hpp" +// standard model class +#include "models/model.hpp" + #endif // SLOWMOKIT_CORE_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_main.cpp b/src/slowmokit/ducks/matrix/matrix.cpp similarity index 98% rename from src/slowmokit/ducks/matrix/matrix_main.cpp rename to src/slowmokit/ducks/matrix/matrix.cpp index bc71834..0e63bc2 100644 --- a/src/slowmokit/ducks/matrix/matrix_main.cpp +++ b/src/slowmokit/ducks/matrix/matrix.cpp @@ -1,7 +1,7 @@ /** - * @file ducks/matrix/matrix_main.cpp + * @file ducks/matrix/matrix.cpp * - * Implementation of the main methods of Matrix + * Implementation of the matrix main program */ #include "matrix.hpp" diff --git a/src/slowmokit/ducks/matrix/matrix.hpp b/src/slowmokit/ducks/matrix/matrix.hpp index d58f85d..6a6343c 100644 --- a/src/slowmokit/ducks/matrix/matrix.hpp +++ b/src/slowmokit/ducks/matrix/matrix.hpp @@ -91,6 +91,16 @@ template class Matrix std::array getShape() const; + /** + * @brief Function for taking dot product of current matrix with another + * matrix + * @param rhs: Take dot product of current Matrix with `rhs` + * @returns: A New Matrix after taking dot product with matrix `rhs` + * @throws: std::invalid_argument incase of incompatible dimensions + */ + Matrix 
&dot(const Matrix &); + + /** * @brief overloading += operator for adding another matrix to existing matrix * @param1: Matrix `rhs` which is to be added @@ -149,13 +159,14 @@ template class Matrix * @throw: whatever operator *= throws */ Matrix subtract(const Matrix); - - /** - * @brief overloading << for easy printing of Matrix - */ - friend std::ostream &operator<< (std::ostream &, const Matrix &); }; + +/** + * @brief overloading << for easy printing of Matrix + */ +template std::ostream &operator<<(std::ostream &, const Matrix &); + /** * @brief Free Function to multiply a matrix to a number or another matrix * @param lhs: A number or a Matrix @@ -187,4 +198,4 @@ template Matrix operator-(Matrix, const G &); template Matrix operator-(Matrix, const Matrix &); -#endif // SLOWMOKIT_IO_HPP \ No newline at end of file +#endif // SLOWMOKIT_IO_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_free.cpp b/src/slowmokit/ducks/matrix/matrix_free.cpp deleted file mode 100644 index cfc57fe..0000000 --- a/src/slowmokit/ducks/matrix/matrix_free.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file ducks/matrix/matrix_free.cpp - * - * Implementation of the free functions associated with matrix - */ - -#include "matrix.hpp" - -template Matrix operator*(Matrix lhs, const Matrix &rhs) -{ - lhs *= rhs; - return lhs; -} - -template Matrix operator+(Matrix lhs, const Matrix &rhs) -{ - lhs += rhs; - return lhs; -} - -template Matrix operator-(Matrix lhs, const Matrix &rhs) -{ - lhs -= rhs; - return lhs; -} - -template Matrix operator+(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res += num; - return res; -} - -template Matrix operator-(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res -= num; - return res; -} - -template Matrix operator*(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res *= num; - return res; -} - -template Matrix operator+(Matrix matrix, const G &num) -{ - matrix += num; - return matrix; -} - -template Matrix operator-(Matrix matrix, 
const G &num) -{ - matrix -= num; - return matrix; -} - -template Matrix operator*(Matrix matrix, const G &num) -{ - matrix *= num; - return matrix; -} - -template Matrix Matrix::matmul(const Matrix rhs) -{ - Matrix res = *this; - res *= rhs; - return res; -} - -template Matrix Matrix::add(const Matrix rhs) -{ - Matrix res = *this; - res += rhs; - return res; -} - -template Matrix Matrix::subtract(const Matrix rhs) -{ - Matrix res = *this; - res -= rhs; - return res; -} \ No newline at end of file diff --git a/src/slowmokit/methods/cluster/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans.hpp index 82cdd94..a2b006e 100644 --- a/src/slowmokit/methods/cluster/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans.hpp @@ -4,9 +4,9 @@ * Easy include for kMeans algorithm */ -#ifndef SLOWMOKIT_KMEANS_HPP_MAIN -#define SLOWMOKIT_KMEANS_HPP_MAIN +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "kMeans/kMeans.hpp" -#endif // SLOWMOKIT_KMEANS_HPP_MAIN +#endif // SLOWMOKIT_KMEANS_HPP diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp index 1418414..b637584 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp @@ -3,7 +3,6 @@ * * Implementation of the K-Means main program */ - #include "kMeans.hpp" template void kMeans::fit(std::vector> X) diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp index 29db2b0..575582c 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp @@ -4,14 +4,13 @@ * The header file including the kMeans */ -#ifndef SLOWMOKIT_KMEANS_HPP_1 -#define SLOWMOKIT_KMEANS_HPP_1 +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "../../../core.hpp" template class kMeans { - const int default_epoch = 40; const int k; const int epoch; std::vector clusters; @@ -85,17 +84,17 @@ template class kMeans 
/**
 * @brief Linear regression with an L1 (lasso) penalty, trained by batch
 *        (sub)gradient descent.
 *
 * Coefficient layout: coefficients[0] is the intercept and coefficients[j + 1]
 * is the weight of feature j, so a fitted model stores n_features + 1 values.
 * The objective minimised is
 *   (1 / (2 n)) * sum_i (prediction_i - y_i)^2 + lambda * sum_{j >= 1} |w_j|
 * i.e. the intercept is not penalised.
 *
 * Errors are reported by throwing a string literal (const char *), matching
 * the rest of this file.
 */
template <class T> class LassoRegularization
{
 private:
  std::vector<T> coefficients;
  double lambda = 0.01;

  /**
   * @brief intercept + dot(weights, row) for a single example
   * @throws const char * if row does not have exactly
   *         coefficients.size() - 1 features (this includes an unfitted model)
   */
  double linearResponse(const std::vector<T> &row) const;

 public:
  /**
   * @param lambda multiplier of the L1 term (regularization strength)
   */
  LassoRegularization(double = 0.01);

  /**
   * @brief evaluates the regularized loss for the current coefficients
   * @param x training x values, one row per example
   * @param y training output values, one per example
   * @return halved mean squared error plus lambda times the L1 norm of the
   *         weights; only the penalty term when x is empty
   * @throws const char * on size mismatches
   */
  double lossFunction(std::vector<std::vector<T>> &, std::vector<T> &);

  /**
   * @brief calculates a (sub)gradient of the loss function
   * @param x training x values
   * @param y training output values
   * @return std::vector<T> with one entry per coefficient (intercept first)
   * @throws const char * on size mismatches
   */
  std::vector<T> gradient(std::vector<std::vector<T>> &, std::vector<T> &);

  /**
   * @brief performs ONE gradient descent update of the coefficients
   * @param x training x values
   * @param y training output values
   * @param alpha learning rate
   */
  void gradientDescent(std::vector<std::vector<T>> &, std::vector<T> &, double);

  /**
   * @brief resets the coefficients to zero and runs `epochs` descent steps
   * @param x training x values
   * @param y training output values
   * @param epochs number of gradient descent updates
   * @param alpha learning rate
   * @returns std::vector<T> the fitted coefficients (intercept first)
   * @throws const char * if the training data is empty or inconsistent
   */
  std::vector<T> fit(std::vector<std::vector<T>> &, std::vector<T> &, int,
                     double);

  /**
   * @brief predicts one output per row of x
   * @param x rows to predict; an empty input yields an empty result
   * @throws const char * if a row does not match the fitted feature count
   */
  std::vector<T> predict(std::vector<std::vector<T>>);

  /**
   * @brief prints every coefficient, one per line, to std::cout
   */
  void printCoefficients();
};

template <class T>
LassoRegularization<T>::LassoRegularization(double lambda) : lambda(lambda)
{
}

template <class T>
double LassoRegularization<T>::linearResponse(const std::vector<T> &row) const
{
  if (coefficients.size() != row.size() + 1)
    throw "Feature size does not match the fitted coefficients";
  double response = static_cast<double>(coefficients[0]);
  for (std::size_t j = 0; j < row.size(); ++j)
    response += static_cast<double>(coefficients[j + 1]) * row[j];
  return response;
}

template <class T>
double LassoRegularization<T>::lossFunction(std::vector<std::vector<T>> &x,
                                            std::vector<T> &y)
{
  if (x.size() != y.size())
    throw "Number of features and target must be equal";

  double squaredError = 0.0;
  for (std::size_t i = 0; i < x.size(); ++i)
  {
    // The prediction must be built from the inputs x[i], not from the
    // coefficients themselves.
    const double residual = y[i] - linearResponse(x[i]);
    squaredError += residual * residual;
  }
  // Guard the empty data set so the data term is 0 rather than 0 / 0.
  const double dataTerm = x.empty() ? 0.0 : squaredError / (2.0 * x.size());

  // The intercept (index 0) is excluded from the penalty.
  double l1Norm = 0.0;
  for (std::size_t j = 1; j < coefficients.size(); ++j)
    l1Norm += std::fabs(static_cast<double>(coefficients[j]));

  return dataTerm + lambda * l1Norm;
}

template <class T>
std::vector<T>
LassoRegularization<T>::gradient(std::vector<std::vector<T>> &x,
                                 std::vector<T> &y)
{
  if (x.size() != y.size())
    throw "Number of features and target must be equal";

  // Accumulate sum_i residual_i * [1, x_i] using the FULL prediction.
  std::vector<double> accumulated(coefficients.size(), 0.0);
  for (std::size_t i = 0; i < x.size(); ++i)
  {
    const double residual = linearResponse(x[i]) - y[i];
    accumulated[0] += residual;
    for (std::size_t j = 0; j < x[i].size(); ++j)
      accumulated[j + 1] += residual * x[i][j];
  }

  std::vector<T> grad(coefficients.size());
  for (std::size_t k = 0; k < grad.size(); ++k)
  {
    double component = x.empty() ? 0.0 : accumulated[k] / x.size();
    // Subgradient of lambda * |w|: lambda * sign(w), taking 0 at w == 0.
    // The intercept (k == 0) carries no penalty.
    if (k > 0)
    {
      if (coefficients[k] > 0)
        component += lambda;
      else if (coefficients[k] < 0)
        component -= lambda;
    }
    grad[k] = static_cast<T>(component);
  }
  return grad;
}

template <class T>
void LassoRegularization<T>::gradientDescent(std::vector<std::vector<T>> &x,
                                             std::vector<T> &y, double alpha)
{
  const std::vector<T> grad = gradient(x, y);
  for (std::size_t k = 0; k < coefficients.size(); ++k)
    coefficients[k] = static_cast<T>(coefficients[k] - alpha * grad[k]);
}

template <class T>
std::vector<T> LassoRegularization<T>::fit(std::vector<std::vector<T>> &x,
                                           std::vector<T> &y, int epochs,
                                           double alpha)
{
  if (x.size() == 0 || y.size() == 0)
    throw "Make sure that you have atleast one train example";
  if (x.size() != y.size())
    throw "Number of features and target must be equal";
  const std::size_t featureSize = x[0].size();
  if (featureSize == 0)
    throw "Feature size should be at least 1";
  for (const std::vector<T> &row : x)
  {
    if (row.size() != featureSize)
      throw "All training examples must have the same number of features";
  }

  // Reset the MEMBER coefficients (intercept + one weight per feature); a
  // local vector of the same name would shadow it and hide the trained model.
  coefficients.assign(featureSize + 1, T());
  for (int epoch = 0; epoch < epochs; ++epoch)
    gradientDescent(x, y, alpha);
  return coefficients;
}

template <class T>
std::vector<T> LassoRegularization<T>::predict(std::vector<std::vector<T>> x)
{
  std::vector<T> yPred;
  yPred.reserve(x.size());
  for (const std::vector<T> &row : x)
    yPred.push_back(static_cast<T>(linearResponse(row)));
  return yPred;
}

template <class T> void LassoRegularization<T>::printCoefficients()
{
  for (std::size_t i = 0; i < coefficients.size(); ++i)
    std::cout << "Θ" << i << ": " << coefficients[i] << '\n';
}
/**
 * @brief Linear regression with an L2 (ridge) penalty, trained by batch
 *        gradient descent.
 *
 * Coefficient layout: coefficients[0] is the intercept and coefficients[j + 1]
 * is the weight of feature j, so a fitted model stores n_features + 1 values.
 * The per-example loss is (prediction - y)^2 + lambda * sum_{j >= 1} w_j^2;
 * the intercept is not penalised.
 *
 * Errors are reported by throwing a string literal (const char *), matching
 * the rest of this file.
 */
template <class T> class RidgeRegularization
{
 private:
  std::vector<T> coefficients;
  double lambda = 0.01;

 public:
  /**
   * @param lambda multiplier of the L2 term (regularization strength)
   */
  RidgeRegularization(double = 0.01);

  /**
   * @brief evaluates the regularized loss of a single training example
   * @param x feature values of one example
   * @param y target value of that example
   * @return squared error plus lambda times the squared weights
   * @throws const char * if x does not match the fitted feature count
   */
  double lossFunction(std::vector<T> &, double);

  /**
   * @brief calculates the gradient of the single-example loss
   * @param x feature values of one example
   * @param y target value of that example
   * @return std::vector<T> with one entry per coefficient (intercept first)
   * @throws const char * if x does not match the fitted feature count
   */
  std::vector<T> gradient(std::vector<T> &, double);

  /**
   * @brief runs `epochs` batch gradient descent updates on the current
   *        coefficients (it does not reset them)
   * @param x training x values, one row per example
   * @param y training output values, one per example
   * @param alpha learning rate
   * @param epochs number of updates; nothing happens for epochs <= 0 or an
   *        empty training set
   * @throws const char * on size mismatches
   */
  void gradientDescent(std::vector<std::vector<T>> &, std::vector<T> &, double,
                       int);

  /**
   * @brief resets the coefficients to zero and trains the model
   * @param x training x values
   * @param y training output values
   * @param epochs number of gradient descent updates
   * @param alpha learning rate
   * @returns std::vector<T> the fitted coefficients (intercept first)
   * @throws const char * if the training data is empty or inconsistent
   */
  std::vector<T> fit(std::vector<std::vector<T>> &, std::vector<T> &, int,
                     double);

  /**
   * @brief predicts the output of a single example
   * @param x feature values of one example
   * @throws const char * if x does not have exactly coefficients.size() - 1
   *         features (this includes an unfitted model)
   */
  double predict(std::vector<T> &);

  /**
   * @brief prints every coefficient, one per line, to std::cout
   */
  void printCoefficients();
};

template <class T>
RidgeRegularization<T>::RidgeRegularization(double lambda) : lambda(lambda)
{
}

template <class T>
double RidgeRegularization<T>::lossFunction(std::vector<T> &x, double y)
{
  // predict() takes only the example; it reads the member coefficients.
  const double error = predict(x) - y;
  double loss = error * error;
  // Index 0 is the intercept and is left out of the penalty.
  for (std::size_t i = 1; i < coefficients.size(); ++i)
    loss += lambda * coefficients[i] * coefficients[i];
  return loss;
}

template <class T>
std::vector<T> RidgeRegularization<T>::gradient(std::vector<T> &x, double y)
{
  // predict() validates that x matches the coefficient count, so the
  // indexing below stays in bounds.
  const double error = predict(x) - y;
  std::vector<T> grad(coefficients.size());
  grad[0] = static_cast<T>(2 * error);
  for (std::size_t i = 0; i < x.size(); ++i)
    grad[i + 1] =
        static_cast<T>(2 * error * x[i] + 2 * lambda * coefficients[i + 1]);
  return grad;
}

template <class T>
void RidgeRegularization<T>::gradientDescent(std::vector<std::vector<T>> &x,
                                             std::vector<T> &y, double alpha,
                                             int epochs)
{
  if (x.size() != y.size())
    throw "Number of features and target must be equal";
  const std::size_t m = y.size();
  if (m == 0)
    return; // nothing to average over; avoids dividing by zero

  for (int epoch = 0; epoch < epochs; ++epoch)
  {
    // Sum the per-example gradients, then take one averaged step.
    std::vector<T> grad(coefficients.size());
    for (std::size_t i = 0; i < m; ++i)
    {
      const std::vector<T> exampleGrad = gradient(x[i], y[i]);
      for (std::size_t j = 0; j < coefficients.size(); ++j)
        grad[j] += exampleGrad[j];
    }
    for (std::size_t j = 0; j < coefficients.size(); ++j)
      coefficients[j] = static_cast<T>(coefficients[j] - alpha * grad[j] / m);
  }
}

template <class T>
std::vector<T> RidgeRegularization<T>::fit(std::vector<std::vector<T>> &x,
                                           std::vector<T> &y, int epochs,
                                           double alpha)
{
  if (x.size() == 0 || y.size() == 0)
    throw "Make sure that you have atleast one train example";
  if (x.size() != y.size())
    throw "Number of features and target must be equal";
  const std::size_t featureSize = x[0].size();
  if (featureSize == 0)
    throw "Feature size should be at least 1";
  for (const std::vector<T> &row : x)
  {
    if (row.size() != featureSize)
      throw "All training examples must have the same number of features";
  }

  // Reset the MEMBER coefficients (intercept + one weight per feature); a
  // local vector of the same name would shadow it and hide the trained model.
  coefficients.assign(featureSize + 1, T());
  // gradientDescent already iterates over the epochs, so call it exactly once
  // (wrapping it in another epoch loop would perform epochs^2 updates).
  gradientDescent(x, y, alpha, epochs);
  return coefficients;
}

template <class T> double RidgeRegularization<T>::predict(std::vector<T> &x)
{
  if (coefficients.size() != x.size() + 1)
    throw "Feature size does not match the fitted coefficients";
  double yPred = coefficients[0];
  for (std::size_t i = 0; i < x.size(); ++i)
    yPred += coefficients[i + 1] * x[i];
  return yPred;
}

template <class T> void RidgeRegularization<T>::printCoefficients()
{
  for (std::size_t i = 0; i < coefficients.size(); ++i)
    std::cout << "Θ" << i << ": " << coefficients[i] << '\n';
}
- for (int j = 0; j < x.size(); j++) - { // iterating over each x[i] - if (i == j) - { - distances[i][j] = 0.0; // initialize distance=0 if computing distance - // between same values - } - else - { - if (typeDist == "euclidean") - { // euclidean distance - for (int k = 0; k < x[0].size(); k++) - { - distances[i][j] += - (x[i][k] - x[j][k]) * - (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point - } - distances[i][j] = pow(distances[i][j], 0.5); - } - else if (typeDist == "manhattan") - { - for (int k = 0; k < x[0].size(); k++) - { - distances[i][j] += abs(x[i][k] - x[j][k]); - } - } - } - } - } - - std::vector intraClusters( - x.size(), 0.0); // Computing intraclusters distances of each point - std::vector crossClusters( - x.size()); // Minimum Distance of each point to other clusters - for (int i = 0; i < x.size(); i++) - { - int sumNum = 0; - std::vector interClusters( - numClusters, - 0.0); // values of point to each cluster points - std::vector sumsOfParticular(numClusters, 0); - for (int j = 0; j < x.size(); j++) - { - if (y[j] == y[i]) - { - intraClusters[i] += distances[i][j]; // Sum of distance of point to each - // other point in same cluster - sumNum++; - } - else - { - interClusters[y[j]] += distances[i][j]; // Sum of distance of point to - // points in different clusters - sumsOfParticular[y[j]]++; // computes points in that cluster - } - } - intraClusters[i] /= - sumNum; // Mean of sum values of distances b/w points of same cluster - double minimumOfall = std::numeric_limits::max(); - ; - for (int j = 0; j < numClusters; j++) - { - if (j != y[i]) - { - interClusters[j] /= - sumsOfParticular[j]; // Mean of values of interclusters distances - if (interClusters[j] < minimumOfall) - { // computing minimum value of means of intercluster distances - minimumOfall = interClusters[j]; - } - } - } - crossClusters[i] = minimumOfall; - } - double si = 0.0; - for (int i = 0; i < x.size(); i++) - { - si += ((crossClusters[i] - intraClusters[i]) / - 
std::max(intraClusters[i], - crossClusters[i])); // s = b[i]-a[i] / max(b[i],a[i]) - } - return si / double(x.size()); -}; \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/silhouette_score.hpp b/src/slowmokit/methods/metrics/silhouette_score.hpp deleted file mode 100644 index 94fa3d6..0000000 --- a/src/slowmokit/methods/metrics/silhouette_score.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @file methods/metrics/silhoutte_score.hpp - * - * Easy include to find Silhoutte score - */ - -#ifndef SLOWMOKIT_SILHOUETTE_SCORE_HPP -#define SLOWMOKIT_SILHOUETTE_SCORE_HPP -#include "../../core.hpp" - -template -/** - * @brief Calculates Silhouette Score - * - * @param vector> x values - * @param vector y values - * @param int number of clusters - * @param string distance type - * @return double Silhouette Score - */ -double silhouetteScore(std::vector>, std::vector, int, - std::string); - -#endif // SLOWMOKIT_SILHOUETTE_SCORE_HPP \ No newline at end of file diff --git a/src/slowmokit/methods/neighbors/knn/knn.hpp b/src/slowmokit/methods/neighbors/knn/knn.hpp index c8b2807..bbbc9a3 100644 --- a/src/slowmokit/methods/neighbors/knn/knn.hpp +++ b/src/slowmokit/methods/neighbors/knn/knn.hpp @@ -6,6 +6,8 @@ #ifndef SLOWMOKIT_KNN_HPP #define SLOWMOKIT_KNN_HPP +#include "../../../models/model.hpp" + template class KNN { private: