From bd8378b1444dcd7856094767c1427235d238d3c9 Mon Sep 17 00:00:00 2001 From: ManJ-PC Date: Sat, 6 Jul 2024 09:28:26 +0100 Subject: [PATCH 1/2] https://pedroazambuja.medium.com/adaboost-adaptive-boosting-dbbec150fced --- Adaboost.ipynb | 273 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 Adaboost.ipynb diff --git a/Adaboost.ipynb b/Adaboost.ipynb new file mode 100644 index 0000000..089f696 --- /dev/null +++ b/Adaboost.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WLpfCn9XJqu7" + }, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn import metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XnW6ubOJJqu8" + }, + "source": [ + "Primeiramente carregamos o dataset de lírios do Scikit Learn, mais informações deste podem ser encontradas em:\n", + "\n", + "https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fq-a--cbJqu9" + }, + "outputs": [], + "source": [ + "iris = datasets.load_iris()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4-Bj4cVsJqu9" + }, + "source": [ + "Em seguida separamos os dados carregados em data (X) e target (y). Estes por sua vez são divididos em conjunto de treinamento e de teste com uma proporção de 60% para o primeiro e 40% para o segundo." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V9n-GkRzJqu9" + }, + "outputs": [], + "source": [ + "X = iris.data\n", + "y = iris.target\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ALoRz9SyJqu9" + }, + "source": [ + "Então, criamos o classificador com Adaboost, o **AdaBoostClassifier** tem um DecisionTreeClassifier com profundidade 1 como seu classificador padrão. Porém é possível utilizar outros classificadores como será mostrado mais adiante.\n", + "\n", + "Para mais informações sobre o **AdaBoostClassifier**:\n", + "\n", + "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lyqqo0fXJqu-" + }, + "outputs": [], + "source": [ + "ab_classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),\n", + " n_estimators=50,\n", + " learning_rate=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2iigVztGJqu-" + }, + "source": [ + "Por fim, treinamos o classificador criado com o conjunto de treinamento e o utilizamos para realizar a predição do conjunto de testes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RHBpWTBTJqu-" + }, + "outputs": [], + "source": [ + "model = ab_classifier.fit(X_train, y_train)\n", + "\n", + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bJgxTIo8Jqu-" + }, + "source": [ + "Para comparação, todo o procedimento anterior foi realizado com o mesmo classificador mas sem Adaboost:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1SANz_9cJqu_" + }, + "outputs": [], + "source": [ + "dt = DecisionTreeClassifier(max_depth=1)\n", + "dt_model = dt.fit(X_train, y_train)\n", + "y_pred_dt = dt_model.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6N9o3KnxJqu_" + }, + "source": [ + "Abaixo são calculadas a acurácia e a matriz de confusão geradas pelos modelos:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xi2r0G_IJqu_", + "outputId": "86c9dd4c-5007-4701-d705-ddf07749c9c0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matriz de Confusão sem Adaboost:\n", + "[[18 0 0]\n", + " [ 0 0 24]\n", + " [ 0 0 18]]\n", + "Matriz de Confusão sem Adaboost:\n", + "[[18 0 0]\n", + " [ 0 18 6]\n", + " [ 0 0 18]]\n", + "Acurácia sem Adaboost: 0.6\n", + "Acurácia com Adaboost: 0.9\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "\n", + "print(\"Matriz de Confusão sem Adaboost:\")\n", + "print(confusion_matrix(y_test, y_pred_dt))\n", + "\n", + "print(\"Matriz de Confusão com Adaboost:\")\n", + "print(confusion_matrix(y_test, y_pred))\n", + "\n", + "print(\"Acurácia sem Adaboost:\", metrics.accuracy_score(y_test, y_pred_dt))\n", + "print(\"Acurácia com Adaboost:\", metrics.accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lKkyRpz3Jqu_" + }, + "source": [ + "Como é possível observar, a adição do 
Adaboost ao classificador melhora muito sua acurácia." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZKpY5_KqJqu_" + }, + "source": [ + "Além disso, abaixo é mostrada a utilização do Adaboost, porém com outro tipo de classificador." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kz1O13DKJqvA" + }, + "outputs": [], + "source": [ + "svc = SVC(probability=True, kernel='linear')\n", + "\n", + "# Create AdaBoost classifier object\n", + "ab_classifier = AdaBoostClassifier(svc,\n", + " n_estimators=50,\n", + " learning_rate=1)\n", + "# Train AdaBoost classifier\n", + "model = ab_classifier.fit(X_train, y_train)\n", + "\n", + "# Predict the response for the test dataset\n", + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nVedkP3qJqvA", + "outputId": "56de05f5-6c6a-4148-aa46-f4bb751aa875" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matriz de Confusão do SVC com Adaboost:\n", + "[[18 0 0]\n", + " [ 0 22 2]\n", + " [ 0 0 18]]\n", + "Acurácia do SVC com Adaboost: 0.9666666666666667\n" + ] + } + ], + "source": [ + "print(\"Matriz de Confusão do SVC com Adaboost:\")\n", + "print(confusion_matrix(y_test, y_pred))\n", + "\n", + "print(\"Acurácia do SVC com Adaboost:\", metrics.accuracy_score(y_test, y_pred))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 55ae8c63ee6160e8d31dbafa43107b2312a9b44a Mon Sep 17 00:00:00 2001 From: ManJ-PC Date: Sat, 6 Jul 2024 09:29:35 +0100 
Subject: [PATCH 2/2] https://pedroazambuja.medium.com/adaboost-adaptive-boosting-dbbec150fced --- Adaboost.ipynb | 52 ++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/Adaboost.ipynb b/Adaboost.ipynb index 089f696..e310a44 100644 --- a/Adaboost.ipynb +++ b/Adaboost.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "WLpfCn9XJqu7" }, @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "fq-a--cbJqu9" }, @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "V9n-GkRzJqu9" }, @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "id": "lyqqo0fXJqu-" }, @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "RHBpWTBTJqu-" }, @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "1SANz_9cJqu_" }, @@ -143,25 +143,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, "id": "xi2r0G_IJqu_", - "outputId": "86c9dd4c-5007-4701-d705-ddf07749c9c0" + "outputId": "10abb273-aa72-4b92-9a45-edb4cf7071ba" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Matriz de Confusão sem Adaboost:\n", - "[[18 0 0]\n", - " [ 0 0 24]\n", - " [ 0 0 18]]\n", + "[[16 0 0]\n", + " [ 0 19 0]\n", + " [ 0 25 0]]\n", "Matriz de Confusão sem Adaboost:\n", - "[[18 0 0]\n", - " [ 0 18 6]\n", - " [ 0 0 18]]\n", - "Acurácia sem Adaboost: 0.6\n", + "[[16 0 0]\n", + " [ 0 19 0]\n", + " [ 0 6 19]]\n", + "Acurácia sem Adaboost: 0.5833333333333334\n", "Acurácia com Adaboost: 0.9\n" ] } @@ -199,7 +202,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 8, "metadata": { "id": "kz1O13DKJqvA" }, @@ -220,20 +223,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, "id": "nVedkP3qJqvA", - "outputId": "56de05f5-6c6a-4148-aa46-f4bb751aa875" + "outputId": "2599414f-1929-4bf0-956e-cd43bfd48c41" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Matriz de Confusão do SVC com Adaboost:\n", - "[[18 0 0]\n", - " [ 0 22 2]\n", - " [ 0 0 18]]\n", + "[[16 0 0]\n", + " [ 0 18 1]\n", + " [ 0 1 24]]\n", "Acurácia do SVC com Adaboost: 0.9666666666666667\n" ] }