In [21]:
#|export
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import sklearn
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
In [4]:
print(sklearn.__version__)
1.1.3
In [5]:
iris = sns.load_dataset('iris')
In [6]:
X = iris.drop('species', axis = 1)
y =iris[['species']]
In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=30)
In [8]:
from sklearn.tree import DecisionTreeClassifier
In [9]:
arbol = DecisionTreeClassifier()
In [10]:
arbol.fit(X_train,y_train)
Out[10]:
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [13]:
tree.plot_tree(arbol)
Out[13]:
[Text(0.625, 0.9285714285714286, 'X[2] <= 2.6\ngini = 0.664\nsamples = 105\nvalue = [37, 37, 31]'), Text(0.5, 0.7857142857142857, 'gini = 0.0\nsamples = 37\nvalue = [37, 0, 0]'), Text(0.75, 0.7857142857142857, 'X[3] <= 1.75\ngini = 0.496\nsamples = 68\nvalue = [0, 37, 31]'), Text(0.625, 0.6428571428571429, 'X[2] <= 5.35\ngini = 0.139\nsamples = 40\nvalue = [0, 37, 3]'), Text(0.5, 0.5, 'X[3] <= 1.65\ngini = 0.097\nsamples = 39\nvalue = [0, 37, 2]'), Text(0.25, 0.35714285714285715, 'X[2] <= 4.95\ngini = 0.053\nsamples = 37\nvalue = [0, 36, 1]'), Text(0.125, 0.21428571428571427, 'gini = 0.0\nsamples = 35\nvalue = [0, 35, 0]'), Text(0.375, 0.21428571428571427, 'X[3] <= 1.55\ngini = 0.5\nsamples = 2\nvalue = [0, 1, 1]'), Text(0.25, 0.07142857142857142, 'gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(0.5, 0.07142857142857142, 'gini = 0.0\nsamples = 1\nvalue = [0, 1, 0]'), Text(0.75, 0.35714285714285715, 'X[0] <= 5.8\ngini = 0.5\nsamples = 2\nvalue = [0, 1, 1]'), Text(0.625, 0.21428571428571427, 'gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(0.875, 0.21428571428571427, 'gini = 0.0\nsamples = 1\nvalue = [0, 1, 0]'), Text(0.75, 0.5, 'gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(0.875, 0.6428571428571429, 'gini = 0.0\nsamples = 28\nvalue = [0, 0, 28]')]
In [14]:
X_nombre = list(X.columns)
classes = ['setosa', 'versicolor', 'virginica']
In [15]:
fig, axes = plt.subplots(nrows = 1, ncols=1, figsize = (3,2), dpi = 300)
tree.plot_tree(arbol,
feature_names= X_nombre,
class_names= classes,
filled= True);
fig.savefig('imagename.png')
In [18]:
pred = arbol.predict(X_test)
In [19]:
print(confusion_matrix(y_test, pred))
[[13 0 0] [ 0 12 1] [ 0 0 19]]
In [20]:
print(classification_report(y_test, pred))
precision recall f1-score support setosa 1.00 1.00 1.00 13 versicolor 1.00 0.92 0.96 13 virginica 0.95 1.00 0.97 19 accuracy 0.98 45 macro avg 0.98 0.97 0.98 45 weighted avg 0.98 0.98 0.98 45
In [ ]:
# Hemos acabado un solo arbol, pero lo mejor es coger muchos de forma aleatoria y coger el mejor.
In [22]:
rfc = RandomForestClassifier(n_estimators= 20 , random_state= 33)
In [23]:
rfc.fit(X_train, y_train)
C:\Users\USER\AppData\Local\Temp\ipykernel_13148\1542427849.py:1: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel(). rfc.fit(X_train, y_train)
Out[23]:
RandomForestClassifier(n_estimators=20, random_state=33)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(n_estimators=20, random_state=33)
In [24]:
rfc_pred = rfc.predict(X_test)
In [25]:
print(confusion_matrix(y_test, rfc_pred))
print(classification_report(y_test, rfc_pred))
[[13 0 0] [ 0 12 1] [ 0 2 17]] precision recall f1-score support setosa 1.00 1.00 1.00 13 versicolor 0.86 0.92 0.89 13 virginica 0.94 0.89 0.92 19 accuracy 0.93 45 macro avg 0.93 0.94 0.94 45 weighted avg 0.94 0.93 0.93 45