# Reading the data again.
import pandas as pd
from category_encoders.ordinal import OrdinalEncoder  # choose one encoder
# from category_encoders.one_hot import OneHotEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
# NearestCentroid is imported from the public ``sklearn.neighbors`` package:
# the private ``sklearn.neighbors.nearest_centroid`` module path was
# deprecated in scikit-learn 0.22 and removed in 0.24.
from sklearn.neighbors import NearestCentroid

# Load the golf data set and show its first rows.
golf = pd.read_csv('golf.csv')
print(golf.head())

# Encode the four feature columns with the chosen encoder.
encoder = OrdinalEncoder()
# encoder = OneHotEncoder()
golf_encoded = encoder.fit_transform(golf[['Outlook', 'Temperature', 'Humidity', 'Wind']])
print(golf_encoded.head())

# Fit a Gaussian naive Bayes classifier on the encoded features,
# using the 'Play' column as the target.
naive_bayes = GaussianNB()
naive_bayes.fit(golf_encoded, golf['Play'])

# The two estimators below are created but intentionally left unfitted.
knn_estimator = KNeighborsClassifier(n_neighbors=3)
# knn_estimator.fit....
nearest_centroid_estimator = NearestCentroid()
# nearest_centroid_estimator.fit....
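# Three important evaluation functions: confusion_matrix, classification_report and accuracy_score.
# All of them take the same two inputs: the true labels and the predicted labels.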
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
# Fourteen hand-written predictions, used to demonstrate the metric
# functions without fitting a model first.
golf_prediction = [
    'yes', 'no', 'yes', 'yes', 'no', 'yes', 'yes',
    'no', 'no', 'no', 'no', 'no', 'no', 'yes',
]

# Each metric compares the true 'Play' column against the predictions.
print(confusion_matrix(y_true=golf['Play'], y_pred=golf_prediction))
print(accuracy_score(y_true=golf['Play'], y_pred=golf_prediction))
print(classification_report(y_true=golf['Play'], y_pred=golf_prediction))
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def confusion_matrix_report(y_true, y_pred):
    """Return the confusion matrix of two label sequences as a text table.

    The table has a "Prediction" banner, a header row with one column per
    label, and one row per label holding that row's counts from
    ``confusion_matrix`` (rows are true labels, columns are predicted
    labels).

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    Returns:
        The formatted table as a single string; every line, including the
        last one, ends with a newline.
    """
    labels = unique_labels(y_true, y_pred)
    # Pass the labels explicitly so the matrix rows/columns are guaranteed to
    # be in the same order as the header built from ``labels`` below.
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Columns are as wide as the longest label, but never narrower than 5
    # characters so the counts have room.
    column_width = max([len(str(label)) for label in labels] + [5])
    row_label_pad = " " * column_width

    lines = [
        row_label_pad + " " + "{:_^{}}".format("Prediction", column_width * len(labels)),
        row_label_pad + " ".join("{:>{}}".format(label, column_width) for label in labels),
    ]
    for row_label, row_counts in zip(labels, cm):
        lines.append(
            "{:>{}}".format(row_label, column_width)
            + " ".join("{:{}d}".format(count, column_width) for count in row_counts)
        )
    return "\n".join(lines) + "\n"
# Show the text-formatted confusion matrix for the example predictions.
print(confusion_matrix_report(y_true=golf['Play'], y_pred=golf_prediction))

from sklearn.model_selection import train_test_split

# Features and target used for the hold-out split.
golf_data = golf_encoded
golf_target = golf['Play']

# Stratified split on the target with test_size=0.2; the fixed random_state
# makes the split reproducible.
data_train, data_test, target_train, target_test = train_test_split(
    golf_data,
    golf_target,
    test_size=0.2,
    random_state=42,
    stratify=golf_target,
)

print("=======TRAIN=========", data_train, target_train, sep="\n")
print("=======TEST=========", data_test, target_test, sep="\n")

# Small demonstration of positional str.format placeholders.
print("hello {} you are {} years old".format("heinz", 30))