import pandas as pd
# Load the iris dataset; the 'Name' column holds the class label.
iris = pd.read_csv("iris.csv")

# Labels are the 'Name' column; features are every remaining column.
iris_target = iris['Name']
iris_data = iris.drop(columns='Name')
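# General form of the call: GridSearchCV(estimator, parameters, scoring, cv)
# `parameters` is a dict mapping each hyper-parameter name to the list of
# values to try, e.g. {'param_a': [1, 2, 3], 'param_b': [7, 8, 9]}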
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
# Base model whose hyper-parameters are tuned below.
knn_estimator = KNeighborsClassifier()

# Search space: every combination of neighbour count (2..8) and
# nearest-neighbour search algorithm is evaluated.
parameters = {
    'n_neighbors': list(range(2, 9)),
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
}

# Shuffled, stratified 10-fold split with a fixed seed for reproducibility.
stratified_10_fold_cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

grid_search_estimator = GridSearchCV(
    estimator=knn_estimator,
    param_grid=parameters,
    scoring='accuracy',
    cv=stratified_10_fold_cv,
)

# Fitting tries out all parameter combinations.
grid_search_estimator.fit(iris_data, iris_target)
# The best estimator can be used for further prediction; it is trained on the
# whole dataset with the best hyper-parameters, e.g.
#   grid_search_estimator.best_estimator_.predict(...)
print("best score is {} with params {}".format(
    grid_search_estimator.best_score_, grid_search_estimator.best_params_))

# cv_results_ is a dict of parallel sequences with one entry per candidate.
# To inspect everything it contains: import pprint; pprint.pprint(results)
results = grid_search_estimator.cv_results_

# Print each candidate parameter combination next to its mean test score.
for params, mean_score in zip(results['params'], results['mean_test_score']):
    print("{}, {}".format(params, mean_score))
import pandas as pd
import numpy as np
from scipy.io import arff
def _clean_arff_value(value):
    """Decode a bytes-like value to str and strip single quotes.

    Values without a ``decode`` method (e.g. numbers) are returned unchanged.
    """
    if hasattr(value, 'decode'):
        return value.decode('utf8').replace("'", "")
    return value


# Read the ARFF file inside a context manager so the handle is always closed.
with open('adult.arff', 'r') as adult_arff_file:
    adult_arff_data, adult_arff_meta = arff.loadarff(adult_arff_file)

adult = pd.DataFrame(adult_arff_data)

# Clean every cell. DataFrame.applymap was renamed to DataFrame.map in
# pandas 2.1 and deprecated, so use whichever this pandas version provides.
if hasattr(pd.DataFrame, 'map'):
    adult = adult.map(_clean_arff_value)
else:
    adult = adult.applymap(_clean_arff_value)

# The 'class' column is the prediction target.
adult_target = np.array(adult['class'])
print(adult_target[:10])

# One-hot encode the categorical feature columns.
adult_data = pd.get_dummies(adult.drop('class', axis=1))
# Bare expression: only displays a preview when run in a notebook/REPL.
adult_data.head()
from sklearn.model_selection import train_test_split
# Hold out 20% of the adult data as a test set; the seed is fixed so the
# split is reproducible.
(data_train, data_test,
 target_train, target_test) = train_test_split(
    adult_data,
    adult_target,
    test_size=0.2,
    random_state=42,
)