Car Safety Analysis¶
1) Importing the Dataset¶
In [68]:
import pandas
In [69]:
# Location of the careval.csv input file.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook only runs on a machine where this exact file exists.
CSV_PATH = 'C://Users//Admin//Documents//careval.csv'

# Load the CSV into a DataFrame. header='infer' is the pandas default: with no
# explicit `names`, the first row of the file is used as the column names.
d = pandas.read_csv(CSV_PATH, header='infer')
In [70]:
# Show the loaded DataFrame as this cell's output.
d
Out[70]:
2) Preprocessing Step¶
In [71]:
from sklearn import preprocessing
In [8]:
# Label encoder used to turn each column's values into integer codes.
le = preprocessing.LabelEncoder()
In [72]:
# Integer-encode every column of `d`. The single encoder `le` is refit on each
# column in turn, so once this finishes it only holds the last column's labels.
data = d.apply(lambda column: le.fit_transform(column))
In [73]:
# Show the integer-encoded DataFrame as this cell's output.
data
Out[73]:
In [13]:
# train_test_split lives in sklearn.model_selection; the older
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
from sklearn import preprocessing
3) Splitting the Training and Testing Data¶
In [16]:
# Pull the encoded table out as a plain array, then slice it:
#   X - feature matrix, the columns at positions 0..3
#   Y - target vector, the column at position 5
# NOTE(review): the column at position 4 is used neither as a feature nor as
# the target - confirm this is intended and not an off-by-one in the slice.
encoded_matrix = data.values
X = encoded_matrix[:, :4]
Y = encoded_matrix[:, 5]
In [18]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
)
4) Fitting the Decision Tree¶
In [77]:
# Settings for a shallow decision tree that splits on the entropy criterion.
# NOTE(review): the variable below is called `clf_gini`, but the criterion
# actually used is "entropy", not "gini".
tree_settings = {
    "criterion": "entropy",
    "random_state": 100,
    "max_depth": 3,
    "min_samples_leaf": 5,
}
clf_gini = DecisionTreeClassifier(**tree_settings)

# Train on the training split; the fitted estimator is this cell's output.
clf_gini.fit(X_train, y_train)
Out[77]:
5) Predicting on the Test data¶
In [78]:
# Predict a label for every row of the held-out test features.
y_pred = clf_gini.predict(X_test)
6) Validating¶
In [79]:
# Decision-tree accuracy on the test split: the fraction of test rows whose
# predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)
Out[79]:
7) Same Procedure on KNN¶
In [80]:
from sklearn.neighbors import KNeighborsClassifier
In [81]:
# k-nearest-neighbours classifier configured with n_neighbors=10.
knn=KNeighborsClassifier(n_neighbors=10)
In [82]:
# Fit the KNN model on the same training split used for the decision tree.
knn.fit(X_train,y_train)
Out[82]:
In [83]:
# KNN predictions for the held-out test features.
y_predknn=knn.predict(X_test)
In [84]:
# KNN accuracy on the test split, for comparison with the decision tree.
accuracy_score(y_true=y_test, y_pred=y_predknn)
Out[84]:
8) Cross Validation¶
In [90]:
from sklearn.model_selection import cross_val_score
In [87]:
# 5-fold cross-validation of the KNN model. Note that this runs on the full
# X / Y arrays, not only on the training split.
scores = cross_val_score(estimator=knn, X=X, y=Y, cv=5)
In [88]:
# Average of the per-fold cross-validation scores.
scores.mean()
Out[88]:
9) Ensembling¶
In [58]:
from sklearn.ensemble import AdaBoostClassifier
In [60]:
# AdaBoost ensemble limited to 10 estimators. random_state is pinned to the
# same seed used for the train/test split and the decision tree, so repeated
# runs produce the same fitted model and the same accuracy.
# NOTE(review): `abc` is also the name of a standard-library module; the name
# is kept because later cells refer to it.
abc=AdaBoostClassifier(n_estimators=10,learning_rate=1,random_state=100)
In [62]:
# Fit the AdaBoost ensemble on the training split.
abc.fit(X_train,y_train)
Out[62]:
In [64]:
# AdaBoost predictions for the held-out test features.
y_predada=abc.predict(X_test)
In [65]:
# AdaBoost accuracy on the test split, for comparison with the other models.
accuracy_score(y_true=y_test, y_pred=y_predada)
Out[65]: