# flowers.py (2.1 KB)

  1. # Load libraries
  2. from pandas import read_csv
  3. from pandas.plotting import scatter_matrix
  4. from matplotlib import pyplot
  5. from sklearn.model_selection import train_test_split
  6. from sklearn.model_selection import cross_val_score
  7. from sklearn.model_selection import StratifiedKFold
  8. from sklearn.metrics import classification_report
  9. from sklearn.metrics import confusion_matrix
  10. from sklearn.metrics import accuracy_score
  11. from sklearn.linear_model import LogisticRegression
  12. from sklearn.tree import DecisionTreeClassifier
  13. from sklearn.neighbors import KNeighborsClassifier
  14. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
  15. from sklearn.naive_bayes import GaussianNB
  16. from sklearn.svm import SVC
  17. # Load dataset
  18. url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
  19. names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
  20. dataset = read_csv(url, names=names)
  21. # shape
  22. print(dataset.shape)
  23. # head
  24. print(dataset.head(20))
  25. # descriptions
  26. print(dataset.describe())
  27. # box and whisker plots
  28. dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
  29. pyplot.show()
  30. # Split-out validation dataset
  31. array = dataset.values
  32. X = array[:,0:4]
  33. y = array[:,4]
  34. X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)
  35. # Spot Check Algorithms
  36. models = []
  37. models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
  38. models.append(('LDA', LinearDiscriminantAnalysis()))
  39. models.append(('KNN', KNeighborsClassifier()))
  40. models.append(('CART', DecisionTreeClassifier()))
  41. models.append(('NB', GaussianNB()))
  42. models.append(('SVM', SVC(gamma='auto')))
  43. # evaluate each model in turn
  44. results = []
  45. names = []
  46. for name, model in models:
  47. kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
  48. cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
  49. results.append(cv_results)
  50. names.append(name)
  51. print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
  52. # Compare Algorithms
  53. pyplot.boxplot(results, labels=names)
  54. pyplot.title('Algorithm Comparison')
  55. pyplot.show()