"""Classification and clustering exercise on the banknote-authentication data.

The script downloads the data set, reports its basic dimensions, trains three
classifiers on a fixed train/test split, plots the decision tree, a confusion
matrix and ROC curves, and finally compares two KMeans clusterings using the
Davies-Bouldin index.
"""
from http.client import HTTPResponse
from urllib.request import urlopen

import matplotlib.pyplot as plt
import pandas
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    confusion_matrix,
    davies_bouldin_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree

DATA_URL = (
    'https://arato.inf.unideb.hu/ispany.marton/MachineLearning/Datasets/'
    'banknote_authentication.txt'
)
COLUMNS = ['variance', 'skewness', 'curtosis', 'entropy', 'target']
FEATURE_COLUMNS = COLUMNS[:4]

# Task 1: download the data and report records / attributes / classes.
# The context manager closes the HTTP response once the body has been read.
with urlopen(DATA_URL) as response:
    raw_text = response.read().decode('utf-8')

# splitlines() handles both '\n' and '\r\n' line endings; blank lines are
# skipped so that a trailing newline does not produce a bogus empty record.
lines = [
    line.strip().split(',')
    for line in raw_text.splitlines()
    if line.strip()
]
if not lines:
    raise ValueError('no records found in ' + DATA_URL)
osztalyok = {fields[-1] for fields in lines}

print('rekordok: ', len(lines))
print('attributumok:', len(lines[0]))
print('osztalyok:', len(osztalyok))

# Task 2: build the data frame.
# The parsed fields are strings; the four feature columns are converted to
# floats so the estimators and metrics below receive numeric input. The
# target column is left exactly as parsed.
df = pandas.DataFrame(lines, columns=COLUMNS).astype(
    dict.fromkeys(FEATURE_COLUMNS, float)
)
# Visualisation left disabled, as in the original script:
# pandas.plotting.andrews_curves(df, 'target', color=['blue', 'red'])

# Task 3: fixed 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    df[FEATURE_COLUMNS],
    df['target'],
    test_size=0.2,
    shuffle=True,
    random_state=100,
)

# Task 4: fit the three classifiers and report their test-set scores.
models = {
    'dec_tree': DecisionTreeClassifier(max_depth=5, criterion='entropy'),
    'log_reg': LogisticRegression(solver='liblinear'),
    # (2,) is a one-element tuple: a single hidden layer with two units.
    'neural': MLPClassifier(hidden_layer_sizes=(2,), activation='logistic'),
}
scores = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores[model_name] = model.score(X_test, y_test)
    print(scores[model_name])

# Every model that ties for the highest score is reported.
best_score = max(scores.values())
for model_name, score in scores.items():
    if score == best_score:
        print('legjobb:', model_name, ': ', score)

# Task 5: plot the fitted decision tree and its confusion matrix.
dec_tree = models['dec_tree']
plot_tree(dec_tree)
plt.show()

y_pred = dec_tree.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(cm).plot()
plt.show()

# Task 6: ROC curves on the test set.
# NOTE(review): the original called roc_curve() without arguments, which
# raises TypeError; which model it was meant for is not recoverable from the
# script, so all three fitted classifiers are drawn. Assumes a binary target.
for model_name, model in models.items():
    # Column 1 of predict_proba corresponds to classes_[1]; passing it as
    # pos_label keeps roc_curve working with non-numeric class labels.
    positive_class = model.classes_[1]
    positive_scores = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, positive_scores, pos_label=positive_class)
    plt.plot(fpr, tpr, label=model_name)
plt.plot([0, 1], [0, 1], linestyle='--', color='grey')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()

# Task 7: compare KMeans with 2 and 11 clusters via the Davies-Bouldin index.
# KMeans is unsupervised, so only the feature columns are passed to fit().
features = df[FEATURE_COLUMNS]
kmeans2 = KMeans(n_clusters=2)
kmeans2.fit(features)
kmeans11 = KMeans(n_clusters=11)
kmeans11.fit(features)

db2 = davies_bouldin_score(features, kmeans2.labels_)
db11 = davies_bouldin_score(features, kmeans11.labels_)
print(db2, db11)