from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, davies_bouldin_score
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Task 1: load the digits dataset and report its basic dimensions
df = load_digits(as_frame=True)
print('records:', df.data.shape[0])
print('attributes:', df.data.shape[1])
print('classes:', len(df.target_names))

# Task 2: pairwise feature plot (disabled -- needs seaborn and is very slow for 64 features)
"""
import seaborn as sns
sns.set(style='ticks')
sns.pairplot(df.frame)
plt.show()
"""

# Task 3: 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(
    df.data, df.target, test_size=0.2, shuffle=True, random_state=2025)

# Task 4: train and score three classifiers
dec_tree = DecisionTreeClassifier(max_depth=4, criterion='entropy')
dec_tree.fit(X_train, y_train)
dec_tree_score = dec_tree.score(X_test, y_test)
print('dec_tree:', dec_tree_score)

logreg = LogisticRegression(solver='liblinear')
logreg.fit(X_train, y_train)
logreg_score = logreg.score(X_test, y_test)
print('logreg:', logreg_score)

neural = MLPClassifier(hidden_layer_sizes=(4,), activation='logistic')
neural.fit(X_train, y_train)
neural_score = neural.score(X_test, y_test)
print('neural:', neural_score)

# Task 5: confusion matrix for the logistic regression predictions
prediction = logreg.predict(X_test)
cm = confusion_matrix(y_test, prediction)

# Task 6: display the confusion matrix
disp = ConfusionMatrixDisplay(cm)
disp.plot(cmap=plt.cm.Blues)
plt.show()

# Task 7: choose the cluster count with the Davies-Bouldin index
# (lower is better), trying K = 2..30
DB = []
for K in range(2, 31):
    kmeans = KMeans(n_clusters=K)
    kmeans.fit(X_train)
    DB.append(davies_bouldin_score(X_train, kmeans.labels_))
optimalis_K = DB.index(min(DB)) + 2
print('cluster count:', optimalis_K)

cluster = KMeans(n_clusters=optimalis_K)
cluster.fit(X_train)
centers = cluster.cluster_centers_        # cluster centroids
distX = cluster.transform(X_train)        # distances of the training points from each centroid
dist_center = cluster.transform(centers)  # distances of the centroids from each other

# Visualize the clustering in the distance space, where the coordinates are
# the distances from the first two cluster centers
plt.figure(2)
plt.title('Digits data in the distance space')
plt.xlabel('Cluster 1')
plt.ylabel('Cluster 2')
plt.scatter(distX[:, 0], distX[:, 1], s=50, c=cluster.labels_)                  # data points
plt.scatter(dist_center[:, 0], dist_center[:, 1], s=200, c='red', marker='X')  # centroids
plt.show()
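
# Sanity check for Task 7 (a minimal sketch, reusing the DB list and optimalis_K
# computed above): plot the Davies-Bouldin score for each candidate K so the
# chosen cluster count can be verified visually. Lower scores are better.
plt.figure(3)
plt.title('Davies-Bouldin score vs. number of clusters')
plt.xlabel('K')
plt.ylabel('Davies-Bouldin score')
plt.plot(range(2, 31), DB, marker='o')
plt.axvline(optimalis_K, color='red', linestyle='--', label=f'chosen K = {optimalis_K}')
plt.legend()
plt.show()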