Posts

Showing posts from June, 2025

10: K-Means Clustering on the Breast Cancer Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Standardize features
X_scaled = StandardScaler().fit_transform(X)

# Apply KMeans clustering
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
y_kmeans = kmeans.fit_predict(X_scaled)

# Evaluate clustering against the true labels
print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

# Reduce dimensions for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
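The archive preview cuts the program off right after the PCA step. Given the seaborn import and the "Reduce dimensions for visualization" comment, the omitted tail presumably plotted the clusters in two dimensions; a minimal sketch of such a plot, continuing from X_pca, y_kmeans, and kmeans above (the styling choices are assumptions):

# Assumed continuation: scatter the two principal components, coloured by cluster.
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_kmeans, cmap='coolwarm', alpha=0.7)
# Project the cluster centres (computed in scaled space) into PCA space.
centers_pca = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers_pca[:, 0], centers_pca[:, 1], c='black', marker='X', s=200, label='Centroids')
plt.title('K-Means Clusters in PCA Space')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()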

9: Gaussian Naive Bayes on the Olivetti Faces Dataset

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X, y = data.data, data.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train Naive Bayes model
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation
print(f'Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%')
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
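cross_val_score is imported but never used in the visible portion, so the truncated tail presumably cross-validated the classifier. A minimal sketch of that step, reusing X and y from above:

# Assumed continuation: 5-fold cross-validation on the full dataset.
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print(f"\nCross-validated accuracy: {scores.mean() * 100:.2f}% (+/- {scores.std() * 100:.2f}%)")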

8: Decision Tree Classifier on the Breast Cancer Dataset

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Accuracy
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

# Predict one sample
sample = X_test[0]
predicted_label = clf.predict([sample])[0]
print("Predicted Class:", "Benign" if predicted_label == 1 else "Malignant")

# Plot tree (the preview truncates inside this call; arguments after clf are assumed)
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=list(data.feature_names), class_names=list(data.target_names))
plt.show()
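A fully grown tree over 30 features is accurate here but nearly unreadable when plotted. Not part of the original post, but a common companion experiment: cap the depth and compare.

# Hypothetical variation: a depth-limited tree gives a far more legible plot.
clf_small = DecisionTreeClassifier(max_depth=3, random_state=42)
clf_small.fit(X_train, y_train)
print(f"Depth-3 accuracy: {accuracy_score(y_test, clf_small.predict(X_test)) * 100:.2f}%")
plt.figure(figsize=(12, 8))
plot_tree(clf_small, filled=True, feature_names=list(data.feature_names), class_names=list(data.target_names))
plt.show()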

7: Linear and Polynomial Regression on California Housing

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

def linear_regression_california():
    data = fetch_california_housing(as_frame=True)
    X = data.data[["AveRooms"]]
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    sorted_idx = X_test.values.flatten().argsort()
    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.plot(X_test.values[sorted_idx], y_pred[sorted_idx], color="red", label="Predicted")
    # The preview truncates here; a plausible finish for the function:
    plt.title("Linear Regression - California Housing")
    plt.xlabel("AveRooms")
    plt.ylabel("Median House Value")
    plt.legend()
    plt.show()
    print(f"MSE: {mean_squared_error(y_test, y_pred):.4f}, R^2: {r2_score(y_test, y_pred):.4f}")
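The imports of PolynomialFeatures, StandardScaler, and make_pipeline suggest the truncated remainder contained a polynomial-regression counterpart. A minimal sketch of what that function likely looked like (the name, degree, and final calls are assumptions):

def polynomial_regression_california(degree=2):
    # Hypothetical reconstruction of the truncated companion function.
    data = fetch_california_housing(as_frame=True)
    X = data.data[["AveRooms"]]
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    model = make_pipeline(PolynomialFeatures(degree), StandardScaler(), LinearRegression())
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Degree-{degree} MSE: {mean_squared_error(y_test, y_pred):.4f}, "
          f"R^2: {r2_score(y_test, y_pred):.4f}")

linear_regression_california()
polynomial_regression_california()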

6: Locally Weighted Regression

import numpy as np
import matplotlib.pyplot as plt

def weight(x, xi, tau):
    return np.exp(-np.sum((x - xi) ** 2) / (2 * tau ** 2))

def predict(x, X, y, tau):
    W = np.diag([weight(x, xi, tau) for xi in X])
    theta = np.linalg.pinv(X.T @ W @ X) @ X.T @ W @ y
    return x @ theta

# Generate training data
X = np.linspace(0, 2 * np.pi, 100)
y = np.sin(X) + 0.1 * np.random.randn(100)
X_ = np.c_[np.ones(X.shape), X]  # Add bias term

# Generate test data
x_test = np.linspace(0, 2 * np.pi, 200)
x_test_ = np.c_[np.ones(x_test.shape), x_test]

# Predict
tau = 0.5
y_pred = [predict(xi, X_, y, tau) for xi in x_test_]

# Plot
plt.scatter(X, y, alpha=0.6, label="Data")
plt.plot(x_test, y_pred, color="red", label="LWR Prediction")
plt.legend()
plt.title("Locally Weighted Regression")
plt.show()
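tau is the bandwidth of the Gaussian weighting kernel: small values make the fit track local noise, large values flatten it toward ordinary least squares. A quick way to see this, as a hypothetical extension of the program above (not in the original post):

# Compare several bandwidths on the same data.
for t in [0.1, 0.5, 2.0]:
    preds = [predict(xi, X_, y, t) for xi in x_test_]
    plt.plot(x_test, preds, label=f"tau={t}")
plt.scatter(X, y, alpha=0.3, color="gray")
plt.legend()
plt.title("Effect of the bandwidth tau")
plt.show()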

5: k-Nearest Neighbours on Random 1-D Data

import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

np.random.seed(42)

# Generate data: first 50 points labelled by a 0.5 threshold, last 50 unlabelled
data = np.random.rand(100)
train_data = data[:50]
test_data = data[50:]
train_labels = np.array(["Class1" if x <= 0.5 else "Class2" for x in train_data])

# k-NN function
def knn(train_data, train_labels, test_point, k):
    distances = np.abs(train_data - test_point)
    nearest_labels = train_labels[np.argsort(distances)[:k]]
    return Counter(nearest_labels).most_common(1)[0][0]

# k values to test
k_values = [1, 2, 3, 4, 5, 20, 30]

# Loop over each k and plot the resulting classification
for k in k_values:
    predictions = np.array([knn(train_data, train_labels, x, k) for x in test_data])
    class1 = test_data[predictions == "Class1"]
    class2 = test_data[predictions == "Class2"]
    plt.figure(figsize=(8, 3))
    # The preview truncates inside the scatter call below; the marker choice and
    # the remaining plotting lines are a reconstruction.
    plt.scatter(train_data, np.zeros_like(train_data), c="black", label="Train", marker="|")
    plt.scatter(class1, np.ones_like(class1), c="blue", label="Class1", marker="o")
    plt.scatter(class2, np.ones_like(class2), c="red", label="Class2", marker="o")
    plt.title(f"k-NN classification of test points (k={k})")
    plt.yticks([])
    plt.legend()
    plt.show()
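One subtlety worth noting with the even k values in the list (2, 4, 20, 30): votes can tie. Counter.most_common breaks ties by insertion order, and nearest_labels is built in distance order, so the class of the nearest neighbour among the tied classes wins. A quick illustration with hypothetical labels:

from collections import Counter
# Tie between the two classes: the label seen first (the nearest point's) wins.
print(Counter(["Class2", "Class1", "Class1", "Class2"]).most_common(1)[0][0])  # -> Class2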

4: The Find-S Algorithm

import pandas as pd

def find_s_algorithm(file_path):
    # Load the dataset
    data = pd.read_csv(file_path)
    print("\nTraining Data:\n", data)

    attributes = data.columns[:-1]  # All columns except the target
    target = data.columns[-1]       # Target column (last one)

    # Step 1: Initialize hypothesis with the first positive example
    hypothesis = None
    for _, row in data.iterrows():
        if row[target] == 'Yes':
            hypothesis = list(row[attributes])
            break

    # If no positive examples found
    if hypothesis is None:
        print("No positive examples in the dataset.")
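The preview ends partway through the function. Find-S itself is standard: after seeding the hypothesis with the first positive example, each further positive example generalizes any mismatched attribute to '?'. A sketch of the likely remainder, following the variable names in the visible portion:

        return None

    # Step 2: Generalize the hypothesis over the remaining positive examples
    for _, row in data.iterrows():
        if row[target] == 'Yes':
            for i, value in enumerate(row[attributes]):
                if hypothesis[i] != value:
                    hypothesis[i] = '?'  # generalize mismatched attributes

    print("\nFinal Hypothesis:", hypothesis)
    return hypothesis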

3: PCA on the Iris Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Reduce dimensionality to 2 using PCA
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(data)

# Create DataFrame for visualization
df_pca = pd.DataFrame(reduced_data, columns=['PC1', 'PC2'])
df_pca['Label'] = labels

# Plot PCA result
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    subset = df_pca[df_pca['Label'] == label]
    plt.scatter(subset['PC1'], subset['PC2'], color=colors[i],
                label=label_names[label], alpha=0.7)
plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid(True)
plt.show()
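Not in the original post, but a natural companion check: how much of the total variance the two components retain. Scikit-learn exposes this directly on the fitted PCA object:

# Fraction of total variance captured by each principal component.
print("Explained variance ratio:", pca.explained_variance_ratio_)
print(f"Total: {pca.explained_variance_ratio_.sum() * 100:.1f}%")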

2: Correlation Analysis of California Housing

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load dataset and convert to DataFrame
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['MedHouseVal'] = data.target  # Add target column

# 1. Correlation Matrix
correlation_matrix = df.corr()
print("\nCorrelation Matrix:")
print(correlation_matrix)

# 2. Heatmap of Correlation Matrix
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.tight_layout()
plt.show()

# 3. Pair Plot for Selected Features
selected_features = ['MedInc', 'HouseAge', 'AveRooms', 'AveOccup', 'MedHouseVal']
# The alpha value below is assumed; the preview truncates mid-argument.
sns.pairplot(df[selected_features], diag_kind='kde', plot_kws={'alpha': 0.5})
plt.show()
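A small addition that often accompanies this kind of analysis (not in the visible portion of the post): ranking the features by the strength of their correlation with the target, using the matrix computed above.

# Features ordered by absolute correlation with median house value.
target_corr = correlation_matrix['MedHouseVal'].drop('MedHouseVal')
print(target_corr.reindex(target_corr.abs().sort_values(ascending=False).index))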

1: Exploratory Data Analysis of California Housing

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Load California housing dataset
housing_data = fetch_california_housing()
df = pd.DataFrame(housing_data.data, columns=housing_data.feature_names)

# 1. Display First Five Rows
print("First five rows of the dataset:")
print(df.head())

# 2. Dataset Summary
print("\nDataset Summary:")
print(df.describe())

# 3. Histograms for All Features
df.hist(figsize=(12, 8), bins=30, edgecolor='black')
plt.suptitle("Histograms for All Numerical Features", fontsize=16)
plt.show()

# 4. Boxplots for All Features
plt.figure(figsize=(12, 6))
df.boxplot(rot=45)
plt.title("Box Plots for All Numerical Features", fontsize=16)
plt.show()

# 5. Outlier Detection using IQR
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
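The preview truncates the IQR step after computing the quartiles. The standard rule flags points below Q1 - 1.5*IQR or above Q3 + 1.5*IQR; a minimal sketch of the likely remainder, continuing from Q1 and Q3 above:

# Assumed continuation: count points outside the 1.5 * IQR whiskers, per feature.
IQR = Q3 - Q1
outliers = ((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).sum()
print("\nOutliers per feature (IQR method):")
print(outliers)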