1

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Fetch the California housing data and wrap its feature matrix in a
# DataFrame whose columns carry the dataset's own feature names.
housing_data = fetch_california_housing()
df = pd.DataFrame(
    data=housing_data.data,
    columns=housing_data.feature_names,
)

# 1. Preview: heading followed by the first five records.
print("First five rows of the dataset:", df.head(), sep="\n")

# 2. Summary: heading (preceded by a blank line) followed by the
#    per-column descriptive statistics from DataFrame.describe().
print("\nDataset Summary:", df.describe(), sep="\n")

# 3. Histogram grid: one 30-bin histogram per column, bars outlined in
#    black, with a single title across the whole figure.
df.hist(
    bins=30,
    edgecolor='black',
    figsize=(12, 8),
)
plt.suptitle("Histograms for All Numerical Features", fontsize=16)
plt.show()

# 4. Combined box plot: every column drawn side by side on one axes,
#    with the tick labels rotated 45 degrees so they do not overlap.
_box_fig, _box_ax = plt.subplots(figsize=(12, 6))
df.boxplot(rot=45, ax=_box_ax)
_box_ax.set_title("Box Plots for All Numerical Features", fontsize=16)
plt.show()

# 5. Outlier detection with the 1.5 * IQR rule.
#    A value is counted as an outlier when it lies below Q1 - 1.5*IQR or
#    above Q3 + 1.5*IQR for its own column.
_quartiles = df.quantile([0.25, 0.75])
Q1 = _quartiles.loc[0.25]
Q3 = _quartiles.loc[0.75]
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Boolean mask of out-of-range cells, summed column-wise into counts.
_too_low = df.lt(lower_bound)
_too_high = df.gt(upper_bound)
outliers = (_too_low | _too_high).sum()

print("\nNumber of Outliers in Each Feature:")
print(outliers)

# 6. Individual box plots, one subplot per feature.
#    Each feature gets its own axis scale, so the outlier points drawn
#    beyond the whiskers stay visible instead of being squashed by
#    features with a much larger range.
plt.figure(figsize=(12, 8))

# The 3x3 grid has room for up to nine features; subplot positions are
# 1-based, hence enumerate(..., 1).
for i, col in enumerate(df.columns, 1):
    plt.subplot(3, 3, i)
    sns.boxplot(x=df[col], color="skyblue")
    plt.title(col)

# Adjust spacing and display once, after every subplot has been drawn.
plt.tight_layout()
plt.show()

Comments

Popular posts from this blog

2

3