lab1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset and wrap its feature matrix in a
# DataFrame, using the feature names shipped with the dataset as the
# column labels.
housing_data = fetch_california_housing()
df = pd.DataFrame(housing_data.data, columns=housing_data.feature_names)

# Quick look at the raw values.
print("First five rows of the dataset:")
print(df.head(5))

# Per-column descriptive statistics as reported by DataFrame.describe().
print("\nDataset Summary:")
print(df.describe())

# Figure 1: one 30-bin histogram per column of `df`.
df.hist(bins=30, edgecolor='black', figsize=(12, 8))
# The figure-level title goes on the figure that df.hist() just made current.
plt.gcf().suptitle("Histograms for All Numerical Features", fontsize=16)
plt.show()

# Figure 2: all columns side by side in a single box plot. The x tick
# labels are rotated 45 degrees.
plt.figure(figsize=(12, 6))
df.boxplot(rot=45)
plt.gca().set_title("Box Plots for All Numerical Features", fontsize=16)
plt.show()

# Count outliers per column with the 1.5 * IQR rule: a value is an outlier
# when it lies below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR of its column.
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# The bounds are Series indexed by column name, so the comparison is
# aligned column-wise. Summing the boolean mask gives the per-column count.
outliers = (
    df.lt(lower_bound, axis="columns") | df.gt(upper_bound, axis="columns")
).sum()

print("\nNumber of Outliers in Each Feature:")
print(outliers)

# One horizontal box plot per column, laid out on a grid three plots wide.
# The row count is derived from the number of columns so that frames with
# more than nine columns no longer make plt.subplot() raise ValueError.
# The floor of 3 rows keeps the layout unchanged for up to nine columns.
grid_cols = 3
grid_rows = max(3, (len(df.columns) + grid_cols - 1) // grid_cols)

plt.figure(figsize=(12, 8))

# Subplot indices are 1-based, hence enumerate(..., 1).
for i, col in enumerate(df.columns, 1):
    plt.subplot(grid_rows, grid_cols, i)
    sns.boxplot(x=df[col], color="skyblue")
    plt.title(col)

# Keep subplot titles and tick labels from overlapping.
plt.tight_layout()
plt.show()


Comments

Popular posts from this blog

lab9

lab2

lab10