Data Visualisation Dashboard
Difficulty: Intermediate
Time: 60-90 minutes
Learning Focus: Data analysis, visualisation, pandas, matplotlib
Module: chat
Overview
Create an interactive dashboard that allows users to visualise and explore data relationships through various chart types. Students will learn data manipulation with pandas and visualisation with matplotlib.
Instructions
from hands_on_ai.chat import get_response
import pandas as pd
import matplotlib.pyplot as plt
from io import BytesIO
import base64
import random
import os
import numpy as np
def _choose_column(columns, header, prompt, exclude=None):
    """Print a numbered menu of *columns* and return the user's pick.

    Args:
        columns: List of selectable column names, in menu order.
        header: Line printed above the menu.
        prompt: Prompt text; the valid range "(1-N)" is appended to it.
        exclude: Optional column name that is hidden from the menu and
            rejected if its number is entered anyway.

    Returns:
        The chosen column name, or None when the entry is not a whole
        number, is outside 1..len(columns), or refers to *exclude*.
        A message explaining the rejection is printed in those cases.
    """
    print(header)
    for number, name in enumerate(columns, 1):
        # Hidden entries keep their number so the menu numbering stays
        # stable between the X-axis and Y-axis prompts.
        if exclude is None or name != exclude:
            print(f"{number}. {name}")

    raw = input(f"\n{prompt} (1-{len(columns)}): ").strip()
    try:
        number = int(raw)
    except ValueError:
        print(f"\nInvalid selection {raw!r}. Please enter a number.")
        return None

    # Check the range explicitly: 0 or a negative number would otherwise
    # index from the end of the list (or land on the 'Month' column)
    # instead of being reported as a mistake.
    if not 1 <= number <= len(columns):
        print(f"\nInvalid selection. Please enter a number between 1 and {len(columns)}.")
        return None

    chosen = columns[number - 1]
    if exclude is not None and chosen == exclude:
        print(f"\n{chosen} is already used for the other axis. Please pick a different column.")
        return None
    return chosen


def _show_summary(df):
    """Print the shape, column names, summary statistics and first rows of *df*."""
    print("\n=== Data Summary ===")
    print(f"Dataset shape: {df.shape[0]} rows, {df.shape[1]} columns")
    print("\nColumns:")
    for column in df.columns:
        print(f"- {column}")
    print("\nSummary statistics:")
    print(df.describe())
    print("\nFirst few rows:")
    print(df.head())


def _series_chart(df, plots_dir, kind):
    """Plot one column against 'Month' as a line or bar chart and save it as a PNG.

    Args:
        df: DataFrame whose first column is 'Month'; the remaining columns
            are offered for plotting.
        plots_dir: Directory the PNG file is written to.
        kind: "line" for a line chart; any other value draws a bar chart.
    """
    label = "Line" if kind == "line" else "Bar"
    print(f"\n=== {label} Chart ===")

    # The first column ('Month') is the x-axis label, not a plottable series.
    column_to_plot = _choose_column(
        list(df.columns[1:]), "Available columns:", "Select column to plot"
    )
    if column_to_plot is None:
        return

    prefix = "line" if kind == "line" else "bar"
    plot_filename = os.path.join(plots_dir, f"{prefix}_{column_to_plot.lower()}.png")

    plt.figure(figsize=(10, 6))
    try:
        if kind == "line":
            plt.plot(df['Month'], df[column_to_plot], marker='o', linewidth=2)
            plt.grid(True, linestyle='--', alpha=0.7)
        else:
            plt.bar(df['Month'], df[column_to_plot], color='skyblue', edgecolor='navy')
            plt.grid(True, axis='y', linestyle='--', alpha=0.7)
        plt.title(f'{column_to_plot} by Month')
        plt.xlabel('Month')
        plt.ylabel(column_to_plot)
        plt.savefig(plot_filename)
    finally:
        # Close the figure even if plotting or saving fails, so figures do
        # not accumulate while the menu loop keeps running.
        plt.close()

    print(f"\n{label} chart created and saved as {plot_filename}")


def _scatter_plot(df, plots_dir):
    """Plot two user-chosen columns against each other, with month labels and a trendline.

    Args:
        df: DataFrame whose first column is 'Month'; the remaining columns
            are offered for the X and Y axes.
        plots_dir: Directory the PNG file is written to.
    """
    print("\n=== Scatter Plot ===")
    columns = list(df.columns[1:])  # Skip the 'Month' label column

    x_column = _choose_column(columns, "Available columns for X-axis:", "Select X-axis column")
    if x_column is None:
        return
    y_column = _choose_column(
        columns, "\nAvailable columns for Y-axis:", "Select Y-axis column", exclude=x_column
    )
    if y_column is None:
        return

    plot_filename = os.path.join(
        plots_dir, f"scatter_{x_column.lower()}_{y_column.lower()}.png"
    )

    plt.figure(figsize=(10, 6))
    try:
        plt.scatter(df[x_column], df[y_column], color='purple', alpha=0.7, s=100)

        # Label each point with its month. Iterating the columns together is
        # positional, so it does not depend on the DataFrame's index labels.
        for month, x_value, y_value in zip(df['Month'], df[x_column], df[y_column]):
            plt.annotate(month, (x_value, y_value),
                         xytext=(5, 5), textcoords='offset points')

        plt.title(f'{y_column} vs {x_column}')
        plt.xlabel(x_column)
        plt.ylabel(y_column)
        plt.grid(True, linestyle='--', alpha=0.7)

        # Straight-line trend through the points; a degree-1 fit needs at
        # least two distinct x values to be meaningful.
        unique_x = np.unique(df[x_column])
        if len(unique_x) > 1:
            trend = np.poly1d(np.polyfit(df[x_column], df[y_column], 1))
            plt.plot(unique_x, trend(unique_x), color='red', linestyle='--', alpha=0.7)

        plt.savefig(plot_filename)
    finally:
        # Close the figure even if plotting or saving fails.
        plt.close()

    print(f"\nScatter plot created and saved as {plot_filename}")


def _ai_insights(df):
    """Send a text summary of *df* to get_response and print the reply.

    Any exception raised while requesting the insights is reported to the
    user instead of ending the dashboard.
    """
    print("\n=== AI Data Insights ===")

    # Prepare data summary for AI
    data_description = (
        "\n"
        f"Dataset with columns: {', '.join(map(str, df.columns))}\n"
        "Summary statistics:\n"
        f"{df.describe().to_string()}\n"
        "First few rows:\n"
        f"{df.head().to_string()}\n"
    )
    insight_prompt = (
        "\n"
        "Analyze this dataset and provide 3-5 key insights:\n"
        f"{data_description}\n"
        "Focus on:\n"
        "1. Patterns or trends over months\n"
        "2. Correlations between variables\n"
        "3. Anomalies or interesting data points\n"
        "4. Suggestions for further analysis\n"
    )

    print("Generating AI insights...")
    try:
        insights = get_response(insight_prompt)
    except Exception as e:
        # Menu-level boundary: the AI backend being unavailable should not
        # stop the user from using the rest of the dashboard.
        print(f"Error getting AI insights: {e}")
        print("AI insight generation is not available.")
        return

    print("\n=== AI Analysis Results ===")
    print(insights)


def data_dashboard():
    """Run an interactive, menu-driven data visualization dashboard.

    Builds a 12-row sample DataFrame (Month, Temperature, Rainfall,
    Visitors), creates the "dashboard_plots" directory if needed, and then
    loops over a console menu until the user picks "6. Exit". Menu options
    print a data summary, save line/bar/scatter charts as PNG files in
    "dashboard_plots", or request AI-generated insights via get_response.

    Invalid menu or column selections print a message and return to the
    menu rather than raising.
    """
    # Sample dataset (students could replace with their own CSV)
    sample_data = {
        'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        'Temperature': [12, 14, 16, 19, 22, 25, 27, 26, 23, 19, 15, 13],
        'Rainfall': [50, 45, 35, 30, 25, 15, 10, 12, 20, 35, 40, 48],
        'Visitors': [120, 135, 190, 240, 310, 430, 590, 560, 420, 320, 190, 150],
    }

    # Create a DataFrame from the sample data
    df = pd.DataFrame(sample_data)

    print("=== Data Visualization Dashboard ===")
    print("This dashboard allows you to explore relationships in data.")

    # Create directory for plots if it doesn't exist
    plots_dir = "dashboard_plots"
    os.makedirs(plots_dir, exist_ok=True)

    while True:
        print("\nOptions:")
        print("1. View data summary")
        print("2. Line chart")
        print("3. Bar chart")
        print("4. Scatter plot")
        print("5. Get AI insights")
        print("6. Exit")

        choice = input("\nSelect an option (1-6): ").strip()

        if choice == '1':
            _show_summary(df)
        elif choice == '2':
            _series_chart(df, plots_dir, "line")
        elif choice == '3':
            _series_chart(df, plots_dir, "bar")
        elif choice == '4':
            _scatter_plot(df, plots_dir)
        elif choice == '5':
            _ai_insights(df)
        elif choice == '6':
            print("\nExiting Dashboard. Goodbye!")
            break
        else:
            print("\nInvalid choice. Please select a number between 1 and 6.")
# Start the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    data_dashboard()
Extension Ideas
- Add more visualisation types like pie charts, histograms, or heatmaps
- Implement data filtering options to explore subsets of the data
- Add the ability to load CSV files from disk
- Create a feature to export all visualisations as a report
- Implement interactive plots using libraries like Plotly
- Add clustering or other basic data analysis techniques