A data warehouse organises data for analytical queries. It uses a star schema: one fact table surrounded by dimension tables. Today you will design a star schema and write analytical SQL.
-- Star schema for an e-commerce warehouse
-- Dimension tables (slowly changing, descriptive)
CREATE TABLE dim_date (
date_key INTEGER PRIMARY KEY, -- 20260101
full_date DATE,
year INT, quarter INT, month INT, week INT, day_of_week INT,
is_weekend BOOLEAN
);
CREATE TABLE dim_customer (
customer_key INT PRIMARY KEY,
customer_id VARCHAR(36),
name TEXT, email TEXT, city TEXT, country TEXT,
valid_from DATE, valid_to DATE, is_current BOOLEAN
);
CREATE TABLE dim_product (
product_key INT PRIMARY KEY,
product_id VARCHAR(36),
name TEXT, category TEXT, brand TEXT, unit_price DECIMAL(10,2)
);
-- Fact table (events, metrics, foreign keys)
CREATE TABLE fact_orders (
order_key BIGINT PRIMARY KEY,
date_key INT REFERENCES dim_date,
customer_key INT REFERENCES dim_customer,
product_key INT REFERENCES dim_product,
quantity INT,
unit_price DECIMAL(10,2),
discount DECIMAL(5,2),
revenue DECIMAL(10,2)
);-- Analytical queries on the star schema
-- Monthly revenue by category
SELECT
d.year,
d.month,
p.category,
SUM(f.revenue) AS total_revenue,
COUNT(DISTINCT f.customer_key) AS unique_customers,
AVG(f.revenue) AS avg_order_value
FROM fact_orders f
JOIN dim_date d ON f.date_key = d.date_key
JOIN dim_product p ON f.product_key = p.product_key
GROUP BY 1, 2, 3
ORDER BY 1, 2, total_revenue DESC;
-- Customer lifetime value (LTV)
SELECT
c.customer_id,
c.name,
COUNT(*) AS total_orders,
SUM(f.revenue) AS ltv,
MIN(d.full_date) AS first_order,
MAX(d.full_date) AS last_order
FROM fact_orders f
JOIN dim_customer c ON f.customer_key = c.customer_key
JOIN dim_date d ON f.date_key = d.date_key
WHERE c.is_current = TRUE
GROUP BY 1, 2
ORDER BY ltv DESC;