dbt (data build tool) lets you write SQL SELECT statements and automatically handles materialisation, documentation, and testing. It turns your warehouse into a reliable transformation layer.
pip install dbt-core dbt-duckdb # or dbt-bigquery, dbt-snowflake
dbt init my_project
cd my_project
dbt debug # verify connection-- models/staging/stg_orders.sql
-- Staging model: clean and rename raw columns
{{ config(materialized='view') }}
SELECT
id::VARCHAR AS order_id,
customer_id,
LOWER(status) AS status,
created_at::DATE AS order_date,
total_amount::DECIMAL(10,2) AS amount
FROM {{ source('raw', 'orders') }}
WHERE id IS NOT NULL-- models/marts/orders_daily.sql
-- Mart model: business-level aggregate
{{ config(materialized='table') }}
WITH orders AS (
SELECT * FROM {{ ref('stg_orders') }}
WHERE status = 'completed'
),
daily AS (
SELECT
order_date,
COUNT(*) AS order_count,
SUM(amount) AS revenue,
AVG(amount) AS avg_order_value
FROM orders
GROUP BY order_date
)
SELECT * FROM daily
ORDER BY order_date# models/staging/schema.yml — documentation and tests
version: 2
models:
- name: stg_orders
description: "Cleaned orders from the raw source"
columns:
- name: order_id
description: "Unique order ID"
tests:
- unique
- not_null
- name: status
tests:
- accepted_values:
values: ['pending', 'completed', 'cancelled']dbt run && dbt test to build all models and run all tests in one command.