A full automated reporting script that takes any sales CSV, cleans the data, generates a bar chart and heatmap, gets AI narrative insights from Claude, and assembles everything into a clean HTML report — saved to disk, ready to share.
Five stages, one script
The pipeline runs in sequence. Each stage feeds the next:
Stage 1 — Load: Read the CSV with pandas.
Stage 2 — Clean: Handle nulls, fix types, remove duplicates.
Stage 3 — Summarize: Compute group-level stats.
Stage 4 — Visualize: Generate charts and encode them as base64 PNG strings (embedded directly in the HTML).
Stage 5 — Report: Get AI insights from Claude, assemble HTML, save report.
The key design decision: encode charts as base64 so the entire report is a single HTML file with no external dependencies. You can email it, open it anywhere, share it without worrying about broken image paths.
The full script
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg") # non-interactive backend
import seaborn as sns
import anthropic
import base64
import io
from datetime import datetime
import sys
def load_and_clean(filepath):
    """Load a sales CSV and return a cleaned DataFrame.

    Cleaning steps:
      1. Coerce ``revenue`` to a numeric dtype. Values that cannot be parsed
         (e.g. ``"abc"``) become NaN instead of leaving the whole column as
         strings, which would break the sums and means computed later.
      2. Normalise ``region`` by trimming whitespace and title-casing.
      3. Drop rows whose ``revenue`` is missing or was unparseable.
      4. Drop exact duplicate rows. This runs after normalisation so rows
         that differ only in region spacing/casing are treated as duplicates.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        KeyError: If the CSV has no ``revenue`` or ``region`` column.
    """
    df = pd.read_csv(filepath)
    # Both column fixes are applied to the freshly loaded frame, before any
    # row filtering, so no assignment is made on a filtered copy.
    df["revenue"] = pd.to_numeric(df["revenue"], errors="coerce")
    df["region"] = df["region"].str.strip().str.title()
    return df.dropna(subset=["revenue"]).drop_duplicates()
def fig_to_base64(fig):
    """Render *fig* to an in-memory PNG and return it base64-encoded.

    Args:
        fig: Object exposing ``savefig(buffer, format=..., dpi=...,
            bbox_inches=...)``; the callers in this file pass the figure
            returned by ``plt.subplots``.

    Returns:
        The PNG bytes as a base64 ``str``, suitable for a ``data:`` URI.
    """
    with io.BytesIO() as png_buffer:
        fig.savefig(png_buffer, format="png", dpi=150, bbox_inches="tight")
        # getvalue() returns the whole buffer regardless of the current
        # stream position, so no rewind is needed.
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode()
def make_bar_chart(df):
    """Build the "Revenue by Region" horizontal bar chart.

    Revenue is summed per ``region`` and sorted ascending before plotting.

    Args:
        df: DataFrame with ``region`` and ``revenue`` columns.

    Returns:
        The chart as a base64-encoded PNG string (see ``fig_to_base64``).
    """
    by_region = df.groupby("region")["revenue"].sum().sort_values()
    fig, ax = plt.subplots(figsize=(9, 4))
    try:
        ax.barh(by_region.index, by_region.values, color="#1e3a5f", height=0.55)
        ax.set_title("Revenue by Region", fontsize=14, fontweight="bold")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        # Lay out this specific figure rather than pyplot's "current" one.
        fig.tight_layout()
        return fig_to_base64(fig)
    finally:
        # Always release the figure, even if drawing or encoding fails;
        # pyplot otherwise keeps it alive.
        plt.close(fig)
def make_heatmap(df):
    """Build the "Revenue by Region & Product" heatmap.

    Revenue is summed into a region x product pivot table; combinations with
    no rows are filled with 0.

    Args:
        df: DataFrame with ``region``, ``product`` and ``revenue`` columns.

    Returns:
        The chart as a base64-encoded PNG string (see ``fig_to_base64``).
    """
    pivot = df.pivot_table(
        values="revenue",
        index="region",
        columns="product",
        aggfunc="sum",
        fill_value=0,
    )
    fig, ax = plt.subplots(figsize=(9, 4))
    try:
        # ",.0f" formats both int and float cells as whole numbers with
        # thousands separators; a bare "," prints float sums with their full
        # (often noisy) decimal expansion.
        sns.heatmap(pivot, annot=True, fmt=",.0f", cmap="Blues", ax=ax)
        ax.set_title("Revenue by Region & Product", fontsize=14, fontweight="bold")
        # Lay out this specific figure rather than pyplot's "current" one.
        fig.tight_layout()
        return fig_to_base64(fig)
    finally:
        # Always release the figure, even if drawing or encoding fails.
        plt.close(fig)
def get_ai_insights(df):
    """Ask Claude for three bullet-point insights about per-region revenue.

    Only an aggregated table (total, mean and row count of ``revenue`` per
    ``region``) is sent in the prompt, not the raw rows.

    Args:
        df: DataFrame with ``region`` and ``revenue`` columns.

    Returns:
        The ``text`` of the first content block of the model's response.
    """
    per_region = df.groupby("region").agg(
        total=("revenue", "sum"),
        avg=("revenue", "mean"),
        deals=("revenue", "count"),
    )
    summary = per_region.to_string()
    prompt = f"You are a business analyst. Provide 3 concise bullet-point insights from this sales data:\n{summary}"

    # No arguments are passed, so the client uses the SDK's default
    # configuration.
    client = anthropic.Anthropic()
    resp = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=512,
        messages=[{"role": "user", "content": prompt}],
    )
    first_block = resp.content[0]
    return first_block.text
def build_report(df, bar_b64, heat_b64, insights, filename):
    """Assemble the single-file HTML report and write it to *filename*.

    Args:
        df: Cleaned DataFrame with a ``revenue`` column; used for the headline
            stats (total, average, row count).
        bar_b64: Base64-encoded PNG of the bar chart, inserted as a data URI.
        heat_b64: Base64-encoded PNG of the heatmap, inserted as a data URI.
        insights: Plain-text insights, one per line. Leading bullet markers
            (``•``, ``-``, ``*``) and spaces are stripped from each line.
        filename: Path of the HTML file to write.

    Returns:
        None. The report is written to disk and a confirmation is printed.
    """
    # Imported locally because ``html`` is not among the file's top-level
    # imports; importing only ``escape`` also avoids a clash with local names.
    from html import escape

    deal_count = len(df)
    total = df["revenue"].sum()
    # The mean of an empty column is NaN, which would render as "$nan".
    avg = df["revenue"].mean() if deal_count else 0.0
    date = datetime.now().strftime("%B %d, %Y")

    bullets = (
        line.strip().lstrip("•-* ") for line in insights.strip().splitlines()
    )
    # The insights come from a model, so escape them before embedding in
    # markup; lines that are empty once bullet markers are removed (blank
    # lines, "---" rules) are skipped rather than emitted as empty items.
    insights_html = "".join(
        f"<li>{escape(text, quote=False)}</li>" for text in bullets if text
    )

    page = f"""<!DOCTYPE html>
<html><head><meta charset="UTF-8">
<title>Sales Report — {date}</title>
<style>body{{font-family:sans-serif;max-width:900px;margin:40px auto;padding:0 24px;color:#1a1a1a}}
h1{{font-size:28px;margin-bottom:4px}}
.meta{{color:#666;font-size:14px;margin-bottom:36px}}
.stats{{display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:36px}}
.stat{{background:#f8f8f6;border-radius:10px;padding:20px;text-align:center}}
.stat .val{{font-size:26px;font-weight:700;color:#1e3a5f}}
.stat .lbl{{font-size:12px;color:#888;text-transform:uppercase;letter-spacing:.06em;margin-top:4px}}
.chart{{margin-bottom:32px}} img{{width:100%;border-radius:8px;border:1px solid #e0dbd3}}
.insights{{background:#faf9f7;border-left:3px solid #c4873e;padding:20px 24px;border-radius:0 8px 8px 0}}
.insights h3{{margin:0 0 12px;color:#1e3a5f}}
.insights ul{{margin:0;padding-left:20px;line-height:1.85}}
</style></head><body>
<h1>Sales Report</h1>
<p class="meta">Generated {date} · {deal_count} records</p>
<div class="stats">
<div class="stat"><div class="val">${total:,.0f}</div><div class="lbl">Total Revenue</div></div>
<div class="stat"><div class="val">${avg:,.0f}</div><div class="lbl">Avg Deal Size</div></div>
<div class="stat"><div class="val">{deal_count}</div><div class="lbl">Total Deals</div></div>
</div>
<div class="chart"><img src="data:image/png;base64,{bar_b64}" alt="Revenue by Region"></div>
<div class="chart"><img src="data:image/png;base64,{heat_b64}" alt="Heatmap"></div>
<div class="insights"><h3>AI Insights</h3><ul>{insights_html}</ul></div>
</body></html>"""

    # The page declares charset UTF-8 and contains non-ASCII characters, so
    # write it as UTF-8 explicitly instead of the platform default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(page)
    print(f"Report saved: {filename}")
def main(argv=None):
    """Run the full pipeline: load, chart, get insights, write the report.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument is the input CSV (default
            ``sales.csv``); the optional second argument is the output HTML
            path (default ``report.html``).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_file = args[0] if args else "sales.csv"
    report_file = args[1] if len(args) > 1 else "report.html"

    print(f"Loading {csv_file}...")
    try:
        df = load_and_clean(csv_file)
    except FileNotFoundError:
        # A missing input is a usage error; exit with a short message
        # instead of a traceback.
        sys.exit(f"Error: CSV file not found: {csv_file}")

    print("Generating charts...")
    bar = make_bar_chart(df)
    heat = make_heatmap(df)

    print("Getting AI insights...")
    insights = get_ai_insights(df)

    print("Building report...")
    build_report(df, bar, heat, insights, report_file)
    print(f"Done. Open {report_file} in your browser.")


# Run the pipeline
if __name__ == "__main__":
    main()
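Run it (first install the dependencies with `pip install pandas matplotlib seaborn anthropic` and set the `ANTHROPIC_API_KEY` environment variable, which the Anthropic client reads your API key from):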
python report_pipeline.py sales.csv
Open report.html in your browser. You have a complete analysis report with charts and AI insights, generated from one command.
The career unlock: Point this at any new sales CSV on the first of every month. What used to take you 2-3 hours takes 30 seconds. That's the value you now bring to your team — and it's scriptable, repeatable, and zero effort.
Day 5 Complete — Course Complete
- Built a modular pipeline with load, clean, chart, AI, and report stages
- Encoded charts as base64 for self-contained HTML reports
- Wired Claude API insights directly into the report
- Created a script you can run on any CSV in seconds
You built a real AI data pipeline.
In 5 days you went from spreadsheets to a fully automated AI-powered reporting system. That puts you ahead of the majority of analysts in the workforce today.