Set resource requests and limits, configure a Horizontal Pod Autoscaler, and understand how the scheduler places Pods.
# Per-container resource requests (scheduling guarantee) and limits (enforcement cap).
containers:
  - name: web
    image: nginx:alpine
    resources:
      requests:
        memory: '64Mi'   # guaranteed minimum — scheduler places the Pod only on a node with this much free
        cpu: '100m'      # 100 millicores = 0.1 CPU
      limits:
        memory: '128Mi'  # hard cap — container is OOM-killed if exceeded
        cpu: '500m'      # throttled if exceeded (CPU is compressible, memory is not)
# CPU units: 1000m = 1 core, 100m = 0.1 core
# Memory: Mi = mebibytes, Gi = gibibytes

# HPA scales Pods based on CPU or memory usage
# Imperative one-liner: create an HPA targeting 50% average CPU, 2-10 replicas.
kubectl autoscale deployment web-app --min=2 --max=10 --cpu-percent=50
# Or as YAML (declarative equivalent of the autoscale command above):
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: web-hpa
spec:
  scaleTargetRef:            # which workload this HPA scales
    apiVersion: apps/v1
    kind: Deployment
    name: web-app
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization        # percentage of the Pods' CPU *request*
          averageUtilization: 50   # scale out when average exceeds 50%

# Check the HPA's current state:
kubectl get hpa
kubectl describe hpa web-hpa
# Columns: TARGETS shows current/target CPU usage
# REPLICAS shows current pod count

# The HPA needs the metrics-server to read CPU/memory usage. On minikube:
minikube addons enable metrics-server
# In production clusters, deploy it separately:
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml