Capítulo 11: Escala y Producción
En producción, Firecracker no se usa de forma manual — se necesita un orchestrator que gestione el ciclo de vida de decenas o cientos de microVMs simultáneamente.
Arquitectura de un orchestrator mínimo
graph TD
A[API del orchestrator] --> B[Pool de VMs disponibles]
B --> C{Solicitud de VM}
C -->|VM del pool| D[Asignar VM pre-calentada]
C -->|Pool vacío| E[Arrancar nueva VM]
D --> F[Configurar via MMDS]
E --> F
F --> G[VM activa]
G -->|Fin de uso| H[Limpiar y devolver al pool]
Script orchestrator básico
#!/bin/bash
# vm-manager.sh — basic lifecycle management for multiple Firecracker microVMs.

# Running counter of VMs started; drives TAP device names and IP allocation.
VM_COUNT=0
# Working directory holding sockets, logs, rootfs copies and config files.
POOL_DIR="/tmp/fc-pool"
mkdir -p -- "${POOL_DIR}"
start_vm() {
  # Boot a new microVM: per-VM TAP device, CoW rootfs copy, JSON config,
  # then launch firecracker in the background.
  # Outputs: "socket_path:guest_ip:tap_dev" on stdout.
  #
  # NOTE(review): callers invoke this as info=$(start_vm), which runs the
  # function in a subshell — the VM_COUNT update below never reaches the
  # parent shell. Callers must bump VM_COUNT themselves after each call
  # (see the example loop) or keep the counter in a file.
  local VM_ID="vm-$(date +%s%N | md5sum | head -c 8)"
  local SOCKET="${POOL_DIR}/${VM_ID}.sock"
  local LOG="${POOL_DIR}/${VM_ID}.log"
  local VM_NUM="${VM_COUNT}"

  # Plain assignment instead of (( VM_COUNT++ )): the arithmetic command
  # exits with status 1 when the old value is 0, which made the original
  # function "fail" on its very first call (and would abort under set -e).
  VM_COUNT=$((VM_COUNT + 1))

  # Carve a /30 out of 172.16.0.0/16 per VM: TAP gets .N, guest gets .N+1.
  local THIRD_OCTET=$(( (VM_NUM * 4) / 256 ))
  local FOURTH_OCTET=$(( (VM_NUM * 4) % 256 ))
  local TAP_IP="172.16.${THIRD_OCTET}.${FOURTH_OCTET}"
  local GUEST_IP="172.16.${THIRD_OCTET}.$(( FOURTH_OCTET + 1 ))"
  local TAP_DEV="tap${VM_NUM}"

  # Network setup (errors suppressed on purpose: device may already exist).
  sudo ip tuntap add dev "${TAP_DEV}" mode tap 2>/dev/null
  sudo ip addr add "${TAP_IP}/30" dev "${TAP_DEV}" 2>/dev/null
  sudo ip link set "${TAP_DEV}" up

  # Copy rootfs (copy-on-write via cp --reflink when the FS supports it).
  cp --reflink=auto /opt/firecracker/images/base.ext4 \
    "${POOL_DIR}/${VM_ID}-rootfs.ext4"

  # Write the VM config. NOTE(review): the MAC embeds VM_NUM as one hex
  # byte, so numbers above 255 produce an invalid address — the scheme
  # supports at most 256 concurrent VMs as written.
  cat > "${POOL_DIR}/${VM_ID}-config.json" << EOF
{
  "boot-source": {
    "kernel_image_path": "/opt/firecracker/kernels/vmlinux",
    "boot_args": "console=ttyS0 reboot=k panic=1 pci=off"
  },
  "drives": [{
    "drive_id": "rootfs",
    "path_on_host": "${POOL_DIR}/${VM_ID}-rootfs.ext4",
    "is_root_device": true,
    "is_read_only": false
  }],
  "machine-config": {
    "vcpu_count": 1,
    "mem_size_mib": 512
  },
  "network-interfaces": [{
    "iface_id": "eth0",
    "guest_mac": "AA:FC:00:00:$(printf '%02X' "${VM_NUM}"):01",
    "host_dev_name": "${TAP_DEV}"
  }]
}
EOF

  # Launch the VMM in the background; all its output goes to the VM log.
  firecracker \
    --api-sock "${SOCKET}" \
    --config-file "${POOL_DIR}/${VM_ID}-config.json" \
    >> "${LOG}" 2>&1 &

  echo "${SOCKET}:${GUEST_IP}:${TAP_DEV}"
}
stop_vm() {
  # Gracefully stop a microVM and tear down its host-side TAP device.
  # $1 - path to the VM's API socket
  # $2 - TAP device name
  local socket="$1"
  local tap_dev="$2"

  # Ask the guest to shut down cleanly via the Ctrl+Alt+Del action;
  # ignore failures (the VMM may already be gone).
  curl -sf -X PUT \
    --unix-socket "${socket}" \
    --data '{"action_type": "SendCtrlAltDel"}' \
    "http://localhost/actions" 2>/dev/null || true

  # Remove the TAP interface; also best-effort.
  sudo ip link delete "${tap_dev}" 2>/dev/null || true
}
# Example: boot 5 VMs.
# $(start_vm) runs the function in a subshell, so the VM_COUNT increment
# performed inside it is lost in this shell — without the explicit bump
# below, every VM would compute VM_NUM=0 and collide on tap0 and its IPs.
for i in $(seq 5); do
  info=$(start_vm)
  VM_COUNT=$((VM_COUNT + 1))
  echo "VM ${i} info: ${info}"
done
Patrones comunes en producción
1. Pool pre-calentado (AWS Lambda pattern)
# Keep N VMs ready with the application pre-loaded (restored from snapshot).
WARM_POOL_SIZE=10
POOL_FILE="${POOL_DIR}/warm-pool.txt"

# Pre-warm the pool: one snapshot-restored VM per slot.
for (( slot = 1; slot <= WARM_POOL_SIZE; slot++ )); do
  vm_record=$(start_vm_from_snapshot "/opt/fc-snapshots/app-ready")
  echo "${vm_record}" >> "${POOL_FILE}"
done
# Al recibir una solicitud, tomar VM del pool
serve_request() {
  # Pop the first warm VM off the pool, use it, and refill asynchronously.
  # Returns 1 (without touching the pool) when the pool is empty.
  #
  # NOTE(review): head + sed is not atomic — two concurrent callers can
  # grab the same VM. Serialize with flock(1) if requests run in parallel.
  local vm_info
  # Split declaration from assignment so the command's status isn't masked.
  vm_info=$(head -n 1 "${POOL_FILE}")
  if [[ -z "${vm_info}" ]]; then
    echo "serve_request: warm pool empty" >&2
    return 1
  fi
  sed -i '1d' "${POOL_FILE}" # remove the entry we just took

  local socket
  socket=$(echo "${vm_info}" | cut -d: -f1)
  # ... use the VM ...

  # Refill the pool asynchronously.
  (start_vm_from_snapshot "/opt/fc-snapshots/app-ready" \
    >> "${POOL_FILE}") &
}
2. Monitoreo de VMs activas
# Estado de todas las VMs activas
list_vms() {
  # Print "vm-id.sock: state" for every live API socket in the pool dir.
  # A VM whose API does not answer is reported as "dead".
  local socket state
  for socket in "${POOL_DIR}"/*.sock; do
    # Skip the literal glob when no socket exists, and non-socket files.
    [ -S "${socket}" ] || continue
    state=$(curl -sf --unix-socket "${socket}" \
      "http://localhost/vm" 2>/dev/null | jq -r '.state // "dead"')
    # Quote the basename argument (the original broke on paths with spaces).
    echo "$(basename "${socket}"): ${state}"
  done
}
3. Cleanup automático de VMs terminadas
cleanup_dead_vms() {
  # Remove socket, config, rootfs and log of every VM whose API no longer
  # responds (i.e. the firecracker process is gone).
  local socket vm_id
  for socket in "${POOL_DIR}"/*.sock; do
    [ -S "${socket}" ] || continue
    if ! curl -sf --unix-socket "${socket}" \
        "http://localhost/version" >/dev/null 2>&1; then
      vm_id=$(basename "${socket}" .sock)
      rm -f "${socket}" "${POOL_DIR}/${vm_id}-config.json"
      # Also delete the per-VM log created by start_vm — the original
      # left it behind, leaking files in the pool directory.
      rm -f "${POOL_DIR}/${vm_id}-rootfs.ext4" "${POOL_DIR}/${vm_id}.log"
      echo "Limpiado: ${vm_id}"
    fi
  done
}
Límites del sistema host
Verifica los límites del sistema antes de escalar:
# Maximum number of open file descriptors
ulimit -n
# For many VMs, raise it to 1048576
sudo sysctl -w fs.file-max=1048576
echo "* soft nofile 1048576" | sudo tee -a /etc/security/limits.conf
echo "* hard nofile 1048576" | sudo tee -a /etc/security/limits.conf
# Maximum number of inotify instances
cat /proc/sys/fs/inotify/max_user_instances
sudo sysctl -w fs.inotify.max_user_instances=8192
# Check there are enough hugepages if you use them.
# (The /proc/meminfo field is "HugePages_Total" — the original pattern
# "HugePagesTotal" never matched anything.)
grep HugePages_Total /proc/meminfo
Logs centralizados
# Route Firecracker's own log output to a file on the host.
logger_config='{
"log_path": "/var/log/firecracker/vm-001.log",
"level": "Info",
"show_level": true,
"show_log_origin": true
}'
curl -s -X PUT \
  --unix-socket "${API_SOCKET}" \
  --header "Content-Type: application/json" \
  --data "${logger_config}" \
  "http://localhost/logger"
Niveles disponibles: Error, Warning, Info, Debug, Trace.