Node.js runs your JavaScript on a single thread by default, so on most production servers — with 8, 16, or 32 cores — all but one core sits idle. The cluster module spawns multiple Node.js processes that share the same port, each handling requests independently. worker_threads spawns threads inside the same process for CPU-bound computation. Understanding which to use, and when, is key to unlocking your server’s full capacity.
⚡ TL;DR: cluster: a multi-process HTTP server in which each process is a full Node.js instance with its own memory — best for web servers. worker_threads: CPU-bound computation in parallel threads within one process, which can share memory when needed. PM2 cluster mode: production clustering with almost no configuration. Use worker_threads for image processing, crypto, and ML inference.
Cluster module — multi-process HTTP server
// Multi-process HTTP server: the primary process forks one worker per CPU
// core and every worker runs its own Express app on the same port.
const cluster = require('cluster');
const http = require('http');
const os = require('os');

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} starting ${numCPUs} workers`);

  // Fork one worker per CPU core
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  // Restart workers that died unexpectedly.
  // NOTE: a worker that crashes during startup is re-forked immediately, so a
  // broken build loops forever; add a backoff or restart limit for production.
  cluster.on('exit', (worker, code, signal) => {
    // exitedAfterDisconnect is true when the worker was stopped on purpose
    // (worker.disconnect() / worker.kill()). Re-forking it would undo a
    // deliberate shutdown or scale-down.
    if (worker.exitedAfterDisconnect) {
      console.log(`Worker ${worker.process.pid} exited after disconnect. Not restarting.`);
      return;
    }
    console.log(`Worker ${worker.process.pid} died (${signal || code}). Restarting...`);
    cluster.fork();
  });
} else {
  // Each worker runs a full Express server
  const express = require('express');
  const app = express();

  // fetchData() is assumed to be provided by the application — it is not
  // defined in this listing.
  app.get('/api/data', async (req, res, next) => {
    try {
      res.json(await fetchData());
    } catch (err) {
      // Hand the failure to Express's error middleware instead of leaving
      // the rejected promise unhandled.
      next(err);
    }
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} listening on port 3000`);
  });
}
// Result: one worker per core (8 workers on an 8-core machine). By default the
// primary accepts connections and hands them to workers round-robin; on Windows
// the distribution is left to the operating system. Throughput scales with the
// number of cores at best — it is an upper bound, not a guaranteed 8x.
PM2 — production cluster without boilerplate
# ecosystem.config.js
module.exports = {
apps: [{
name: 'api',
script: './src/server.js',
instances: 'max', // One instance per CPU core
exec_mode: 'cluster', // Cluster mode — shared port
max_memory_restart: '500M',
env_production: {
NODE_ENV: 'production',
PORT: 3000
}
}]
};
# Commands (run from the directory that contains ecosystem.config.js):
# Start the apps declared in the ecosystem file; --env production applies
# the env_production variables from that file.
pm2 start ecosystem.config.js --env production
pm2 list # View all instances
pm2 logs api # View logs for the app named "api"
pm2 reload api # Zero-downtime reload (restart workers one by one)
pm2 monit # Real-time monitoring
Worker Threads — CPU-bound computation
// main.js — spawn workers for CPU-intensive tasks
// The same file serves as both the main-thread entry point and the worker
// script; isMainThread selects which branch runs.
const os = require('os');
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');

/**
 * Run one task in a freshly spawned worker thread that executes this file.
 *
 * @param {*} data - Passed to the worker as `workerData`.
 * @returns {Promise<*>} Resolves with the first message the worker posts;
 *   rejects if the worker throws, exits with a non-zero code, or exits
 *   without ever posting a result.
 */
function runWorker(data) {
  return new Promise((resolve, reject) => {
    const worker = new Worker(__filename, { workerData: data });
    worker.once('message', resolve);
    worker.once('error', reject);
    worker.once('exit', (code) => {
      if (code !== 0) {
        reject(new Error(`Worker stopped with exit code ${code}`));
        return;
      }
      // A clean exit normally follows the result message, in which case this
      // reject is a no-op. Without it, a worker that exits cleanly but never
      // posts would leave the promise pending forever.
      reject(new Error('Worker exited without posting a result'));
    });
  });
}

if (isMainThread) {
  // Main thread: receive task, dispatch to worker

  // Worker pool — reuse workers instead of spawning per task.
  // WorkerPool comes from the project-local ./worker-pool module; its
  // constructor (script, size) and run(task) shape are taken from this usage.
  const { WorkerPool } = require('./worker-pool');

  // CommonJS has no top-level await, so the awaiting code lives in an async
  // function.
  const main = async () => {
    const pool = new WorkerPool('./hasher.js', os.cpus().length);
    // largeData is assumed to be supplied by the surrounding application.
    const hash = await pool.run({ input: largeData });
    return hash;
  };

  main().catch((err) => {
    // Surface failures instead of leaving a floating rejected promise.
    console.error(err);
    process.exitCode = 1;
  });
} else {
  // Worker thread: do CPU-intensive work.
  // heavyComputation() is assumed to be defined elsewhere in the application.
  const result = heavyComputation(workerData.input);
  parentPort.postMessage(result);
}
Shared memory between threads
// SharedArrayBuffer: share memory between worker threads
// Single-file example: the main thread allocates the buffer and spawns this
// same file as a worker; the worker increments a counter in the shared memory.
// SharedArrayBuffer and Atomics are built-in globals, so only worker_threads
// needs to be required.
const { Worker, isMainThread, workerData } = require('worker_threads');

if (isMainThread) {
  // Main thread: allocate room for 1000 32-bit integers.
  const sharedBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT * 1000);
  // View over the shared memory on the main-thread side.
  const sharedArray = new Int32Array(sharedBuffer);
  // The worker receives the SharedArrayBuffer itself through workerData.
  const worker = new Worker(__filename, {
    workerData: { sharedBuffer },
  });
} else {
  // Worker thread: wrap the buffer received from the main thread in its own view.
  const sharedArray = new Int32Array(workerData.sharedBuffer);
  // Use Atomics for thread-safe operations:
  Atomics.add(sharedArray, 0, 1); // Atomic increment — no race conditions
}
- ✅ Cluster for web servers — each worker handles requests independently
- ✅ PM2 cluster mode for production — zero-downtime reloads, monitoring
- ✅ Worker threads for CPU-bound: image processing, crypto, compression
- ✅ Worker pool — reuse threads instead of spawning per task
- ✅ SharedArrayBuffer + Atomics for thread-safe shared memory
- ❌ Never use cluster for pure CPU tasks — process creation is expensive
- ❌ Never block the main thread — offload CPU work to workers
Node.js clustering multiplies the throughput of your REST API. For truly massive scale, Lambda auto-scales with zero cluster management. External reference: Node.js cluster documentation.
Recommended Reading
→ Designing Data-Intensive Applications — The essential book every senior developer needs.
→ The Pragmatic Programmer — Timeless engineering wisdom for writing better code.
Affiliate links. We earn a small commission at no extra cost to you.
Free Weekly Newsletter
🚀 Don’t Miss the Next Cheat Code
Join 1,000+ senior developers getting expert JS, Python, AWS and system design secrets weekly.
Discover more from CheatCoders
Subscribe to get the latest posts sent to your email.
