CUSTOM SHARD MANAGER
┌───────────────────────────────────────────────┐
│ spawn(totalShards) │
│ └─ SpawnQueue { concurrency:1, delay:5500ms } │
│ └─ fork child_process (shard_worker.js) │
│ └─ IDENTIFY token + shard [n/N] │
│ │
│ Health Monitor (every 30s) │
│ ├─ IPC ping → each shard via broadcastEval │
│ ├─ receive: { ws_ping, guilds, memory } │
│ ├─ ws_ping > 500ms → emit shardDegraded │
│ └─ no response 10s → respawn(shardId) │
│ │
│ respawn(shardId) │
│ ├─ send IPC { type: DRAIN } │
│ ├─ wait DRAIN_ACK or timeout 3s │
│ ├─ SIGTERM child process │
│ └─ re-enqueue in SpawnQueue │
└───────────────────────────────────────────────┘
// shard_manager.js
import { ShardingManager } from "discord.js";
import PQueue from "p-queue";
class CustomShardingManager extends ShardingManager {
  /**
   * ShardingManager with a serialized spawn queue, a periodic health
   * monitor, and drain-before-kill respawns. Built-in auto-respawn is
   * disabled so this class owns the whole shard lifecycle.
   * @param {string} file - path to the shard worker script
   * @param {object} options - ShardingManager options (respawn forced off)
   */
  constructor(file, options) {
    super(file, { ...options, respawn: false });
    // concurrency:1 serializes IDENTIFYs across all shard spawns.
    this.spawnQueue = new PQueue({ concurrency: 1 });
    this.healthMap = new Map();
    this.startHealthMonitor();
  }

  /**
   * Enqueue a shard spawn; the queue plus the fixed sleep keeps the
   * manager under the gateway IDENTIFY rate limit even when many shards
   * respawn at once.
   */
  spawnShard(shardId) {
    return this.spawnQueue.add(async () => {
      // IDENTIFY ratelimit: 1 per 5000ms (500ms safety margin)
      await sleep(5500);
      const shard = await this.createShard(shardId);
      shard.on("ready", () => this.onShardReady(shardId));
      shard.on("death", () => this.onShardDeath(shardId));
      shard.on("message", (m) => this.handleIPC(shardId, m));
      return shard;
    });
  }

  /**
   * Every 30s poll each shard for ws ping / guild count via eval.
   * No answer within 10s → respawn; slow ping → emit "shardDegraded".
   */
  startHealthMonitor() {
    this.healthInterval = setInterval(async () => {
      for (const [id, shard] of this.shards) {
        try {
          const h = await Promise.race([
            shard.eval("() => ({ ping: client.ws.ping, guilds: client.guilds.cache.size })"),
            rejectAfter(10_000)
          ]);
          this.healthMap.set(id, { ...h, ts: Date.now() });
          if (h.ping > 500) this.emit("shardDegraded", id, h);
        } catch {
          // BUGFIX: a rejected respawn used to escape this async interval
          // callback as an unhandled rejection.
          await this.respawnShard(id).catch(() => {});
        }
      }
    }, 30_000);
    // Don't keep the process alive just for the monitor.
    this.healthInterval.unref?.();
  }

  /**
   * Graceful respawn: ask the shard to drain, wait for DRAIN_ACK (3s
   * cap), SIGTERM the child, then re-enqueue the spawn.
   */
  async respawnShard(shardId) {
    const shard = this.shards.get(shardId);
    // BUGFIX: the shard may already be gone (a death event raced us);
    // just re-enqueue instead of throwing on undefined.
    if (!shard) return this.spawnShard(shardId);
    await Promise.race([
      // BUGFIX: .catch so a dead IPC channel cannot surface as an
      // unhandled rejection after the sleep() branch wins the race.
      shard.send({ op: "DRAIN" })
        .then(() => new Promise(r => shard.once("DRAIN_ACK", r)))
        .catch(() => {}),
      sleep(3000)
    ]);
    shard.process?.kill("SIGTERM");
    await sleep(500);
    await this.spawnShard(shardId);
  }
}
WS Receive Buffer ── buffer_drain_lag <1ms
|
JSON.parse() ── parse_time/event (erlpack 2x faster)
|
Intent Filter ── events_dropped_% (tune via Intents)
|
Cache Update ── cache_write_time (GC pauses, alloc/s)
|
EventEmitter ── listener_count (sync blocking risk)
|
App Handler ── handler_time_p99 (your logic cost)
|
event_loop_lag ──── setImmediate delta every 500ms
Tools: perf_hooks | clinic.js flame | Prometheus p99
// bottleneck_profiler.js
import { performance, PerformanceObserver } from "node:perf_hooks";

// Stamp raw gateway packets before discord.js touches them so later
// stages can compute per-event processing latency against recvMap.
client.ws.on("raw", (packet) => {
  if (!packet.t) return;
  const key = packet.t + ":" + (packet.d?.id ?? "");
  recvMap.set(key, performance.now());
});

// Event-loop lag probe: schedule a setImmediate every 500ms and measure
// how late it actually fires relative to when it was queued.
setInterval(() => {
  const scheduledAt = Date.now();
  setImmediate(() => {
    const lag = Date.now() - scheduledAt;
    metrics.gauge("event_loop_lag_ms", lag);
    if (lag > 100) logger.warn("Loop lag: " + lag + "ms");
  });
}, 500);

// Wrap guild-cache writes to expose write latency as a histogram.
const originalSet = client.guilds.cache.set.bind(client.guilds.cache);
client.guilds.cache.set = (key, value) => {
  const startedAt = performance.now();
  const result = originalSet(key, value);
  metrics.histogram("cache_set_ms", performance.now() - startedAt);
  return result;
};

// Surface major GC pauses (detail.kind === 2 → major collection in Node).
const gcObserver = new PerformanceObserver((list) => {
  for (const entry of list.getEntries()) {
    if (entry.detail?.kind === 2) {
      logger.warn("Major GC: " + entry.duration.toFixed(1) + "ms");
    }
  }
});
gcObserver.observe({ type: "gc", buffered: false });
DISCONNECT DETECTED
|
Load session from Redis
KEY: session:{shardId}
VALUE: { sessionId, resumeUrl, lastSeq, savedAt }
|
+── found AND (Date.now() - savedAt) < 58000 ?
| |
| RESUME → resumeUrl
| +─ op:9 INVALID_SESSION → fall to IDENTIFY
| +─ op:0 RESUMED → replay from lastSeq ✓
|
+── missing OR expired ?
|
backoff = min(1000 * 2^attempt + rand(0..1000), 30000)
await sleep(backoff)
IDENTIFY → new { sessionId, resumeUrl }
store in Redis TTL 90s
// session_store.js
export class RedisSessionStore {
  /** @param {object} redis - any client exposing get/set (ioredis-style) */
  constructor(redis) {
    this.r = redis;
  }

  /**
   * Persist a shard session under session:{id}, stamping savedAt and
   * letting Redis expire the key after 90s (resume window + margin).
   */
  async save(id, data) {
    const payload = JSON.stringify({ ...data, savedAt: Date.now() });
    await this.r.set("session:" + id, payload, "EX", 90);
  }

  /** Fetch and parse a stored session, or null when absent/expired. */
  async load(id) {
    const raw = await this.r.get("session:" + id);
    return raw ? JSON.parse(raw) : null;
  }
}
// reconnect handler
// Close codes where token/shard config is invalid — resuming is pointless.
const NON_RESUMABLE = new Set([4004,4010,4011,4013,4014]);
let attempt = 0;

ws.on("close", async (code) => {
  if (!NON_RESUMABLE.has(code)) {
    const s = await store.load(shardId);
    // Sessions are only resumable within a short window after saving.
    if (s && Date.now() - s.savedAt < 58_000) {
      if (await attemptResume(s)) { attempt = 0; return; }
    }
  }
  // Full exponential backoff with jitter before a fresh IDENTIFY.
  await sleep(Math.min(1000 * 2 ** attempt++, 30_000) + Math.random() * 1000);
  await identify();
  // BUGFIX: reset backoff after a successful IDENTIFY; previously the
  // counter only reset on a successful RESUME, so a bot that reconnected
  // via IDENTIFY kept escalating its backoff forever. (If identify()
  // throws, the reset is skipped and the next close backs off further.)
  attempt = 0;
});

// Persist session state on every sequence advance so a resume after a
// crash replays from the right point.
ws.on("message", (raw) => {
  // NOTE(review): assumes the gateway payload is JSON-encoded — confirm
  // erlpack/ETF is not in use on this connection.
  const { s } = JSON.parse(raw);
  if (s !== null && s > lastSeq) {
    lastSeq = s;
    // BUGFIX: this write was a floating promise — a Redis hiccup became an
    // unhandled rejection. Persistence stays fire-and-forget (never block
    // the gateway reader), but failures are now contained.
    store.save(shardId, { sessionId, resumeUrl, lastSeq: s }).catch(() => {});
  }
});
Query: "Top 100 balance for guild_id = X"
┌─────────────────┬──────────────┬─────────────┐
│ │ PostgreSQL │ MongoDB │
├─────────────────┼──────────────┼─────────────┤
│ Query plan │ Index-only │ Collection │
│ Heap fetches │ 0 (covering) │ Many │
│ P50 @ 1M rows │ ~2ms │ ~8ms │
│ P99 @ 1M rows │ ~6ms │ ~40ms │
└─────────────────┴──────────────┴─────────────┘
Schema: economy.users
PRIMARY KEY (guild_id, user_id)
INDEX idx_lb_balance ON (guild_id, balance DESC) INCLUDE (user_id)
PARTIAL INDEX idx_active WHERE updated_at > now() - INTERVAL 30d
-- economy_schema.sql
CREATE TABLE economy.users (
  guild_id   BIGINT NOT NULL,
  user_id    BIGINT NOT NULL,
  balance    BIGINT NOT NULL DEFAULT 0,
  bank       BIGINT NOT NULL DEFAULT 0,
  xp         BIGINT NOT NULL DEFAULT 0,
  updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
  CONSTRAINT pk_economy PRIMARY KEY (guild_id, user_id),
  CONSTRAINT ck_balance CHECK (balance >= 0)
);
-- Covering index: zero heap fetch for the leaderboard query below.
-- Note: CREATE INDEX CONCURRENTLY cannot run inside a transaction block.
CREATE INDEX CONCURRENTLY idx_lb_balance
  ON economy.users (guild_id, balance DESC)
  INCLUDE (user_id);
-- BUGFIX: the original partial index used `WHERE updated_at > now() -
-- INTERVAL '30 days'`, but partial-index predicates must use IMMUTABLE
-- functions — Postgres rejects now() with "functions in index predicate
-- must be marked IMMUTABLE" (and the cutoff would freeze at creation
-- time even if it were allowed). A plain btree on (guild_id, updated_at)
-- serves the same "recently active" range scans and stays correct.
CREATE INDEX CONCURRENTLY idx_active
  ON economy.users (guild_id, updated_at);
-- Validate: must show "Index Only Scan", Heap Fetches: 0
EXPLAIN (ANALYZE, BUFFERS)
SELECT user_id, balance
FROM economy.users
WHERE guild_id = $1
ORDER BY balance DESC
LIMIT 100;
READ-ASIDE with SETNX Stampede Prevention
┌─────────────────────────────────────────┐
│ GET redis:bal:{guildId}:{userId} │
│ +─ HIT → return BigInt(value) <0.5ms │
│ +─ MISS → │
│ SETNX lock:bal:.. (5s TTL) │
│ +─ LOCKED: SELECT FROM pg │
│ | SET redis:bal EX 60 │
│ | DEL lock │
│ +─ NO LOCK: sleep(50ms) → retry │
└─────────────────────────────────────────┘
WRITE-THROUGH on mutations:
BEGIN → UPDATE pg → COMMIT → SET redis EX 60
// economy_cache.js
export class EconomyCache {
  /**
   * Read-aside balance cache (Redis in front of Postgres) with a SETNX
   * lock to prevent cache-miss stampedes.
   * @param {object} redis - ioredis-style client
   * @param {object} pg - node-postgres pool/client
   */
  constructor(redis, pg) {
    this.r = redis; this.pg = pg;
    this.TTL = 60;  // seconds a cached balance lives
    this.LOCK = 5;  // seconds the rebuild lock lives
  }

  bk(g, u) { return "bal:" + g + ":" + u; }   // balance key
  lk(g, u) { return "lock:" + g + ":" + u; }  // rebuild-lock key

  /**
   * Return the balance as a BigInt. Cache hit is sub-millisecond; a miss
   * takes the SETNX lock and rebuilds from Postgres.
   * @param {number} [retries] - internal; bounds the lock-wait loop
   */
  async getBalance(g, u, retries = 0) {
    const c = await this.r.get(this.bk(g, u));
    if (c !== null) return BigInt(c);
    const lock = await this.r.set(this.lk(g, u), "1", "NX", "EX", this.LOCK);
    if (!lock) {
      // BUGFIX: the retry loop was unbounded. After waiting roughly twice
      // the lock TTL, give up on the cache and read Postgres directly so
      // a stuck lock can never wedge reads.
      if (retries >= (this.LOCK * 1000 * 2) / 50) {
        const { rows } = await this.pg.query(
          "SELECT balance FROM economy.users WHERE guild_id=$1 AND user_id=$2",
          [g, u]
        );
        return BigInt(rows[0]?.balance ?? 0);
      }
      await sleep(50);
      return this.getBalance(g, u, retries + 1);
    }
    try {
      const { rows } = await this.pg.query(
        "SELECT balance FROM economy.users WHERE guild_id=$1 AND user_id=$2",
        [g, u]
      );
      // BUGFIX: node-postgres returns BIGINT columns as strings, so the
      // old `rows[0]?.balance ?? 0n` handed back a string on the miss
      // path while the hit path returned a BigInt. Normalize here.
      const bal = BigInt(rows[0]?.balance ?? 0);
      await this.r.set(this.bk(g, u), bal.toString(), "EX", this.TTL);
      return bal;
    } finally {
      await this.r.del(this.lk(g, u));
    }
  }

  /** Drop one user's cached balance (call after a write-through). */
  invalidate(g, u) { return this.r.del(this.bk(g, u)); }

  /** Drop every cached balance for a guild via incremental SCAN. */
  async invalidateGuild(g) {
    // BUGFIX: Redis cursors are unsigned 64-bit values returned as
    // strings; round-tripping through Number could mangle large cursors.
    // Compare as strings instead.
    let cur = "0";
    do {
      const [next, keys] = await this.r.scan(cur, "MATCH", "bal:" + g + ":*", "COUNT", 100);
      if (keys.length) await this.r.del(...keys);
      cur = next;
    } while (cur !== "0");
  }
}
RACE CONDITION — naive read-modify-write
Thread A Thread B
READ bal=1000 READ bal=1000
check >= 500 ✓ check >= 500 ✓
WRITE bal=500 WRITE bal=500 ← CORRUPT
ATOMIC FIX — single conditional UPDATE
┌────────────────────────────────────────┐
│ UPDATE economy.users │
│ SET balance = balance - $amount │
│ WHERE guild_id = $1 AND user_id = $2 │
│ AND balance >= $amount │
│ RETURNING balance │
│ │
│ rowCount=0 → InsufficientFundsError │
│ rowCount=1 → success, new balance │
│ Both threads race → exactly ONE wins │
└────────────────────────────────────────┘
// economy_service.js — atomic transfer
/**
 * Atomically move `amount` from one user to another inside a single
 * Postgres transaction, then invalidate both cached balances.
 *
 * The debit is a conditional UPDATE (`AND balance >= $1`), so two racing
 * transfers can never overdraw: exactly one sees rowCount=1.
 *
 * @param {*} guildId guild scope of both accounts
 * @param {*} fromId  debited user id
 * @param {*} toId    credited user id
 * @param {bigint} amount positive amount (compared against 0n)
 * @throws {RangeError} non-positive amount
 * @throws {Error} self-transfer
 * @throws {InsufficientFundsError} debit matched no row
 */
async transfer(guildId, fromId, toId, amount) {
  if (amount <= 0n) throw new RangeError("Amount must be positive");
  if (fromId === toId) throw new Error("Cannot self-transfer");
  const client = await pool.connect();
  try {
    await client.query("BEGIN");
    // Debit atomically — rowCount=0 means insufficient funds (or the
    // debtor has no row at all; both are treated as insufficient).
    const { rowCount } = await client.query(
      "UPDATE economy.users SET balance=balance-$1 WHERE guild_id=$2 AND user_id=$3 AND balance>=$1 RETURNING balance",
      [amount, guildId, fromId]
    );
    if (rowCount === 0) {
      await client.query("ROLLBACK");
      throw new InsufficientFundsError();
    }
    // Credit via UPSERT (handles first-time users)
    // NOTE(review): opposite-direction transfers lock rows in opposite
    // order (debit A→credit B vs debit B→credit A), which can deadlock
    // under load; Postgres aborts one of them — confirm callers retry.
    await client.query(
      "INSERT INTO economy.users(guild_id,user_id,balance) VALUES($2,$3,$1) ON CONFLICT(guild_id,user_id) DO UPDATE SET balance=economy.users.balance+$1",
      [amount, guildId, toId]
    );
    // Append-only audit ledger
    await client.query(
      "INSERT INTO economy.transactions(guild_id,from_id,to_id,amount,type) VALUES($1,$2,$3,$4,'transfer')",
      [guildId, fromId, toId, amount]
    );
    await client.query("COMMIT");
    // Invalidate only after COMMIT so readers never re-cache pre-transfer
    // balances from a still-open transaction.
    await Promise.all([cache.invalidate(guildId, fromId), cache.invalidate(guildId, toId)]);
  } catch (e) {
    // InsufficientFunds already rolled back above; everything else gets a
    // best-effort ROLLBACK (ignored if the connection is already dead).
    if (!(e instanceof InsufficientFundsError)) await client.query("ROLLBACK").catch(() => {});
    throw e;
  } finally { client.release(); }
}
TOKEN BUDGET ARCHITECTURE
Incoming message
|
Rate limiter (Redis sliding window)
rl:ai:{userId} → max 10 calls/60s
|
Guild daily quota
tokens:{guildId}:{todayUTC} → max 100k/day
|
Context builder
Load channel history from Redis ZSET
tiktoken.encode(messages) → count tokens
Walk backwards, drop oldest until budget fits
|
API call { model: gpt-4o-mini, max_tokens: 800 }
On response: INCRBY actual usage to quota key
// token_manager.js
import { encodingForModel } from "tiktoken";
// Tokenizer matching the target model; used for all budget accounting.
const enc = encodingForModel("gpt-4o-mini");
// Hard limits: per-user call rate, per-guild daily token quota, and the
// context/reply token budgets handed to the API.
const CFG = {
  userCallsPerMin: 10, guildDailyTokens: 100_000,
  maxCtx: 4_000, maxReply: 800
};
/**
 * Assemble a chat context that fits CFG.maxCtx tokens: the current user
 * message plus as much recent channel history as the remaining budget
 * allows, walking backwards and dropping the oldest first.
 * @returns {Array<{role: string, content: string}>}
 */
async function buildContext(channelId, msg) {
  const hist = (await redis.zrange("ctx:" + channelId, 0, -1))
    .map(s => JSON.parse(s));
  // BUGFIX: the current message was appended to the candidate list and
  // could itself be dropped by the budget walk, yielding an empty (or
  // message-less) context for the API call. It is now always included
  // and its cost deducted up front.
  const current = { role: "user", content: msg };
  const sel = [current];
  let budget = CFG.maxCtx - enc.encode(msg).length;
  for (let i = hist.length - 1; i >= 0; i--) {
    const t = enc.encode(hist[i].content).length;
    if (budget - t < 0) break;
    budget -= t;
    sel.unshift(hist[i]);
  }
  return sel;
}
/**
 * Rate-limited, quota-aware chat completion for one user message.
 * Throws RateLimitError past the per-user per-minute cap and QuotaError
 * once the guild's daily token budget is spent; records actual usage
 * after each successful call.
 */
export async function callAI(guildId, userId, channelId, msg) {
  // Per-user counter: the first INCR creates the key, so the 60s TTL is
  // attached exactly once.
  const rlKey = "rl:ai:" + userId;
  const calls = await redis.incr(rlKey);
  if (calls === 1) await redis.expire(rlKey, 60);
  if (calls > CFG.userCallsPerMin) throw new RateLimitError();

  // Guild-level daily quota, keyed by UTC date.
  const quotaKey = "tokens:" + guildId + ":" + todayUTC();
  const spent = Number(await redis.get(quotaKey) ?? 0);
  if (spent >= CFG.guildDailyTokens) throw new QuotaError();

  const messages = await buildContext(channelId, msg);
  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini", messages, max_tokens: CFG.maxReply
  });

  // Bill actual usage and keep the day-key from lingering forever.
  const pipe = redis.pipeline();
  pipe.incrby(quotaKey, res.usage.total_tokens);
  pipe.expire(quotaKey, 86400);
  await pipe.exec();

  return res.choices[0].message.content;
}
THREE-STAGE MODERATION PIPELINE
[Incoming message]
|
Stage 1: Rule Engine (sync, <1ms)
Regex, slur list, invite link patterns
Match → ACT(severity, source="rules")
| no match
Stage 2: OpenAI Moderation API (FREE, ~100ms)
/v1/moderations → category_scores
Score > threshold → ACT(source="api")
No score > 0.3 → clean → return
| score 0.3–threshold: ambiguous
Stage 3: Context LLM Agent (gpt-4o-mini, ~600ms)
[system: mod policy]
[last 5 channel messages]
[flagged message + score]
→ { severity: 0-4, category, reason }
|
Policy Engine → action(severity) + audit_log
severity 4 → also push to human review queue
// moderation_pipeline.js
// Per-category auto-action thresholds for Moderation API scores; any
// category not listed here falls back to 0.9.
const THRESH = { hate:.70, violence:.80, sexual:.85, self_harm:.60 };
// System prompt for the stage-3 context agent; demands strict JSON so
// the reply can be machine-parsed.
const SYS = [
  "You are a moderation agent.",
  "Analyze the message in its conversation context.",
  "Respond ONLY with JSON: {severity:0-4, category:string, reason:string}",
  "Severity: 0=clean 1=warn 2=delete 3=timeout 4=ban"
].join(" ");
/**
 * Three-stage moderation: cheap synchronous rules, then the free
 * Moderation API, then a context-aware LLM agent for the ambiguous
 * middle band only. Resolves to an action record
 * { severity, category?, reason?, source }.
 */
export async function moderate(message, history) {
  // Stage 1: rule engine (synchronous, <1ms).
  // NOTE(review): if RULES is a plain RegExp, .test() returns a boolean
  // and auditing/returning `true` is meaningless (and a /g regex would be
  // stateful). This assumes RULES is a custom engine whose .test()
  // returns an action object — confirm.
  const rule = RULES.test(message.content);
  if (rule) { await audit(message, rule); return rule; }

  // Stage 2: Moderation API (free endpoint, ~100ms).
  const { results } = await openai.moderations.create({ input: message.content });
  const scores = results[0].category_scores;
  let maxSc = 0, maxCat = "";
  for (const [c, s] of Object.entries(scores)) {
    if (s > maxSc) { maxSc = s; maxCat = c; }
    if (s > (THRESH[c] ?? .9)) {
      const r = { severity: sev(s), category: c, source: "api" };
      await audit(message, r); return r;
    }
  }
  if (maxSc < .3) return { severity: 0, source: "clean" };

  // Stage 3: context agent for the 0.3–threshold band (~600ms).
  const msgs = [
    { role:"system", content: SYS },
    ...history.slice(-5).map(m => ({ role:"user", content: m.content })),
    { role:"user", content: "FLAGGED("+maxCat+" "+maxSc.toFixed(2)+"): "+message.content }
  ];
  const res = await openai.chat.completions.create({
    model:"gpt-4o-mini", messages:msgs,
    max_tokens:120, response_format:{ type:"json_object" }
  });
  // BUGFIX: the agent reply was parsed unguarded; malformed JSON crashed
  // the pipeline and the message went unmoderated. Fall back to a
  // human-review escalation instead of throwing.
  let verdict;
  try {
    verdict = JSON.parse(res.choices[0].message.content);
  } catch {
    verdict = { severity: 4, category: maxCat, reason: "unparseable agent output" };
  }
  const r = { ...verdict, source: "agent" };
  if (r.severity >= 4) await reviewQueue.push(message, r);
  await audit(message, r);
  return r;
}
LAYERED CONTEXT ASSEMBLY (per request, 4000 token budget)
Layer 0: SYSTEM PERSONA (~200 tokens, always included)
Name, tone, server rules, capability scope
Layer 1: USER MEMORY FACTS (max 150 tokens)
SELECT fact FROM user_memories
WHERE user_id=$1 ORDER BY weight DESC LIMIT 5
Layer 2: CHANNEL HISTORY (fills remaining budget)
Redis ZSET → walk backwards → drop oldest to fit
Layer 3: CURRENT MESSAGE
{ role: "user", content: message.content }
After response → extractMemory() (async, non-blocking)
gpt-4o-mini extracts facts → INSERT INTO user_memories
// persona_engine.js
// Layer 0 of the context stack: the per-guild system persona.
const PERSONA = (guild) =>
  "You are the assistant for: " + guild.name + "." +
  " Name: Cortex. Tone: direct, technical, concise." +
  " Never use filler phrases. You remember users across sessions.";

export class PersonaEngine {
  /**
   * Assembles layered chat context (persona → user memory → channel
   * history → current message) inside a fixed prompt-token budget.
   * @param {object} redis - ioredis-style client (channel history ZSET)
   * @param {object} pg - node-postgres pool (user_memories table)
   * @param {object} enc - tiktoken encoder (encode(str) → token array)
   */
  constructor(redis, pg, enc) {
    this.r = redis; this.pg = pg; this.enc = enc;
    this.BUD = 4_000; // total prompt-token budget
  }

  /** Build the ordered message array for one request. */
  async buildMessages(guild, userId, channelId, msg) {
    const out = [];
    let b = this.BUD;

    // Layer 0: persona (always included).
    const sys = PERSONA(guild);
    out.push({ role: "system", content: sys });
    b -= this.enc.encode(sys).length;

    // Layer 1: top-weighted user memory facts.
    const { rows } = await this.pg.query(
      "SELECT fact FROM user_memories WHERE user_id=$1 ORDER BY weight DESC LIMIT 5",
      [userId]
    );
    if (rows.length) {
      // BUGFIX: this literal previously contained raw newlines inside the
      // quotes — a syntax error. Use \n escapes.
      const m = "[User context]\n" + rows.map(r => r.fact).join("\n");
      out.push({ role: "system", content: m });
      b -= this.enc.encode(m).length;
    }

    // Layer 2: channel history, newest kept first, oldest dropped.
    const hist = (await this.r.zrange("ctx:" + channelId, 0, -1)).map(s => JSON.parse(s));
    const selected = [];
    for (let i = hist.length - 1; i >= 0; i--) {
      const t = this.enc.encode(hist[i].content).length;
      if (b - t < 400) break; // keep ≥400 tokens of headroom for the reply
      b -= t;
      selected.unshift(hist[i]);
    }
    // BUGFIX: history was unshifted directly into `out`, which placed it
    // BEFORE the system persona/memory layers; append it after instead.
    out.push(...selected);

    // Layer 3: the current user message.
    out.push({ role: "user", content: msg });
    return out;
  }

  /**
   * Post-response fact extraction (callers fire-and-forget). Asks
   * gpt-4o-mini for JSON facts and inserts them at weight 1.0.
   */
  async extractMemory(userId, msg) {
    const res = await openai.chat.completions.create({
      model: "gpt-4o-mini", max_tokens: 80,
      response_format: { type: "json_object" },
      messages: [{
        role: "user",
        // BUGFIX: the original literal nested unescaped double quotes —
        // a syntax error. Single-quote the outer string.
        content: 'Extract personal facts from: "' + msg + '". Return {"facts":["..."]} or {"facts":[]}'
      }]
    });
    const { facts } = JSON.parse(res.choices[0].message.content);
    // Guard against a reply that omits the facts array entirely.
    for (const f of facts ?? [])
      await this.pg.query(
        "INSERT INTO user_memories(user_id,fact,weight) VALUES($1,$2,1.0) ON CONFLICT DO NOTHING",
        [userId, f]
      );
  }
}
HARDENING COMPARISON
┌─────────────────────┬─────────────────┐
│ DEFAULT (insecure) │ HARDENED │
├─────────────────────┼─────────────────┤
│ All Linux caps │ cap_drop: ALL │
│ Root user (UID 0) │ user: 1000:1000 │
│ No process limit │ pids_limit: 256 │
│ No seccomp │ seccomp: custom.json │
│ Unrestricted egress │ iptables FORWARD │
│ Unlimited mem/CPU │ 1g / 1.5 cpus │
└─────────────────────┴─────────────────┘
# docker security config via Wings
security_opt:
  - no-new-privileges:true                         # block setuid/sgid privilege escalation
  - seccomp:/etc/docker/seccomp/discord-bot.json   # custom syscall allowlist
cap_drop:
  - ALL                                            # start from zero Linux capabilities
cap_add:                                           # re-add only what the runtime needs
  - CHOWN
  - SETUID
  - SETGID
user: "1000:1000"                                  # never run as root in-container
mem_limit: "1g"
memswap_limit: "1g"                                # equal to mem_limit → swap effectively off
cpus: "1.5"
pids_limit: 256                                    # caps fork bombs
tmpfs:
  - /tmp:size=128m,noexec,nosuid                   # writable scratch, nothing executable
networks:
  pterodactyl_nw:
    driver: bridge
    driver_opts:
      com.docker.network.bridge.enable_icc: "false"  # no cross-container traffic
# iptables on host (rc.local) — block egress to private ranges:
# iptables -I FORWARD -s 172.18.0.0/16 -d 192.168.0.0/16 -j DROP
# iptables -I FORWARD -s 172.18.0.0/16 -d 10.0.0.0/8 -j DROP
# Dockerfile — create the matching non-root user:
# RUN addgroup -g 1000 bot && adduser -u 1000 -G bot -D bot
# USER bot
CI/CD PIPELINE — push to main
push to main
|
Job 1: test
checkout → npm ci (cached) → lint → npm test
FAIL → pipeline stops. deploy never runs.
| all pass
Job 2: build
docker buildx build (multi-stage Dockerfile)
tag: ghcr.io/{repo}:{git.sha}
push to GitHub Container Registry
|
Job 3: deploy
Pterodactyl API: POST /power { signal: "restart" }
Health check: GET /health every 5s for 60s
+— 200 OK → SUCCESS
+— timeout → rollback (restart prev SHA)
process.on("SIGTERM") → drain events 10s → exit(0)
# .github/workflows/deploy.yml
name: Build and Deploy
on:
  push:
    branches: [main]
# BUGFIX: GITHUB_TOKEN needs package write scope for the GHCR push below;
# the default read-only token would make docker push fail.
permissions:
  contents: read
  packages: write
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with: { node-version: "20", cache: "npm" }
      - run: npm ci && npm run lint && npm test
  build:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          # BUGFIX: these were bare placeholder words, not workflow
          # expressions — the login step could never authenticate.
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - uses: docker/build-push-action@v5
        with:
          push: true
          tags: |
            ghcr.io/${{ github.repository }}:${{ github.sha }}
            ghcr.io/${{ github.repository }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
  deploy:
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Restart via Pterodactyl API
        run: |
          curl -sf -X POST "${{ secrets.PANEL_URL }}/api/client/servers/${{ secrets.SERVER_ID }}/power" \
            -H "Authorization: Bearer ${{ secrets.PTERODACTYL_KEY }}" \
            -H "Content-Type: application/json" \
            -d '{"signal":"restart"}'
      - name: Health check
        # 12 probes x 5s = 60s window; a timeout fails the job so the
        # failure is visible (automatic rollback is a manual/ops step).
        run: |
          for i in {1..12}; do
            curl -sf "${{ secrets.BOT_URL }}/health" && exit 0
            sleep 5
          done
          echo "HEALTH CHECK FAILED" && exit 1
LAYERED DEFENSE
[ATTACKER — volumetric flood]
|
Layer 1: Cloudflare Edge
Anycast → absorbs volumetric floods
IP reputation → block malicious ranges
WAF rule: >100 req/min from 1 IP → BLOCK
Browser Integrity Check on web endpoints
| (only legitimate requests pass)
Layer 2: App Rate Limiter (Redis sliding window)
/api/* → 60 req/min/IP
/webhook → 20 req/min/IP
/verify → 5 req/min/IP
|
Layer 3: Pterodactyl Process Isolation
mem:1g / pids:256 / cpus:1.5
OOM-killed before host exhaustion
// rate_limiter.js — Redis sliding window
// Route prefix → { max requests, window seconds }. Prefixes are disjoint
// so first-match lookup is sufficient.
const LIMITS = {
  "/api":    { max: 60, win: 60 },
  "/webhook":{ max: 20, win: 60 },
  "/verify": { max: 5,  win: 60 },
};

/**
 * Express middleware: Redis sorted-set sliding-window limiter keyed by
 * client IP + route prefix. Sets X-RateLimit-Remaining and responds 429
 * once the window is full; unlisted routes pass straight through.
 */
export async function rateLimit(req, res, next) {
  const route = Object.keys(LIMITS).find(r => req.path.startsWith(r));
  if (!route) return next();
  const { max, win } = LIMITS[route];
  // CF-Connecting-IP = real client IP behind Cloudflare.
  // NOTE(review): only trustworthy when the origin accepts traffic solely
  // from Cloudflare ranges; otherwise this header is spoofable — confirm.
  const ip = req.headers["cf-connecting-ip"] ?? req.socket.remoteAddress;
  const key = "rl:" + ip + ":" + route;
  const now = Date.now();
  // One round trip: prune expired entries, record this hit, count, TTL.
  const p = redis.pipeline();
  p.zremrangebyscore(key, 0, now - win * 1000);
  p.zadd(key, now, now + ":" + Math.random());
  p.zcard(key);
  p.expire(key, win + 1);
  const r = await p.exec();
  // BUGFIX: the ZCARD result was read without checking its [err, value]
  // error slot; on a Redis error `count` was undefined, every comparison
  // was false, and the limiter silently failed open. Keep fail-open
  // semantics, but make them explicit.
  const [countErr, count] = r[2];
  if (countErr) return next();
  res.set("X-RateLimit-Remaining", String(Math.max(0, max - count)));
  if (count > max)
    return res.status(429).json({ error: "Too many requests" });
  next();
}
/* Cloudflare WAF Rule:
(http.request.uri.path matches "^/api")
and (rate.requests.per.minute gt 100)
Action: Block — fires at edge, origin never hit */