Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions pi/skills/control-agent/startup-cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,35 @@ mkdir -p "$BRIDGE_LOG_DIR"
# --- Kill anything holding port 7890, any existing bridge tmux session,
# and any leftover old-style PID-file supervisor.
echo "Cleaning up old bridge..."

# Kill the tmux session first — this stops the restart loop from respawning
# the bridge while we're trying to clean up the port.
tmux kill-session -t "$BRIDGE_TMUX_SESSION" 2>/dev/null || true

# Now gracefully stop any process on the port. SIGTERM lets the bridge close
# the HTTP server and release the port cleanly; SIGKILL is the fallback.
PORT_PIDS=$(lsof -ti :7890 2>/dev/null || true)
if [ -n "$PORT_PIDS" ]; then
echo "Killing processes on port 7890: $PORT_PIDS"
echo "$PORT_PIDS" | xargs kill -9 2>/dev/null || true
sleep 1
echo "Stopping processes on port 7890 (SIGTERM): $PORT_PIDS"
echo "$PORT_PIDS" | xargs kill 2>/dev/null || true
# Wait up to 3s for graceful shutdown
for i in 1 2 3; do
sleep 1
PORT_PIDS=$(lsof -ti :7890 2>/dev/null || true)
[ -z "$PORT_PIDS" ] && break
done
# Force-kill anything that didn't exit
if [ -n "$PORT_PIDS" ]; then
echo "Force-killing stubborn processes: $PORT_PIDS"
echo "$PORT_PIDS" | xargs kill -9 2>/dev/null || true
sleep 1
fi
fi
tmux kill-session -t "$BRIDGE_TMUX_SESSION" 2>/dev/null || true

OLD_PID_FILE="$HOME/.pi/agent/slack-bridge.pid"
if [ -f "$OLD_PID_FILE" ]; then
OLD_PID="$(cat "$OLD_PID_FILE" 2>/dev/null || true)"
[ -n "$OLD_PID" ] && kill -9 "$OLD_PID" 2>/dev/null || true
[ -n "$OLD_PID" ] && kill "$OLD_PID" 2>/dev/null || true
rm -f "$OLD_PID_FILE"
fi

Expand Down Expand Up @@ -151,6 +169,12 @@ tmux new-session -d -s "$BRIDGE_TMUX_SESSION" "\
exit_code=\$?; \
echo \"[\$(date -Is)] bridge: exited with code \$exit_code, restarting in 5s\" >> $BRIDGE_LOG_FILE; \
sleep 5; \
tries=0; \
while lsof -ti :7890 >/dev/null 2>&1 && [ \$tries -lt 10 ]; do \
echo \"[\$(date -Is)] bridge: port 7890 still in use, waiting...\" >> $BRIDGE_LOG_FILE; \
sleep 2; \
tries=\$((tries + 1)); \
done; \
done"

echo "Bridge tmux session: $BRIDGE_TMUX_SESSION"
Expand Down
56 changes: 55 additions & 1 deletion slack-bridge/broker-bridge.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,36 @@ function getLogLinesForResponse(url) {
return lines;
}

/**
 * Reference to the HTTP server so we can close it on shutdown.
 * Stays null until a server is assigned to it, in which case shutdown has
 * nothing to close.
 */
let apiServer = null;

/** True once shutdown has started; makes repeated signals a no-op. */
let shuttingDown = false;

/**
 * Graceful shutdown: close the HTTP server (releases the port), then exit.
 * Called on SIGTERM/SIGINT so restarts don't fight over the port.
 *
 * Exit codes: 0 when the server closed cleanly (or there was no server),
 * 1 when connections failed to drain within 5 seconds.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown;
 *   only used in the log message.
 * @returns {void}
 */
function gracefulShutdown(signal) {
  // A second signal while we are already draining must not restart the flow.
  if (shuttingDown) return;
  shuttingDown = true;
  logInfo(`🛑 received ${signal} — shutting down gracefully`);

  if (!apiServer) {
    // No server to close, so the port is not ours to release.
    process.exit(0);
    return;
  }

  apiServer.close(() => {
    logInfo("🛑 HTTP server closed, exiting");
    process.exit(0);
  });

  // close() only stops accepting new connections. On Node versions before 19
  // idle keep-alive sockets stay open and would delay the close callback
  // until the force-exit below, so drop them right away. The method is
  // feature-checked because it only exists on Node >= 18.2.
  if (typeof apiServer.closeIdleConnections === "function") {
    apiServer.closeIdleConnections();
  }

  // Force exit after 5s if in-flight connections don't drain. unref() keeps
  // this timer from holding the process open once everything else is done.
  setTimeout(() => {
    logWarn("🛑 forceful exit after 5s timeout");
    process.exit(1);
  }, 5000).unref();
}

process.on("SIGTERM", () => gracefulShutdown("SIGTERM"));
process.on("SIGINT", () => gracefulShutdown("SIGINT"));

function startApiServer() {
const server = createServer(async (req, res) => {
const url = new URL(req.url, `http://localhost:${API_PORT}`);
Expand Down Expand Up @@ -1024,9 +1054,33 @@ function startApiServer() {
}
});

server.listen(API_PORT, "127.0.0.1", () => {
// Retry with backoff if the port is still held by a dying predecessor.
const MAX_BIND_RETRIES = 5;
const BIND_RETRY_DELAY_MS = 2000;
let bindAttempt = 0;

function tryListen() {
bindAttempt++;
server.listen(API_PORT, "127.0.0.1");
}

server.on("listening", () => {
apiServer = server;
logInfo(`📡 Outbound API listening on http://127.0.0.1:${API_PORT}`);
});

server.on("error", (err) => {
if (err.code === "EADDRINUSE" && bindAttempt < MAX_BIND_RETRIES) {
logWarn(`⚠️ port ${API_PORT} in use, retrying in ${BIND_RETRY_DELAY_MS}ms (attempt ${bindAttempt}/${MAX_BIND_RETRIES})`);
server.close();
setTimeout(tryListen, BIND_RETRY_DELAY_MS);
} else {
Comment on lines 1072 to 1077
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing server.close() before retry
The Node.js documentation recommends calling server.close() before retrying server.listen() after an EADDRINUSE error, to properly reset the server's internal state. While the current code may work in practice (the failed bind doesn't create resources), adding server.close() follows the documented pattern and avoids potential edge cases in future Node.js versions.

Suggested change
server.on("error", (err) => {
if (err.code === "EADDRINUSE" && bindAttempt < MAX_BIND_RETRIES) {
logWarn(`⚠️ port ${API_PORT} in use, retrying in ${BIND_RETRY_DELAY_MS}ms (attempt ${bindAttempt}/${MAX_BIND_RETRIES})`);
setTimeout(tryListen, BIND_RETRY_DELAY_MS);
} else {
if (err.code === "EADDRINUSE" && bindAttempt < MAX_BIND_RETRIES) {
logWarn(`⚠️ port ${API_PORT} in use, retrying in ${BIND_RETRY_DELAY_MS}ms (attempt ${bindAttempt}/${MAX_BIND_RETRIES})`);
server.close();
setTimeout(tryListen, BIND_RETRY_DELAY_MS);
Prompt To Fix With AI
This is a comment left during a code review.
Path: slack-bridge/broker-bridge.mjs
Line: 1072-1076

Comment:
**Missing `server.close()` before retry**
The [Node.js documentation](https://nodejs.org/api/net.html#event-error) recommends calling `server.close()` before retrying `server.listen()` after an EADDRINUSE error, to properly reset the server's internal state. While the current code may work in practice (the failed bind doesn't create resources), adding `server.close()` follows the documented pattern and avoids potential edge cases in future Node.js versions.

```suggestion
    if (err.code === "EADDRINUSE" && bindAttempt < MAX_BIND_RETRIES) {
      logWarn(`⚠️ port ${API_PORT} in use, retrying in ${BIND_RETRY_DELAY_MS}ms (attempt ${bindAttempt}/${MAX_BIND_RETRIES})`);
      server.close();
      setTimeout(tryListen, BIND_RETRY_DELAY_MS);
```

How can I resolve this? If you propose a fix, please make it concise.

logError(`❌ HTTP server error: ${err.message}`);
process.exit(1);
}
});

tryListen();
}

async function startPollLoop() {
Expand Down