diff --git a/lib/config.js b/lib/config.js index 04a9ea4..5593813 100644 --- a/lib/config.js +++ b/lib/config.js @@ -175,6 +175,79 @@ function loadOrCreateIdentity(name) { return identity; } +/** + * True if `pid` refers to a live process. `process.kill(pid, 0)` sends no + * signal but throws ESRCH when the process is gone; EPERM means the + * process exists but isn't ours to signal (still alive). Works on POSIX + * and Windows in Node. + * @param {number} pid + * @returns {boolean} + */ +function pidIsAlive(pid) { + if (!Number.isFinite(pid)) return false; + try { + process.kill(pid, 0); + return true; + } catch (e) { + return e.code === 'EPERM'; + } +} + +/** + * Read the holder PID from a node's lockfile, or null if the lockfile is + * absent, unreadable, or has non-numeric content. + * @param {string} name — node name + * @returns {number|null} + */ +function lockHolderPid(name) { + try { + const pid = parseInt( + fs.readFileSync(path.join(nodeDir(name), 'lock.pid'), 'utf8').trim(), + 10, + ); + return Number.isFinite(pid) ? pid : null; + } catch { + return null; // no lockfile / unreadable → treat as free + } +} + +/** + * Resolve an available node name by appending `-2`/`-3`/… when the base is + * already held by a LIVE foreign process. This is the lib-level equivalent + * of the strict `acquireIdentityLock` liveness check, but instead of + * throwing EIDENTITYLOCK on a collision it returns the next free name so a + * second co-resident process can coexist — a duplicate dev agent, two + * sessions sharing a fixed SYM_NODE_NAME, etc. Every SymNode host (the + * `sym` CLI, sym-mesh-channel, sym-swift, custom agents) gets the same + * collision-resilience by resolving the name through here before + * constructing the node, instead of each host reimplementing it. + * + * A base that is free, held by a DEAD process, or held by OUR OWN process + * is returned unchanged — `acquireIdentityLock` reclaims stale/own locks + * on start(). Candidates that would exceed the 64-byte name limit are + * skipped. If every candidate up to `maxSuffix` is a live foreign holder, + * the base is returned unchanged so `acquireIdentityLock` still throws — + * preserving the hard-fail as a backstop rather than overflowing silently. + * + * @param {string} base — desired node name + * @param {object} [opts] + * @param {number} [opts.maxSuffix=64] — highest suffix index to try + * @returns {string} an available name (`base` or `base-N`) + */ +function resolveAvailableName(base, { maxSuffix = 64 } = {}) { + validateName(base); + for (let i = 0; i < maxSuffix; i++) { + const candidate = i === 0 ? base : `${base}-${i + 1}`; + if (Buffer.byteLength(candidate, 'utf8') > 64) continue; // would fail validateName + const holder = lockHolderPid(candidate); + if (holder !== null && holder !== process.pid && pidIsAlive(holder)) { + continue; // live foreign holder → try the next suffix + } + return candidate; // free, ours, or a stale lock acquire will reclaim + } + return base; +} + /** * Acquire an exclusive lock on a node identity. Prevents two processes * from claiming the same nodeId on the same host, which would cause @@ -225,18 +298,9 @@ function acquireIdentityLock(name) { } catch {} }; } - // process.kill(pid, 0) sends no signal but throws ESRCH if the - // process is dead. Works on POSIX and Windows in Node. - let alive = false; - try { - process.kill(holderPid, 0); - alive = true; - } catch (e) { - // ESRCH = no such process; EPERM = exists but we can't signal - // (still alive, just not ours). Both indicate "exists" except ESRCH. - if (e.code === 'EPERM') alive = true; - } - if (alive) { + // Liveness via process.kill(pid, 0) — see pidIsAlive (ESRCH = dead, + // EPERM = alive-but-not-ours). Works on POSIX and Windows in Node. + if (pidIsAlive(holderPid)) { const err = new Error( `Identity '${name}' is already locked by PID ${holderPid}. ` + `Only one SymNode process can hold a given nodeId on a host. ` + @@ -305,6 +369,9 @@ module.exports = { generateSigningKeyPair, loadOrCreateIdentity, normalizeMdnsHostname, + pidIsAlive, + lockHolderPid, + resolveAvailableName, acquireIdentityLock, log, }; diff --git a/lib/node.js b/lib/node.js index 5e1f3d2..bc1d07e 100644 --- a/lib/node.js +++ b/lib/node.js @@ -21,7 +21,7 @@ const net = require('net'); const path = require('path'); const { EventEmitter } = require('events'); const { MeshNode } = require('@sym-bot/core'); -const { nodeDir, loadOrCreateIdentity, acquireIdentityLock, log: logMsg } = require('./config'); +const { nodeDir, loadOrCreateIdentity, acquireIdentityLock, resolveAvailableName, log: logMsg } = require('./config'); const { MemoryStore } = require('./memory-store'); const { encode, DIM, createCMB, renderContent, FIELD_WEIGHT_PROFILES, @@ -54,6 +54,7 @@ class SymNode extends EventEmitter { * @param {string} [opts.relayToken] — relay authentication token * @param {boolean} [opts.relayOnly=false] — skip LAN discovery, relay only * @param {boolean} [opts.silent=false] — suppress log output + * @param {boolean} [opts.autoSuffix=false] — on a same-host name collision, resolve to `-2`/`-3`/… instead of throwing EIDENTITYLOCK, so co-resident processes coexist. Read `node.name` for the resolved value (`node.requestedName` keeps the original). * @param {function} [opts.onSynthesis] — synthesis delegate for xMesh insights * @param {number} [opts.heartbeatInterval=5000] — heartbeat check interval in ms * @param {number} [opts.heartbeatTimeout=15000] — heartbeat timeout in ms @@ -64,7 +65,17 @@ class SymNode extends EventEmitter { if (!opts.name) throw new Error('SymNode requires a name'); this._silent = opts.silent || false; - this.name = opts.name; + // opts.autoSuffix (default off): when two co-resident processes want the + // same name — a duplicate dev agent, or sessions sharing a fixed + // SYM_NODE_NAME — resolve to `-2`/`-3`/… instead of hard-failing + // with EIDENTITYLOCK. The name is resolved BEFORE identity/dir/lock are + // derived below, so all three key off the final coexisting name. Default + // stays strict (throw) so hosts that rely on the lock to detect a + // double-launch keep that behavior. Hosts can read back `node.name` to + // see the resolved value. The base is kept when free, held by a dead + // process, or held by us (acquireIdentityLock reclaims stale/own locks). + this.requestedName = opts.name; + this.name = opts.autoSuffix ? resolveAvailableName(opts.name) : opts.name; this.nodeId = null; // set after identity loaded (see below) this._cognitiveProfile = opts.cognitiveProfile || null; this._moodThreshold = opts.moodThreshold ?? 0.8; diff --git a/tests/config.test.js b/tests/config.test.js index 8981297..ce8e935 100644 --- a/tests/config.test.js +++ b/tests/config.test.js @@ -5,10 +5,11 @@ const assert = require('node:assert'); const fs = require('fs'); const path = require('path'); const os = require('os'); +const { spawn } = require('child_process'); const { SYM_DIR, NODES_DIR, ensureDir, nodeDir, uuidv7, validateName, generateSigningKeyPair, loadOrCreateIdentity, - normalizeMdnsHostname, log, + normalizeMdnsHostname, pidIsAlive, lockHolderPid, resolveAvailableName, log, } = require('../lib/config'); describe('uuidv7', () => { @@ -208,6 +209,104 @@ describe('nodeDir', () => { }); }); +describe('resolveAvailableName', () => { + const base = `test-resolve-${Date.now()}`; + const writeLock = (name, pid) => { + ensureDir(nodeDir(name)); + fs.writeFileSync(path.join(nodeDir(name), 'lock.pid'), String(pid)); + }; + // A real, live, foreign process (our child — alive, pid !== process.pid). + let liveChild; + before(() => { + liveChild = spawn(process.execPath, ['-e', 'setInterval(() => {}, 1000)'], { stdio: 'ignore' }); + }); + after(() => { + if (liveChild) { try { liveChild.kill('SIGKILL'); } catch {} } + for (let i = 1; i <= 4; i++) { + const n = i === 1 ? base : `${base}-${i}`; + fs.rmSync(nodeDir(n), { recursive: true, force: true }); + } + fs.rmSync(nodeDir(`${base}-long`), { recursive: true, force: true }); + }); + + it('returns the base name when no lockfile exists', () => { + assert.strictEqual(resolveAvailableName(base), base); + }); + + it('returns the base name when the holder PID is dead (stale lock)', () => { + writeLock(base, 2147483646); // implausible PID → ESRCH → treated as free + assert.strictEqual(resolveAvailableName(base), base); + }); + + it('returns the base name when the holder is our own process', () => { + writeLock(base, process.pid); + assert.strictEqual(resolveAvailableName(base), base); + }); + + it('suffixes to -2 when the base is held by a live foreign process', () => { + writeLock(base, liveChild.pid); + assert.strictEqual(resolveAvailableName(base), `${base}-2`); + }); + + it('skips multiple live holders to the next free suffix', () => { + writeLock(base, liveChild.pid); + writeLock(`${base}-2`, liveChild.pid); + assert.strictEqual(resolveAvailableName(base), `${base}-3`); + }); + + it('keeps a suffixed slot whose prior holder has died', () => { + writeLock(base, liveChild.pid); // base: live → skip + writeLock(`${base}-2`, 2147483646); // -2: dead → reclaimable + assert.strictEqual(resolveAvailableName(base), `${base}-2`); + }); + + it('does not overflow the 64-byte name limit (skips over-long candidates)', () => { + // A base near the limit: appending "-2" would exceed 64 bytes, so that + // candidate is skipped. With the base held live and no room to suffix, + // it falls back to the base (acquireIdentityLock then hard-fails). + const longBase = 'x'.repeat(63); // 63 bytes; "-2" → 65 bytes, over limit + ensureDir(nodeDir(longBase)); + fs.writeFileSync(path.join(nodeDir(longBase), 'lock.pid'), String(liveChild.pid)); + assert.strictEqual(resolveAvailableName(longBase), longBase); + fs.rmSync(nodeDir(longBase), { recursive: true, force: true }); + }); + + it('validates the base name (throws on invalid input)', () => { + assert.throws(() => resolveAvailableName(''), /non-empty string/); + }); +}); + +describe('pidIsAlive', () => { + it('is true for our own process', () => { + assert.strictEqual(pidIsAlive(process.pid), true); + }); + it('is false for an implausible/dead PID', () => { + assert.strictEqual(pidIsAlive(2147483646), false); + }); + it('is false for non-finite input', () => { + assert.strictEqual(pidIsAlive(NaN), false); + }); +}); + +describe('lockHolderPid', () => { + const name = `test-holder-${Date.now()}`; + after(() => { fs.rmSync(nodeDir(name), { recursive: true, force: true }); }); + + it('returns null when no lockfile exists', () => { + assert.strictEqual(lockHolderPid(name), null); + }); + it('returns the numeric PID when present', () => { + ensureDir(nodeDir(name)); + fs.writeFileSync(path.join(nodeDir(name), 'lock.pid'), '12345'); + assert.strictEqual(lockHolderPid(name), 12345); + }); + it('returns null for non-numeric content', () => { + ensureDir(nodeDir(name)); + fs.writeFileSync(path.join(nodeDir(name), 'lock.pid'), 'not-a-pid'); + assert.strictEqual(lockHolderPid(name), null); + }); +}); + describe('log', () => { it('should not throw', () => { assert.doesNotThrow(() => log('test', 'hello'));