Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include "utils/buffer.h"

#include <pthread.h>
#include <time.h>
#include <errno.h>
#include <hiredis.h>
#ifndef EXECUTION_DEFAULT_MAX_IDLE_MS
#define EXECUTION_DEFAULT_MAX_IDLE_MS 5000
Expand Down Expand Up @@ -1408,6 +1410,74 @@ void MR_ExecutionSetMaxIdle(Execution* e, size_t maxIdle) {
e->timeoutMS = maxIdle;
}

/* Drain LibMR background threads to a safe point before fork(): a thread holding a
* libc lock (e.g. the allocator lock) at fork() would leave the child holding a locked
* mutex with no owner and deadlock it. MR_DrainForFork() parks the event-loop thread
* (between tasks, so it stops dispatching and isn't holding the GIL) and waits for the
* worker pool to go idle; MR_ResumeAfterFork() releases it after FORK_CHILD_BORN/CANCELLED. */
/* Upper bound, in milliseconds, for each wait performed by MR_DrainForFork():
 * it is applied once to the event-loop park wait and once more to the
 * worker-pool idle wait, so the worst-case blocking time is about twice this. */
#define MR_FORK_DRAIN_TIMEOUT_MS 2000

/* Guards mr_forkElParked and mr_forkElRelease. */
static pthread_mutex_t mr_forkDrainLock = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast whenever one of the two flags below is raised to 1. */
static pthread_cond_t mr_forkDrainCond = PTHREAD_COND_INITIALIZER;
/* 1 while the event-loop thread sits inside MR_ForkParkElThread(); cleared by
 * that function on exit and by MR_DrainForFork() before it queues a park task. */
static int mr_forkElParked = 0;
/* Raised by MR_ResumeAfterFork() to let the parked thread continue; cleared by
 * MR_DrainForFork() at the start of every drain. */
static int mr_forkElRelease = 0;

/* Runs on the event-loop thread between tasks (a safe point); parks it until resumed. */
static void MR_ForkParkElThread(void* ctx) {
REDISMODULE_NOT_USED(ctx);
pthread_mutex_lock(&mr_forkDrainLock);
mr_forkElParked = 1;
pthread_cond_broadcast(&mr_forkDrainCond);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the already-existing sync. functions in utils?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also: the broadcasting back and forth between ml and worker threads (with its extra locks and hidden locks such as the one in mr_thpool_num_jobs_in_queue()) poses a threat of increasing latencies for all commands. I would really like a much simpler solution, if possible.

while (!mr_forkElRelease) {
pthread_cond_wait(&mr_forkDrainCond, &mr_forkDrainLock);
}
mr_forkElParked = 0;
pthread_mutex_unlock(&mr_forkDrainLock);
}

/* Bring LibMR's background threads to a quiescent point ahead of fork().
 *
 * Steps:
 *   1. Reset the park/release flags and queue MR_ForkParkElThread on the event
 *      loop, then wait (up to MR_FORK_DRAIN_TIMEOUT_MS) for it to report that
 *      the event-loop thread is parked.
 *   2. Wait (up to MR_FORK_DRAIN_TIMEOUT_MS more) for the executions thread
 *      pool to become idle.
 *
 * The function is fail-open: if either wait expires or fails it logs a warning
 * and returns, it never blocks indefinitely. Does nothing when the executions
 * thread pool has not been created. The parked thread stays parked until
 * MR_ResumeAfterFork() is called. */
void MR_DrainForFork(void) {
    if (!mrCtx.executionsThreadPool) return;

    /* Absolute deadline for the park wait. If the clock cannot be read the
     * deadline stays at the epoch, so the timed wait below expires at once
     * instead of using an indeterminate value. */
    struct timespec deadline = {0};
    if (clock_gettime(CLOCK_REALTIME, &deadline) == 0) {
        deadline.tv_sec += MR_FORK_DRAIN_TIMEOUT_MS / 1000;
        deadline.tv_nsec += (MR_FORK_DRAIN_TIMEOUT_MS % 1000) * 1000000L;
        if (deadline.tv_nsec >= 1000000000L) {
            deadline.tv_sec++;
            deadline.tv_nsec -= 1000000000L;
        }
    }

    /* Park the event-loop thread between tasks so it stops dispatching work. */
    pthread_mutex_lock(&mr_forkDrainLock);
    mr_forkElParked = 0;
    mr_forkElRelease = 0;
    pthread_mutex_unlock(&mr_forkDrainLock);

    MR_EventLoopAddTask(MR_ForkParkElThread, NULL);

    pthread_mutex_lock(&mr_forkDrainLock);
    while (!mr_forkElParked) {
        /* Stop on ANY non-zero result: ETIMEDOUT means the budget is spent, and
         * any other error would otherwise be retried forever with the lock held. */
        if (pthread_cond_timedwait(&mr_forkDrainCond, &mr_forkDrainLock, &deadline) != 0) {
            break;
        }
    }
    /* Re-read the flag: the park may have landed right as the wait expired. */
    int parked = mr_forkElParked;
    pthread_mutex_unlock(&mr_forkDrainLock);

    /* With the event loop parked no new work is dispatched; wait for the worker pool
     * to go idle via its own idle signal. */
    int idle = mr_thpool_wait_timeout(mrCtx.executionsThreadPool, MR_FORK_DRAIN_TIMEOUT_MS);

    if (!parked || !idle) {
        RedisModule_Log(mr_staticCtx, "warning",
                        "MR_DrainForFork: not fully quiesced before fork (el_parked=%d, pool_idle=%d)",
                        parked, idle);
    }
}

/* Release the event-loop thread parked by MR_DrainForFork(): raise the release
 * flag under the drain lock and wake every waiter on the drain condition.
 * Does nothing when the executions thread pool has not been created. */
void MR_ResumeAfterFork(void) {
    if (mrCtx.executionsThreadPool) {
        pthread_mutex_lock(&mr_forkDrainLock);
        mr_forkElRelease = 1;
        pthread_cond_broadcast(&mr_forkDrainCond);
        pthread_mutex_unlock(&mr_forkDrainLock);
    }
}

void MR_Run(Execution* e) {
/* take ownership on the execution */
__atomic_add_fetch(&e->refCount, 1, __ATOMIC_RELAXED);
Expand Down
8 changes: 8 additions & 0 deletions src/mr.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ LIBMR_API bool MR_IsInternalCommandsExecution(const Execution* e);
/* Free the given execution */
LIBMR_API void MR_FreeExecution(Execution* e);

/* MOD-15307: drain LibMR background threads (worker pool + event-loop thread) to a safe,
* lock-free point before fork(), so the forked child does not inherit a libc lock held by a
* LibMR thread (ghost-lock). Bounded (a few seconds), then proceeds anyway (fail-open). Call
* on the main thread from the FORK_CHILD_PRE module event; pair every call with exactly one
* MR_ResumeAfterFork() (on FORK_CHILD_BORN, or if the fork was cancelled). */
LIBMR_API void MR_DrainForFork(void);
LIBMR_API void MR_ResumeAfterFork(void);

/* Initialize mr library */
LIBMR_API int MR_Init(struct RedisModuleCtx* ctx, size_t numThreads, char *password);

Expand Down
20 changes: 20 additions & 0 deletions src/utils/thpool.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,26 @@ void mr_thpool_wait(mr_thpool_* thpool_p) {
pthread_mutex_unlock(&thpool_p->thcount_lock);
}

/* Like mr_thpool_wait but bounded.
 *
 * Blocks until the job queue is empty and no thread is working, or until
 * timeout_ms milliseconds have elapsed (negative values are treated as 0,
 * i.e. a single non-blocking check).
 *
 * Returns 1 if the pool is idle when the call returns, 0 otherwise. The result
 * reflects the pool state observed under thcount_lock after the wait ends, so
 * a pool that became idle just as the deadline expired is still reported idle. */
int mr_thpool_wait_timeout(mr_thpool_* thpool_p, long timeout_ms) {
    /* A negative timeout would yield a negative tv_nsec, which the timed wait
     * rejects with EINVAL. */
    if (timeout_ms < 0) {
        timeout_ms = 0;
    }

    /* Absolute deadline. If the clock cannot be read it stays at the epoch, so
     * the timed wait expires immediately rather than using garbage. */
    struct timespec deadline = {0};
    if (clock_gettime(CLOCK_REALTIME, &deadline) == 0) {
        deadline.tv_sec += timeout_ms / 1000;
        deadline.tv_nsec += (timeout_ms % 1000) * 1000000L;
        if (deadline.tv_nsec >= 1000000000L) {
            deadline.tv_sec++;
            deadline.tv_nsec -= 1000000000L;
        }
    }

    pthread_mutex_lock(&thpool_p->thcount_lock);
    while (thpool_p->jobqueue.len || thpool_p->num_threads_working) {
        /* Stop on ANY non-zero result: ETIMEDOUT means the budget is spent, and
         * any other error would otherwise be retried forever with the lock held. */
        if (pthread_cond_timedwait(&thpool_p->threads_all_idle, &thpool_p->thcount_lock, &deadline) != 0) {
            break;
        }
    }
    /* Decide from the actual pool state, not from how the wait ended. */
    int idle = !(thpool_p->jobqueue.len || thpool_p->num_threads_working);
    pthread_mutex_unlock(&thpool_p->thcount_lock);
    return idle;
}

/* Destroy the threadpool */
void mr_thpool_destroy(mr_thpool_* thpool_p) {
/* No need to destroy if it's NULL */
Expand Down
6 changes: 6 additions & 0 deletions src/utils/thpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ int mr_thpool_add_work(mr_threadpool, void (*function_p)(void*), void* arg_p);
*/
void mr_thpool_wait(mr_threadpool);

/**
* @brief Like mr_thpool_wait, bounded by timeout_ms.
* @return 1 if the pool went idle, 0 on timeout.
*/
int mr_thpool_wait_timeout(mr_threadpool, long timeout_ms);


/**
* @brief Pauses all threads immediately
Expand Down
Loading