futex(2) System Calls Manual futex(2) NAME futex - fast user-space locking LIBRARY Standard C library (libc, -lc) SYNOPSIS #include <linux/futex.h> /* FUTEX_* */ #include <sys/syscall.h> /* SYS_* */ #include <unistd.h> long syscall(SYS_futex, uint32_t *uaddr, int futex_op, uint32_t val, const struct timespec *timeout, /* or: uint32_t val2 */ uint32_t *uaddr2, uint32_t val3); Note: glibc provides no wrapper for futex(), necessitating the use of syscall(2). DESCRIPTION futex() . , . . futex() , , . futex() , . A futex is a 32-bit value--referred to below as a futex word--whose address is supplied to the futex() system call. (Futexes are 32 bits in size on all platforms, including 64-bit systems.) All futex operations are governed by this value. In order to share a futex between processes, the futex is placed in a region of shared memory, created using (for example) mmap(2) or shmat(2). (Thus, the futex word may have different virtual addresses in different processes, but these addresses all refer to the same location in physical memory.) In a multithreaded program, it is sufficient to place the futex word in a global variable shared by all threads. , , , , ( futex()) . , , . , . --, , , , --. . (. ., ) . , , -- ( , ). , , . , futex(). futex() , (. ., << >>). , , , , ( ). , , futex(7). , . , ; (. ., ) FUTEX_WAIT, , . uaddr . , . , , futex_op; val, futex_op. (timeout, uaddr2 val3) . , , . timeout timespec, . , , , , . timeout unsigned long, uint32_t. val2, . , , uaddr2 , . , val3, . futex_op : , , , . , futex_op: FUTEX_PRIVATE_FLAG ( Linux 2.6.22) . , (. ., ). . , _PRIVATE, , , FUTEX_PRIVATE_FLAG. , FUTEX_WAIT_PRIVATE, FUTEX_WAKE_PRIVATE . . FUTEX_CLOCK_REALTIME ( Linux 2.6.28) This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations. , timeout CLOCK_REALTIME. , timeout CLOCK_MONOTONIC. futex_op : FUTEX_WAIT ( Linux 2.6.0) , , uaddr val , , FUTEX_WAKE . (. ., ). , - . , -- . val, EAGAIN. . , - FUTEX_WAKE ( ) FUTEX_WAIT, . timeout NULL, , , ( , , ). 
CLOCK_MONOTONIC, Linux 4.5 CLOCK_REALTIME, FUTEX_CLOCK_REALTIME futex_op. timeout NULL, . : FUTEX_WAIT timeout . , timeout . FUTEX_WAIT FUTEX_WAIT_BITSET val3 FUTEX_BITSET_MATCH_ANY. uaddr2 val3 . FUTEX_WAKE ( Linux 2.6.0) val , (, FUTEX_WAIT) uaddr. , val 1 ( ), INT_MAX ( ). , - (, , ). timeout, uaddr2 val3 . FUTEX_FD ( Linux 2.6.0 Linux 2.6.25 ) , uaddr. . FUTEX_WAKE , select(2), poll(2) epoll(7). : val , FUTEX_WAKE, , val. timeout, uaddr2 val3 . FUTEX_FD , Linux, 2.6.26. FUTEX_REQUEUE ( Linux 2.6.0) , FUTEX_CMP_REQUEUE ( ), , val3 ( val3 ). FUTEX_CMP_REQUEUE ( Linux 2.6.7) , uaddr val3. , EAGAIN. , val , uaddr. val , - uaddr - uaddr2. val2 , uaddr2. uaddr (. ., ). , val3 - . val 0 1 ( INT_MAX , FUTEX_CMP_REQUEUE FUTEX_WAKE). , val2, , 1 INT_MAX ( 0 , FUTEX_CMP_REQUEUE FUTEX_WAIT). FUTEX_CMP_REQUEUE FUTEX_REQUEUE. , uaddr , , . FUTEX_REQUEUE FUTEX_CMP_REQUEUE << >> , FUTEX_WAKE , . , B, : lock(A) while (!check_value(V)) { unlock(A); block_on(B); lock(A); }; unlock(A); FUTEX_WAKE, , B, , A. , A. , A, A, . FUTEX_WAKE_OP ( Linux 2.6.14) , . -- pthread_cond_signal(3), : , , . FUTEX_WAKE_OP . FUTEX_WAKE_OP , , -, : uint32_t oldval = *(uint32_t *) uaddr2; *(uint32_t *) uaddr2 = oldval op oparg; futex(uaddr, FUTEX_WAKE, val, 0, 0, 0); if (oldval cmp cmparg) futex(uaddr2, FUTEX_WAKE, val2, 0, 0, 0); , FUTEX_WAKE_OP : o uaddr2 uaddr2; -- (. ., ); o val uaddr; o uaddr2, val2 uaddr2. , , val3. , : +---+---+-----------+-----------+ |op |cmp|   oparg   |  cmparg   | +---+---+-----------+-----------+ 4 4 12 12 <== # of bits : #define FUTEX_OP(op, oparg, cmp, cmparg) \ (((op & 0xf) << 28) | \ ((cmp & 0xf) << 24) | \ ((oparg & 0xfff) << 12) | \ (cmparg & 0xfff)) op cmp , . oparg cmparg , . op : FUTEX_OP_SET 0 /* uaddr2 = oparg; */ FUTEX_OP_ADD 1 /* uaddr2 += oparg; */ FUTEX_OP_OR 2 /* uaddr2 |= oparg; */ FUTEX_OP_ANDN 3 /* uaddr2 &= ~oparg; */ FUTEX_OP_XOR 4 /* uaddr2 ^= oparg; */ , op (1 << oparg) : FUTEX_OP_ARG_SHIFT 8 /* . 
(1 << oparg) */ cmp : FUTEX_OP_CMP_EQ 0 /* (oldval == cmparg) -- */ FUTEX_OP_CMP_NE 1 /* (oldval != cmparg) -- */ FUTEX_OP_CMP_LT 2 /* (oldval < cmparg) -- */ FUTEX_OP_CMP_LE 3 /* (oldval <= cmparg) -- */ FUTEX_OP_CMP_GT 4 /* (oldval > cmparg) -- */ FUTEX_OP_CMP_GE 5 /* (oldval >= cmparg) -- */ FUTEX_WAKE_OP -- uaddr uaddr2. FUTEX_WAIT_BITSET ( Linux 2.6.25) FUTEX_WAIT, , val3 32- . , , . FUTEX_WAKE_BITSET. timeout NULL, , , . timeout NULL, . uaddr2 . FUTEX_WAKE_BITSET ( Linux 2.6.25) FUTEX_WAKE, , val3 32- . , , , . <> (. ., val3) , ( <>, FUTEX_WAIT_BITSET). , , ; . FUTEX_WAIT_BITSET FUTEX_WAKE_BITSET , . , , , , , , (. ., <>). FUTEX_BITSET_MATCH_ANY, 32- , val3 FUTEX_WAIT_BITSET FUTEX_WAKE_BITSET. timeout, FUTEX_WAIT FUTEX_WAIT_BITSET val3, FUTEX_BITSET_MATCH_ANY, . FUTEX_WAKE FUTEX_WAKE_BITSET val3, FUTEX_BITSET_MATCH_ANY, () . uaddr2 timeout . - Linux - priority-inheritance (PI), , . , , , . , , . -- , . , - , , , , . , , , - , - ( , ), (, , ) , . PI - ( ) PI-, ( , , PI- IPC). PI-, , , : o , 0. o , ID (TID; gettid(2)), . o , FUTEX_WAITERS; , : FUTEX_WAITERS | TID (, PI- FUTEX_WAITERS) , (, -- cmpxchg x86). -- TID , 0. -- 0, TID. If a futex is already acquired (i.e., has a nonzero value), waiters must employ the FUTEX_LOCK_PI or FUTEX_LOCK_PI2 operations to acquire the lock. If other threads are waiting for the lock, then the FUTEX_WAITERS bit is set in the futex value; in this case, the lock owner must employ the FUTEX_UNLOCK_PI operation to release the lock. , (. ., futex()), RT-, , . RT-, , . , ( , , , 0, , FUTEX_WAITERS). RT- (. ., ) /RT- , RT- . , , , . FUTEX_OWNER_DIED ID . FUTEX_OWNER_DIED , , , - . PI- futex_op , . , PI- : o FUTEX_LOCK_PI, FUTEX_LOCK_PI2 FUTEX_TRYLOCK_PI FUTEX_UNLOCK_PI. FUTEX_UNLOCK_PI , , , EPERM. o FUTEX_WAIT_REQUEUE_PI FUTEX_CMP_REQUEUE_PI. PI- PI- ( EINVAL). , val ( ) 1 ( EINVAL). 
PI-: FUTEX_LOCK_PI ( Linux 2.6.18) This operation is used after an attempt to acquire the lock via an atomic user-mode instruction failed because the futex word has a nonzero value--specifically, because it contained the (PID-namespace-specific) TID of the lock owner. uaddr. 0, TID . , FUTEX_WAITERS, , 0. : (1) - TID. (2) ( , , RT- RT-. , ). (3) (. ., RT-). , ( SCHED_DEADLINE, SCHED_FIFO SCHED_RR sched(7)). ( SCHED_DEADLINE) ( SCHED_RR SCHED_FIFO). . timeout . timeout NULL, , , , CLOCK_REALTIME. timeout NULL, . uaddr2, val val3 . FUTEX_LOCK_PI2 ( Linux 5.14) This operation is the same as FUTEX_LOCK_PI, except that the clock against which timeout is measured is selectable. By default, the (absolute) timeout specified in timeout is measured against the CLOCK_MONOTONIC clock, but if the FUTEX_CLOCK_REALTIME flag is specified in futex_op, then the timeout is measured against the CLOCK_REALTIME clock. FUTEX_TRYLOCK_PI ( Linux 2.6.18) uaddr. , , 0. , , , (. ., ) (FUTEX_WAITERS / FUTEX_OWNER_DIED). , . , . uaddr2, val, timeout val3 . FUTEX_UNLOCK_PI ( Linux 2.6.18) This operation wakes the top priority waiter that is waiting in FUTEX_LOCK_PI or FUTEX_LOCK_PI2 on the futex address provided by the uaddr argument. , uaddr TID () 0. uaddr2, val, timeout val3 . FUTEX_CMP_REQUEUE_PI ( Linux 2.6.31) PI- FUTEX_CMP_REQUEUE. , FUTEX_WAIT_REQUEUE_PI uaddr, PI- (uaddr) PI- (uaddr2). FUTEX_CMP_REQUEUE, val , uaddr. FUTEX_CMP_REQUEUE_PI val 1 ( << >>). - uaddr - uaddr2. val2 val3 , FUTEX_CMP_REQUEUE. FUTEX_WAIT_REQUEUE_PI ( Linux 2.6.31) PI- uaddr , ( FUTEX_CMP_REQUEUE_PI ) PI- uaddr2. uaddr FUTEX_WAIT. uaddr uaddr2 FUTEX_WAKE . FUTEX_WAIT_REQUEUE_PI EAGAIN. timeout NULL, , , . timeout NULL, . val3 . FUTEX_WAIT_REQUEUE_PI FUTEX_CMP_REQUEUE_PI -- POSIX . , , . FUTEX_WAIT_REQUEUE_PI , FUTEX_CMP_REQUEUE_PI. In the event of an error (and assuming that futex() was invoked via syscall(2)), all operations return -1 and set errno to indicate the error. : FUTEX_WAIT 0 , . , , , (, Pthreads ). 
, 0 (spurious wake-up), (. . ) . FUTEX_WAKE . FUTEX_FD , . FUTEX_REQUEUE . FUTEX_CMP_REQUEUE , , uaddr2. val, , , uaddr2. FUTEX_WAKE_OP . uaddr uaddr2. FUTEX_WAIT_BITSET 0, . FUTEX_WAIT, . FUTEX_WAKE_BITSET . FUTEX_LOCK_PI 0, . FUTEX_LOCK_PI2 0, . FUTEX_TRYLOCK_PI 0, . FUTEX_UNLOCK_PI 0, . FUTEX_CMP_REQUEUE_PI , , uaddr2. val, , , uaddr2. FUTEX_WAIT_REQUEUE_PI 0, uaddr2. EACCES . EAGAIN (FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI) , uaddr, val . : Linux EAGAIN EWOULDBLOCK ( ) . EAGAIN (FUTEX_CMP_REQUEUE, FUTEX_CMP_REQUEUE_PI) , uaddr, val3. EAGAIN (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_CMP_REQUEUE_PI) ID uaddr ( FUTEX_CMP_REQUEUE_PI -- uaddr2) , . . EDEADLK (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_CMP_REQUEUE_PI) uaddr . EDEADLK (FUTEX_CMP_REQUEUE_PI) PI- uaddr2 (deadlock). EFAULT (. ., uaddr, uaddr2 timeout) . EINTR A FUTEX_WAIT or FUTEX_WAIT_BITSET operation was interrupted by a signal (see signal(7)). Before Linux 2.6.22, this error could also be returned for a spurious wakeup; since Linux 2.6.22, this no longer happens. EINVAL futex_op , (timeout), timeout (tv_sec tv_nsec 1000000000). EINVAL The operation specified in futex_op employs one or both of the pointers uaddr and uaddr2, but one of these does not point to a valid object--that is, the address is not four-byte-aligned. EINVAL (FUTEX_WAIT_BITSET, FUTEX_WAKE_BITSET) , val3, . EINVAL (FUTEX_CMP_REQUEUE_PI) uaddr uaddr2 (. ., ). EINVAL (FUTEX_FD) val . EINVAL (FUTEX_WAKE, FUTEX_WAKE_OP, FUTEX_WAKE_BITSET, FUTEX_REQUEUE, FUTEX_CMP_REQUEUE) The kernel detected an inconsistency between the user-space state at uaddr and the kernel state--that is, it detected a waiter which waits in FUTEX_LOCK_PI or FUTEX_LOCK_PI2 on uaddr. EINVAL (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI) uaddr . uaddr, FUTEX_WAIT FUTEX_WAIT_BITSET. EINVAL (FUTEX_CMP_REQUEUE_PI) uaddr2 , , FUTEX_WAIT FUTEX_WAIT_BITSET uaddr2. 
EINVAL (FUTEX_CMP_REQUEUE_PI) uaddr , , FUTEX_WAIT FUTEX_WAIT_BITSET uaddr. EINVAL (FUTEX_CMP_REQUEUE_PI) The kernel detected an inconsistency between the user-space state at uaddr and the kernel state; that is, the kernel detected a waiter which waits on uaddr via FUTEX_LOCK_PI or FUTEX_LOCK_PI2 (instead of FUTEX_WAIT_REQUEUE_PI). EINVAL (FUTEX_CMP_REQUEUE_PI) , FUTEX_WAIT_REQUEUE_PI . EINVAL (FUTEX_CMP_REQUEUE_PI) val 1. EINVAL . ENFILE (FUTEX_FD) . ENOMEM (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_CMP_REQUEUE_PI) . ENOSYS futex_op . ENOSYS The FUTEX_CLOCK_REALTIME option was specified in futex_op, but the accompanying operation was neither FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, nor FUTEX_LOCK_PI2. ENOSYS (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_CMP_REQUEUE_PI, FUTEX_WAIT_REQUEUE_PI) , . PI- . EPERM (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_CMP_REQUEUE_PI) uaddr ( FUTEX_CMP_REQUEUE_PI: uaddr2) ( ). EPERM (FUTEX_UNLOCK_PI) , . ESRCH (FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_TRYLOCK_PI, FUTEX_CMP_REQUEUE_PI) uaddr . ESRCH (FUTEX_CMP_REQUEUE_PI) uaddr2 . ETIMEDOUT futex_op , timeout, . Linux. Linux 2.6.0. Linux 2.5.7, , . , , Linux 2.5.40. Linux 2.5.70, Linux 2.6.7. , , : , , : . nloops ( , , 5) , , . : $ ./futex_demo (18534) 0 (18535) 0 (18534) 1 (18535) 1 (18534) 2 (18535) 2 (18534) 3 (18535) 3 (18534) 4 (18535) 4 /* futex_demo.c Usage: futex_demo [nloops] (Default: 5) Demonstrate the use of futexes in a program where parent and child use a pair of futexes located inside a shared anonymous mapping to synchronize access to a shared resource: the terminal. The two processes each write 'num-loops' messages to the terminal and employ a synchronization protocol that ensures that they alternate in writing messages. 
*/ #define _GNU_SOURCE #include <err.h> #include <errno.h> #include <linux/futex.h> #include <stdatomic.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/time.h> #include <sys/wait.h> #include <unistd.h> static uint32_t *futex1, *futex2, *iaddr; static int futex(uint32_t *uaddr, int futex_op, uint32_t val, const struct timespec *timeout, uint32_t *uaddr2, uint32_t val3) { return syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr2, val3); } /* Acquire the futex pointed to by 'futexp': wait for its value to become 1, and then set the value to 0. */ static void fwait(uint32_t *futexp) { long s; const uint32_t one = 1; /* atomic_compare_exchange_strong(ptr, oldval, newval) atomically performs the equivalent of: if (*ptr == *oldval) *ptr = newval; It returns true if the test yielded true and *ptr was updated. */ while (1) { /* Is the futex available? */ if (atomic_compare_exchange_strong(futexp, &one, 0)) break; /* Yes */ /* Futex is not available; wait. */ s = futex(futexp, FUTEX_WAIT, 0, NULL, NULL, 0); if (s == -1 && errno != EAGAIN) err(EXIT_FAILURE, "futex-FUTEX_WAIT"); } } /* Release the futex pointed to by 'futexp': if the futex currently has the value 0, set its value to 1 and then wake any futex waiters, so that if the peer is blocked in fwait(), it can proceed. */ static void fpost(uint32_t *futexp) { long s; const uint32_t zero = 0; /* atomic_compare_exchange_strong() was described in comments above. */ if (atomic_compare_exchange_strong(futexp, &zero, 1)) { s = futex(futexp, FUTEX_WAKE, 1, NULL, NULL, 0); if (s == -1) err(EXIT_FAILURE, "futex-FUTEX_WAKE"); } } int main(int argc, char *argv[]) { pid_t childPid; unsigned int nloops; setbuf(stdout, NULL); nloops = (argc > 1) ? atoi(argv[1]) : 5; /* Create a shared anonymous mapping that will hold the futexes. Since the futexes are being shared between processes, we subsequently use the "shared" futex operations (i.e., not the ones suffixed "_PRIVATE"). 
*/ iaddr = mmap(NULL, sizeof(*iaddr) * 2, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); if (iaddr == MAP_FAILED) err(EXIT_FAILURE, "mmap"); futex1 = &iaddr[0]; futex2 = &iaddr[1]; *futex1 = 0; /* State: unavailable */ *futex2 = 1; /* State: available */ /* Create a child process that inherits the shared anonymous mapping. */ childPid = fork(); if (childPid == -1) err(EXIT_FAILURE, "fork"); if (childPid == 0) { /* Child */ for (unsigned int j = 0; j < nloops; j++) { fwait(futex1); printf("Child (%jd) %u\n", (intmax_t) getpid(), j); fpost(futex2); } exit(EXIT_SUCCESS); } /* Parent falls through to here. */ for (unsigned int j = 0; j < nloops; j++) { fwait(futex2); printf("Parent (%jd) %u\n", (intmax_t) getpid(), j); fpost(futex1); } wait(NULL); exit(EXIT_SUCCESS); } . get_robust_list(2), restart_syscall(2), pthread_mutexattr_getprotocol(3), futex(7), sched(7) : o Documentation/pi-futex.txt o Documentation/futex-requeue-pi.txt o Documentation/locking/rt-mutex.txt o Documentation/locking/rt-mutex-design.txt o Documentation/robust-futex-ABI.txt Franke, H., Russell, R., and Kirwood, M., 2002. Fuss, Futexes and Furwocks: Fast Userlevel Locking in Linux ( Linux 2002 ), Hart, D., 2009. A futex overview and update, Hart, D. and Guniguntala, D., 2009. Requeue-PI: Making glibc Condvars PI-Aware (from proceedings of the 2009 Real-Time Linux Workshop), Drepper, U., 2011. Futexes Are Tricky, futex, futex-*.tar.bz2, Azamat Hackimov , Dmitry Bolkhovskikh , Yuri Kozlov ; GNU 3 , . . , , . Linux man-pages 6.06 31 2023 . futex(2)