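memfd_create(2) System Calls Manual memfd_create(2) NAME memfd_create - create an anonymous file LIBRARY Standard C library (libc, -lc) SYNOPSIS #define _GNU_SOURCE /* See feature_test_macros(7) */ #include <sys/mman.h> int memfd_create(const char *name, unsigned int flags); DESCRIPTION memfd_create() creates an anonymous file and returns a file descriptor that refers to it. The file behaves like a regular file, and so can be modified, truncated, memory-mapped, and so on. However, unlike a regular file, it lives in RAM and has a volatile backing storage. Once all references to the file are dropped, it is automatically released. Anonymous memory is used for all backing pages of the file. Therefore, files created by memfd_create() have the same semantics as other anonymous memory allocations such as those allocated using mmap(2) with the MAP_ANONYMOUS flag. The initial size of the file is set to 0. Following the call, the file size should be set using ftruncate(2). (Alternatively, the file may be populated by calls to write(2) or similar.) The name supplied in name is used as a filename and will be displayed as the target of the corresponding symbolic link in the directory /proc/self/fd/. The displayed name is always prefixed with memfd: and serves only for debugging purposes. Names do not affect the behavior of the file descriptor, and as such multiple files can have the same name without any side effects. The following values may be bitwise ORed in flags to change the behavior of memfd_create(): MFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file descriptor. See the description of the O_CLOEXEC flag in open(2) for reasons why this may be useful. MFD_ALLOW_SEALING Allow sealing operations on this file. See the discussion of the F_ADD_SEALS and F_GET_SEALS operations in fcntl(2), and also NOTES, below. The initial set of seals is empty. If this flag is not set, the initial set of seals will be F_SEAL_SEAL, meaning that no other seals can be set on the file. MFD_HUGETLB (since Linux 4.14) The anonymous file will be created in the hugetlbfs filesystem using huge pages. See the Linux kernel source file Documentation/admin-guide/mm/hugetlbpage.rst for more information about hugetlbfs. Specifying both MFD_HUGETLB and MFD_ALLOW_SEALING in flags is supported since Linux 4.16. MFD_HUGE_2MB, MFD_HUGE_1GB, ... Used in conjunction with MFD_HUGETLB to select alternative hugetlb page sizes (respectively, 2 MB, 1 GB, ...) on systems that support multiple hugetlb page sizes. Definitions for known huge page sizes are included in the header file <linux/memfd.h>. For details on encoding huge page sizes not included in the header file, see the discussion of the similarly named constants in mmap(2). Unused bits in flags must be 0. As its return value, memfd_create() returns a new file descriptor that can be used to refer to the file. This file descriptor is opened for both reading and writing (O_RDWR) and O_LARGEFILE is set for the file descriptor. With respect to fork(2) and execve(2), the usual semantics apply for the file descriptor created by memfd_create(). A copy of the file descriptor is inherited by the child produced by fork(2) and refers to the same file. The file descriptor is preserved across execve(2), unless the close-on-exec flag has been set. RETURN VALUE On success, memfd_create() returns a new file descriptor. On error, -1 is returned and errno is set to indicate the error. ERRORS EFAULT The address in name points to invalid memory. EINVAL flags included unknown bits. EINVAL name was too long. (The limit is 249 bytes, excluding the terminating null byte.) EINVAL Both MFD_HUGETLB and MFD_ALLOW_SEALING were specified in flags. EMFILE The per-process limit on the number of open file descriptors has been reached. ENFILE The system-wide limit on the total number of open files has been reached. ENOMEM There was insufficient memory to create a new anonymous file. EPERM The MFD_HUGETLB flag was specified, but the caller was not privileged (did not have the CAP_IPC_LOCK capability) and is not a member of the sysctl_hugetlb_shm_group group; see the description of /proc/sys/vm/sysctl_hugetlb_shm_group in proc(5). STANDARDS Linux. HISTORY Linux 3.17, glibc 2.27. NOTES The memfd_create() system call provides a simple alternative to manually mounting a tmpfs(5) filesystem and creating and opening a file in that filesystem. The primary purpose of memfd_create() is to create files and associated file descriptors that are used with the file-sealing APIs provided by fcntl(2). The memfd_create() system call also has uses without file sealing (which is why file-sealing is disabled, unless explicitly requested with the MFD_ALLOW_SEALING flag). In particular, it can be used as an alternative to creating files in tmp or as an alternative to using the open(2) O_TMPFILE in cases where there is no intention to actually link the resulting file into the filesystem. File sealing In the absence of file sealing, processes that communicate via shared memory must either trust each other, or take measures to deal with the possibility that an untrusted peer may manipulate the shared memory region in problematic ways. For example, an untrusted peer might modify the contents of the shared memory at any time, or shrink the shared memory region. The former possibility leaves the local process vulnerable to time-of-check-to-time-of-use race conditions (typically dealt with by copying data from the shared memory region before checking and using it). The latter possibility leaves the local process vulnerable to SIGBUS signals when an attempt is made to access a now-nonexistent location in the shared memory region. (Dealing with this possibility necessitates the use of a handler for the SIGBUS signal.) Dealing with untrusted peers imposes extra complexity on code that employs shared memory. Memory sealing enables that extra complexity to be eliminated, by allowing a process to operate secure in the knowledge that its peer can't modify the shared memory in an undesired fashion. An example of the usage of the sealing mechanism is as follows: (1) The first process creates a tmpfs(5) file using memfd_create(). The call yields a file descriptor used in subsequent steps. (2) The first process sizes the file created in the previous step using ftruncate(2), maps it using mmap(2), and populates the shared memory with the desired data. (3) The first process uses the fcntl(2) F_ADD_SEALS operation to place one or more seals on the file, in order to restrict further modifications on the file. (If placing the seal F_SEAL_WRITE, then it will be necessary to first unmap the shared writable mapping created in the previous step. 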
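Otherwise, behavior similar to F_SEAL_WRITE can be achieved by using F_SEAL_FUTURE_WRITE, which will prevent future writes via mmap(2) and write(2) from succeeding while keeping existing shared writable mappings). (4) A second process obtains a file descriptor for the tmpfs(5) file and maps it. Among the possible ways in which this could happen are the following: o The process that called memfd_create() could transfer the resulting file descriptor to the second process via a UNIX domain socket (see unix(7) and cmsg(3)). The second process then maps the file using mmap(2). o The second process is created via fork(2) and thus automatically inherits the file descriptor and mapping. (Note that in this case and the next, there is a natural trust relationship between the two processes, since they are running under the same user ID. Therefore, file sealing would not normally be necessary.) o The second process opens the file /proc/<pid>/fd/<fd>, where <pid> is the PID of the first process (the one that called memfd_create()), and <fd> is the number of the file descriptor returned by the call to memfd_create() in that process. The second process then maps the file using mmap(2). (5) The second process uses the fcntl(2) F_GET_SEALS operation to retrieve the bit mask of seals that has been applied to the file. This bit mask can be inspected in order to determine what kinds of restrictions have been placed on file modifications. If desired, the second process can apply further seals to impose additional restrictions (so long as the F_SEAL_SEAL seal has not yet been applied). EXAMPLES Below are shown two example programs that demonstrate the use of memfd_create() and the file sealing API. The first program, t_memfd_create.c, creates a tmpfs(5) file using memfd_create(), sets a size for the file, maps it into memory, and optionally places some seals on the file. The program accepts up to three command-line arguments, of which the first two are required. The first argument is the name to associate with the file, the second argument is the size to be set for the file, and the optional third argument is a string of characters that specify seals to be set on the file. The second program, t_get_seals.c, can be used to open an existing file that was created via memfd_create() and inspect the set of seals that have been applied to that file. The following shell session demonstrates the use of these programs. First we create a tmpfs(5) file and set some seals on it: $ ./t_memfd_create my_memfd_file 4096 sw & [1] 11775 PID: 11775; fd: 3; /proc/11775/fd/3 At this point, the t_memfd_create program continues to run in the background. From another program, we can obtain a file descriptor for the file created by memfd_create() by opening the /proc/pid/fd file that corresponds to the file descriptor opened by memfd_create(). Using that pathname, we inspect the content of the /proc/pid/fd symbolic link, and use our t_get_seals program to view the seals that have been placed on the file: $ readlink /proc/11775/fd/3 /memfd:my_memfd_file (deleted) $ ./t_get_seals /proc/11775/fd/3 Existing seals: WRITE SHRINK Program source: t_memfd_create.c #define _GNU_SOURCE #include <err.h> #include <fcntl.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> int main(int argc, char *argv[]) { int fd; char *name, *seals_arg; ssize_t len; unsigned int seals; if (argc < 3) { fprintf(stderr, "%s name size [seals]\n", argv[0]); fprintf(stderr, "\t'seals' can contain any of the " "following characters:\n"); fprintf(stderr, "\t\tg - F_SEAL_GROW\n"); fprintf(stderr, "\t\ts - F_SEAL_SHRINK\n"); fprintf(stderr, "\t\tw - F_SEAL_WRITE\n"); fprintf(stderr, "\t\tW - F_SEAL_FUTURE_WRITE\n"); fprintf(stderr, "\t\tS - F_SEAL_SEAL\n"); exit(EXIT_FAILURE); } name = argv[1]; len = atoi(argv[2]); seals_arg = argv[3]; /* Create an anonymous file in tmpfs; allow seals to be placed on the file. */ fd = memfd_create(name, MFD_ALLOW_SEALING); if (fd == -1) err(EXIT_FAILURE, "memfd_create"); /* Size the file as specified on the command line. 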
*/ if (ftruncate(fd, len) == -1) err(EXIT_FAILURE, "truncate"); printf("PID: %jd; fd: %d; /proc/%jd/fd/%d\n", (intmax_t) getpid(), fd, (intmax_t) getpid(), fd); /* Code to map the file and populate the mapping with data omitted. */ /* If a 'seals' command-line argument was supplied, set some seals on the file. */ if (seals_arg != NULL) { seals = 0; if (strchr(seals_arg, 'g') != NULL) seals |= F_SEAL_GROW; if (strchr(seals_arg, 's') != NULL) seals |= F_SEAL_SHRINK; if (strchr(seals_arg, 'w') != NULL) seals |= F_SEAL_WRITE; if (strchr(seals_arg, 'W') != NULL) seals |= F_SEAL_FUTURE_WRITE; if (strchr(seals_arg, 'S') != NULL) seals |= F_SEAL_SEAL; if (fcntl(fd, F_ADD_SEALS, seals) == -1) err(EXIT_FAILURE, "fcntl"); } /* Keep running, so that the file created by memfd_create() continues to exist. */ pause(); exit(EXIT_SUCCESS); } Program source: t_get_seals.c #define _GNU_SOURCE #include <err.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> int main(int argc, char *argv[]) { int fd; unsigned int seals; if (argc != 2) { fprintf(stderr, "%s /proc/PID/fd/FD\n", argv[0]); exit(EXIT_FAILURE); } fd = open(argv[1], O_RDWR); if (fd == -1) err(EXIT_FAILURE, "open"); seals = fcntl(fd, F_GET_SEALS); if (seals == -1) err(EXIT_FAILURE, "fcntl"); printf("Existing seals:"); if (seals & F_SEAL_SEAL) printf(" SEAL"); if (seals & F_SEAL_GROW) printf(" GROW"); if (seals & F_SEAL_WRITE) printf(" WRITE"); if (seals & F_SEAL_FUTURE_WRITE) printf(" FUTURE_WRITE"); if (seals & F_SEAL_SHRINK) printf(" SHRINK"); printf("\n"); /* Code to map the file and access the contents of the resulting mapping omitted. */ exit(EXIT_SUCCESS); } SEE ALSO fcntl(2), ftruncate(2), memfd_secret(2), mmap(2), shmget(2), shm_open(3) TRANSLATION The Russian translation of this manual page was prepared by, among others, aereiae, Alexey, Azamat Hackimov, Dmitriy S. Seregin, Dmitry Bolkhovskikh, ITriskTI, Max Is, and Yuri Kozlov. This translation is free documentation; see the GNU General Public License (GPL), version 3 or later, for the copyright conditions. There is NO WARRANTY. If you find errors in the translation of this manual page, please report them to the translator(s) by e-mail or to the mailing list of the Russian translators. Linux man-pages 6.9.1 15 June 2024 memfd_create(2)