Files
shadow/libmisc/idmapping.c
Alejandro Colomar f45498a6c2 libmisc/write_full.c: Improve write_full()
Documentation:

-  Correct the comment documenting the function:

   write_full() doesn't write "up to" count bytes (which is write(2)'s
   behavior, and exactly what this function is designed to avoid), but
   rather exactly count bytes (on success).

-  While fixing the documentation, take the time to add a man-page-like
   comment as in other APIs.  Especially, since we'll have to document
   a few other changes from this patch, such as the modified return
   values.

-  Partial writes are still possible on error.  It's the caller's
   responsibility to handle that possibility.

API:

-  In write(2), it's useful to know how many bytes were transferred,
   since it can have short writes.  In this API, since it either writes
   it all or fails, that value is useless, and callers only want to know
   if it succeeded or not.  Thus, just return 0 or -1.

Implementation:

-  Use `== -1` instead of `< 0` to check for write(2) syscall errors.
   This is wisdom from Michael Kerrisk.  This convention is useful
   because it more explicitly tells maintainers that the only value
   which can lead to that path is -1.  Otherwise, a maintainer of the
   code might be confused to think that other negative values are
   possible.  Keep it simple.

-  The path under `if (res == 0)` was unreachable, since the loop
   condition `while (count > 0)` precludes that possibility.  Remove the
   dead code.

-  Use a temporary variable of type `const char *` to avoid a cast.

-  Rename `res`, which just holds the result from write(2), to `w`,
   which more clearly shows that it's just a very-short-lived variable
   (by it's one-letter name), and also relates itself more to write(2).
   I find it more readable.

-  Move the definition of `w` to the top of the function.  Now that the
   function is significantly shorter, the lifetime of the variable is
   clearer, and I find it more readable this way.

Use:

-  Also use `== -1` to check errors.

Cc: Christian Göttsche <cgzones@googlemail.com>
Cc: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Alejandro Colomar <alx@kernel.org>
2023-08-18 20:35:15 -05:00

226 lines
7.0 KiB
C

/*
* SPDX-FileCopyrightText: 2013 Eric Biederman
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <config.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include "alloc.h"
#include "prototypes.h"
#include "stpeprintf.h"
#include "idmapping.h"
#if HAVE_SYS_CAPABILITY_H
#include <sys/prctl.h>
#include <sys/capability.h>
#endif
#include "shadowlog.h"
struct map_range *get_map_ranges(int ranges, int argc, char **argv)
{
struct map_range *mappings, *mapping;
int idx, argidx;
if (ranges < 0 || argc < 0) {
fprintf(log_get_logfd(), "%s: error calculating number of arguments\n", log_get_progname());
return NULL;
}
if (ranges != ((argc + 2) / 3)) {
fprintf(log_get_logfd(), "%s: ranges: %u is wrong for argc: %d\n", log_get_progname(), ranges, argc);
return NULL;
}
if ((ranges * 3) > argc) {
fprintf(log_get_logfd(), "ranges: %u argc: %d\n",
ranges, argc);
fprintf(log_get_logfd(),
_( "%s: Not enough arguments to form %u mappings\n"),
log_get_progname(), ranges);
return NULL;
}
mappings = CALLOC(ranges, struct map_range);
if (!mappings) {
fprintf(log_get_logfd(), _( "%s: Memory allocation failure\n"),
log_get_progname());
exit(EXIT_FAILURE);
}
/* Gather up the ranges from the command line */
mapping = mappings;
for (idx = 0, argidx = 0; idx < ranges; idx++, argidx += 3, mapping++) {
if (!getulong(argv[argidx + 0], &mapping->upper)) {
free(mappings);
return NULL;
}
if (!getulong(argv[argidx + 1], &mapping->lower)) {
free(mappings);
return NULL;
}
if (!getulong(argv[argidx + 2], &mapping->count)) {
free(mappings);
return NULL;
}
if (ULONG_MAX - mapping->upper <= mapping->count || ULONG_MAX - mapping->lower <= mapping->count) {
fprintf(log_get_logfd(), _( "%s: subuid overflow detected.\n"), log_get_progname());
exit(EXIT_FAILURE);
}
if (mapping->upper > UINT_MAX ||
mapping->lower > UINT_MAX ||
mapping->count > UINT_MAX) {
fprintf(log_get_logfd(), _( "%s: subuid overflow detected.\n"), log_get_progname());
exit(EXIT_FAILURE);
}
if (mapping->lower + mapping->count > UINT_MAX ||
mapping->upper + mapping->count > UINT_MAX) {
fprintf(log_get_logfd(), _( "%s: subuid overflow detected.\n"), log_get_progname());
exit(EXIT_FAILURE);
}
if (mapping->lower + mapping->count < mapping->lower ||
mapping->upper + mapping->count < mapping->upper) {
/* this one really shouldn't be possible given previous checks */
fprintf(log_get_logfd(), _( "%s: subuid overflow detected.\n"), log_get_progname());
exit(EXIT_FAILURE);
}
}
return mappings;
}
/* Number of ascii digits needed to print any unsigned long in decimal.
* There are approximately 10 bits for every 3 decimal digits.
* So from bits to digits the formula is roundup((Number of bits)/10) * 3.
* For common sizes of integers this works out to:
* 2bytes --> 6 ascii estimate -> 65536 (5 real)
* 4bytes --> 12 ascii estimated -> 4294967296 (10 real)
* 8bytes --> 21 ascii estimated -> 18446744073709551616 (20 real)
* 16bytes --> 39 ascii estimated -> 340282366920938463463374607431768211456 (39 real)
*/
#define ULONG_DIGITS (((WIDTHOF(unsigned long) + 9)/10)*3)
#if HAVE_SYS_CAPABILITY_H
static inline bool maps_lower_root(int cap, int ranges, const struct map_range *mappings)
{
int idx;
const struct map_range *mapping;
if (cap != CAP_SETUID)
return false;
mapping = mappings;
for (idx = 0; idx < ranges; idx++, mapping++) {
if (mapping->lower == 0)
return true;
}
return false;
}
#endif
/*
* The ruid refers to the caller's uid and is used to reset the effective uid
* back to the callers real uid.
* This clutch mainly exists for setuid-based new{g,u}idmap binaries that are
* called in contexts where all capabilities other than the necessary
* CAP_SET{G,U}ID capabilities are dropped. Since the kernel will require
* assurance that the caller holds CAP_SYS_ADMIN over the target user namespace
* the only way it can confirm is in this case is if the effective uid is
* equivalent to the uid owning the target user namespace.
* Note, we only support this when a) new{g,u}idmap is not called by root and
* b) if the caller's uid and the uid retrieved via system appropriate means
* (shadow file or other) are identical. Specifically, this does not support
* when the root user calls the new{g,u}idmap binary for an unprivileged user.
* If this is wanted: use file capabilities!
*/
void write_mapping(int proc_dir_fd, int ranges, const struct map_range *mappings,
const char *map_file, uid_t ruid)
{
int idx;
const struct map_range *mapping;
size_t bufsize;
char *buf, *pos, *end;
int fd;
#if HAVE_SYS_CAPABILITY_H
int cap;
struct __user_cap_header_struct hdr = {_LINUX_CAPABILITY_VERSION_3, 0};
struct __user_cap_data_struct data[2] = {{0}};
if (strcmp(map_file, "uid_map") == 0) {
cap = CAP_SETUID;
} else if (strcmp(map_file, "gid_map") == 0) {
cap = CAP_SETGID;
} else {
fprintf(log_get_logfd(), _("%s: Invalid map file %s specified\n"), log_get_progname(), map_file);
exit(EXIT_FAILURE);
}
/* Align setuid- and fscaps-based new{g,u}idmap behavior. */
if (geteuid() == 0 && geteuid() != ruid) {
if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) < 0) {
fprintf(log_get_logfd(), _("%s: Could not prctl(PR_SET_KEEPCAPS)\n"), log_get_progname());
exit(EXIT_FAILURE);
}
if (seteuid(ruid) < 0) {
fprintf(log_get_logfd(), _("%s: Could not seteuid to %d\n"), log_get_progname(), ruid);
exit(EXIT_FAILURE);
}
}
/* Lockdown new{g,u}idmap by dropping all unneeded capabilities. */
memset(data, 0, sizeof(data));
data[0].effective = CAP_TO_MASK(cap);
/*
* When uid 0 from the ancestor userns is supposed to be mapped into
* the child userns we need to retain CAP_SETFCAP.
*/
if (maps_lower_root(cap, ranges, mappings))
data[0].effective |= CAP_TO_MASK(CAP_SETFCAP);
data[0].permitted = data[0].effective;
if (capset(&hdr, data) < 0) {
fprintf(log_get_logfd(), _("%s: Could not set caps\n"), log_get_progname());
exit(EXIT_FAILURE);
}
#endif
bufsize = ranges * ((ULONG_DIGITS + 1) * 3);
pos = buf = XMALLOC(bufsize, char);
end = buf + bufsize;
/* Build the mapping command */
mapping = mappings;
for (idx = 0; idx < ranges; idx++, mapping++) {
/* Append this range to the string that will be written */
pos = stpeprintf(pos, end, "%lu %lu %lu\n",
mapping->upper,
mapping->lower,
mapping->count);
}
if (pos == end || pos == NULL) {
fprintf(log_get_logfd(), _("%s: stpeprintf failed!\n"), log_get_progname());
exit(EXIT_FAILURE);
}
/* Write the mapping to the mapping file */
fd = openat(proc_dir_fd, map_file, O_WRONLY);
if (fd < 0) {
fprintf(log_get_logfd(), _("%s: open of %s failed: %s\n"),
log_get_progname(), map_file, strerror(errno));
exit(EXIT_FAILURE);
}
if (write_full(fd, buf, pos - buf) == -1) {
fprintf(log_get_logfd(), _("%s: write to %s failed: %s\n"),
log_get_progname(), map_file, strerror(errno));
exit(EXIT_FAILURE);
}
close(fd);
free(buf);
}