diff --git a/Dockerfile b/Dockerfile
index 0183d7eee1bc23bbe17c1e02747931c01eea903c..79dcb52d7353126e9af126fd71495f691673a7e3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,9 +42,13 @@ RUN grep stackmaximaversion ${LIB}/stackmaxima.mac | grep -oP "\d+" >> /opt/maxi
 	useradd -M "maxima-$i"; \
 	done
 
+RUN useradd -M "goemaxima-nobody"
+
 # Add go webserver
 COPY ./bin/web ${BIN}/goweb
 
+RUN chmod 0700 "${BIN}/goweb" "${BIN}/maxima-optimised"
+
 ENV GOEMAXIMA_LIB_PATH=/opt/maxima/assets/maximalocal.mac
 ENV GOEMAXIMA_NUSER=$MAX_USER
 RUN sh -c 'echo $GOEMAXIMA_NUSER'
@@ -56,4 +60,4 @@ HEALTHCHECK --interval=1m --timeout=3s CMD curl -f 'http://localhost:8080/goemax
 
 # clear tmp because when kubernetes restarts a pod, it keeps the /tmp content even if it's tmpfs,
 # which means that on a restart caused by an overfull tmpfs, it will keep restarting in a loop
-CMD cd /tmp && rm --one-file-system -rf * && exec tini ${BIN}/goweb ${BIN}/maxima-optimised
+CMD cd /tmp && rm --one-file-system -rf * && exec tini ${BIN}/goweb ${BIN}/maxima-optimised || echo oh no no >&2
diff --git a/buildscript.sh b/buildscript.sh
index 4c813ac0a3a31eb11814b753df9fe185fb734c8c..80d97012ddaa7fa93c55ede1342cd27330ddbbaa 100644
--- a/buildscript.sh
+++ b/buildscript.sh
@@ -12,7 +12,7 @@ if [ $SBCL_ARCH = amd64 ]; then
 fi
 
 apt-get update
-apt-get install -y bzip2 make wget python3 gcc texinfo curl
+apt-get install -y bzip2 make wget python3 gcc texinfo curl libcap2-bin
 
 mkdir -p ${SRC}
 wget https://sourceforge.net/projects/maxima/files/Maxima-source/${MAXIMA_VERSION}-source/maxima-${MAXIMA_VERSION}.tar.gz -O ${SRC}/maxima-${MAXIMA_VERSION}.tar.gz
diff --git a/docker-compose.yml b/docker-compose.yml
index 52f709ddf4177baaea14ecaeb7cd328384455393..537495d62d8df397cfc7143b656ca67d8604008b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,11 +8,8 @@ services:
       - "/tmp"
     restart: unless-stopped
     cap_add:
-      - CHOWN
-      - DAC_OVERRIDE
       - SETGID
       - SETUID
-      - KILL
     cap_drop:
       - ALL
     read_only: true
diff --git a/helmmaxima/values.yaml b/helmmaxima/values.yaml
index ebdf182a4bf425d634edff2a54715b380df9b64c..b56de385148fc66c165df21037055bd717929a70 100644
--- a/helmmaxima/values.yaml
+++ b/helmmaxima/values.yaml
@@ -27,11 +27,8 @@ podSecurityContext: {}
 securityContext:
   capabilities:
     add:
-      - CHOWN
-      - DAC_OVERRIDE
       - SETGID
       - SETUID
-      - KILL
     drop:
       - ALL
   readOnlyRootFilesystem: true
diff --git a/src/maxima_fork.c b/src/maxima_fork.c
index 3fa1164135426139eae5270db7f0defdf86e663f..d95e06fd540edeeaa69d6d8335745bd0a4229f09 100644
--- a/src/maxima_fork.c
+++ b/src/maxima_fork.c
@@ -125,6 +125,29 @@ char *fork_new_process() {
 			continue;
 		}
 
+		uid_t uid = user_id[slot - 1];
+		gid_t gid = group_id[slot - 1];
+		// note: setgid should be executed before setuid when dropping from root
+		if (setgid(gid) == -1) {
+			perror("Could not set gid");
+			ret = NULL;
+			break;
+		}
+
+		// remove all aux groups
+		if (setgroups(0, NULL)) {
+			perror("Could not remove aux groups");
+			ret = NULL;
+			break;
+		}
+
+		// after this, we should be non-root
+		if (setuid(uid) == -1) {
+			perror("Could not set uid");
+			ret = NULL;
+			break;
+		}
+
 		if (chdir(tempdir) == -1) {
 			perror("Could not chdir to temporary directory");
 			ret = NULL;
@@ -157,36 +180,13 @@ char *fork_new_process() {
 			break;
 		}
 
-		uid_t uid = user_id[slot - 1];
-		gid_t gid = group_id[slot - 1];
-		// note: setgid should be executed before setuid when dropping from root
-		if (setgid(gid) == -1) {
-			perror("Could not set gid");
-			ret = NULL;
-			break;
-		}
-
-		// remove all aux groups
-		if (setgroups(0, NULL)) {
-			perror("Could not remove aux groups");
-			ret = NULL;
-			break;
-		}
-
-		// after this, we should be non-root
-		if (setuid(uid) == -1) {
-			perror("Could not set uid");
-			ret = NULL;
-			break;
-		}
-
 		// create temporary folders and files
-		if (mkdir("output", 0770) == -1) {
+		if (mkdir("output", 0755) == -1) {
 			perror("Could not create output directory");
 			ret = NULL;
 			break;
 		}
-		if (mkdir("work", 0770) == -1) {
+		if (mkdir("work", 0755) == -1) {
 			perror("Could not create work directory");
 			ret = NULL;
 			break;
diff --git a/src/web/drop_privilege_linux.go b/src/web/drop_privilege_linux.go
new file mode 100644
index 0000000000000000000000000000000000000000..21810084f6e54f06b4cbfa7f745bf956fe43ae7b
--- /dev/null
+++ b/src/web/drop_privilege_linux.go
@@ -0,0 +1,141 @@
+package main
+
+// This file drops the privileges of a single thread and executes functions in it.
+// The effective uid is set to that of the maxima user we want to interact with.
+// The real uid is set to goemaxima-nobody, which ensures that kill -1 does not
+// kill the maxima mother process. If the real uid was the maxima user, an evil
+// maxima process might ptrace our own process and use that to escalate privileges,
+// or send signals or interact in another way.
+
+import (
+	"fmt"
+	"os/user"
+	"runtime"
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+const goemaxima_nobody string = "goemaxima-nobody"
+
+type PrivilegeDropper struct {
+	server_uid       uint16
+	server_gid       uint16
+	nobody_uid       uint16
+	nobody_gid       uint16
+	execution_getter <-chan ExecutionInfo
+}
+
+type ExecutionInfo struct {
+	Uid uint16
+	Gid uint16
+	F   func(error)
+}
+
+func (dropper *PrivilegeDropper) thread_drop_euid(uid uint16) error {
+	// These functions use the raw syscall interface because the usual unix/posix wrappers apply setresuid
+	// to all threads of the process (typically via signal handlers), which is exactly what we do not want here.
+	_, _, errno := unix.Syscall(unix.SYS_SETRESUID, uintptr(dropper.nobody_uid), uintptr(uid), uintptr(dropper.server_uid))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func (dropper *PrivilegeDropper) thread_restore_euid() {
+	// since we still have our saved-set-uid, we can restore our uids
+	_, _, errno := unix.Syscall(unix.SYS_SETRESUID, uintptr(dropper.server_uid), uintptr(dropper.server_uid), uintptr(dropper.server_uid))
+	if errno != 0 {
+		panic(errno)
+	}
+}
+
+func (dropper *PrivilegeDropper) thread_drop_egid(gid uint16) error {
+	_, _, errno := unix.Syscall(unix.SYS_SETRESGID, uintptr(dropper.nobody_gid), uintptr(gid), uintptr(dropper.server_gid))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func (dropper *PrivilegeDropper) thread_restore_egid() {
+	_, _, errno := unix.Syscall(unix.SYS_SETRESGID, uintptr(dropper.server_gid), uintptr(dropper.server_gid), uintptr(dropper.server_gid))
+	if errno != 0 {
+		panic(errno)
+	}
+}
+
+func (dropper *PrivilegeDropper) run_as_user(uid, gid uint16, f func(error)) {
+	if err := dropper.thread_drop_egid(gid); err != nil {
+		f(err)
+		return
+	}
+	defer dropper.thread_restore_egid()
+
+	if err := dropper.thread_drop_euid(uid); err != nil {
+		f(err)
+		return
+	}
+	defer dropper.thread_restore_euid()
+
+	f(nil)
+}
+
+func (dropper *PrivilegeDropper) run() {
+	// since goroutines may switch between machine threads at any time,
+	// we lock this routine to always stay on the same machine thread
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+	for ex := range dropper.execution_getter {
+		dropper.run_as_user(ex.Uid, ex.Gid, ex.F)
+	}
+}
+
+func InitializeDropper(execution_channel chan ExecutionInfo) error {
+	server, err := user.Current()
+	if err != nil {
+		return err
+	}
+
+	server_uid, err := strconv.Atoi(server.Uid)
+	if err != nil {
+		return err
+	}
+
+	server_gid, err := strconv.Atoi(server.Gid)
+	if err != nil {
+		return err
+	}
+
+	nobody, err := user.Lookup(goemaxima_nobody)
+	if err != nil {
+		return fmt.Errorf("no %s user found, please make sure it exists and is not the user used by the server; error: %s", goemaxima_nobody, err)
+	}
+
+	nobody_uid, err := strconv.Atoi(nobody.Uid)
+	if err != nil {
+		return err
+	}
+
+	nobody_gid, err := strconv.Atoi(nobody.Gid)
+	if err != nil {
+		return err
+	}
+
+	if nobody_uid == server_uid {
+		return fmt.Errorf("server uid %d is the same as %s uid %d. Please make sure that the server does not run as %s",
+			server_uid, goemaxima_nobody, nobody_uid, goemaxima_nobody)
+	}
+
+	dropper := PrivilegeDropper{
+		server_uid:       uint16(server_uid),
+		server_gid:       uint16(server_gid),
+		nobody_uid:       uint16(nobody_uid),
+		nobody_gid:       uint16(nobody_gid),
+		execution_getter: execution_channel,
+	}
+
+	go dropper.run()
+
+	return nil
+}
diff --git a/src/web/web.go b/src/web/web.go
index 7091f17df1d7e945034c7baa6ffc193485b958a7..a27ed4f045e6c7fe04d30034ef5985c966f88c99 100644
--- a/src/web/web.go
+++ b/src/web/web.go
@@ -25,6 +25,9 @@ import (
 
 var tmp_prefix string
 var debug uint
+var privilege_drop_channel chan<- ExecutionInfo
+
+const MAXIMA_SPAWN_TIMEOUT = time.Duration(10) * time.Second
 
 type User struct {
 	Id  uint
@@ -33,6 +36,29 @@ type User struct {
 	Gid int
 }
 
+func (user *User) go_execute_as(f func(error)) {
+	uid16 := uint16(user.Uid)
+	gid16 := uint16(user.Gid)
+	privilege_drop_channel <- ExecutionInfo{
+		Uid: uid16,
+		Gid: gid16,
+		F:   f,
+	}
+}
+
+func (user *User) sync_execute_as(timeout time.Duration, f func(error) error) error {
+	err_chan := make(chan error)
+	user.go_execute_as(func(err error) {
+		err_chan <- f(err)
+	})
+	select {
+	case result := <-err_chan:
+		return result
+	case <-time.After(timeout):
+		return fmt.Errorf("timed out executing function with lowered privileges")
+	}
+}
+
 type Metrics struct {
 	ResponseTime prometheus.Histogram
 	SpawnTime    prometheus.Histogram
@@ -126,64 +152,80 @@ func new_mother_proc(binpath string, libs []string) (*MotherProcess, error) {
 
 func (p *MotherProcess) spawn_new(user *User) (*ChildProcess, float64, error) {
 	start := time.Now()
-	tmp_dir, err := ioutil.TempDir(tmp_prefix, "maxima-plot-")
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("unable to create temp dir: %s", err)
-	}
-	// right permission for folders
-	err = os.Chown(tmp_dir, user.Uid, user.Gid)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("not able to change tempdir permission: %s", err)
-	}
+	var result ChildProcess
+	err := user.sync_execute_as(MAXIMA_SPAWN_TIMEOUT, func(err error) error {
+		if err != nil {
+			return err
+		}
+		tmp_dir, err := ioutil.TempDir(tmp_prefix, "maxima-plot-")
+		if err != nil {
+			return fmt.Errorf("unable to create temp dir: %s", err)
+		}
 
-	// create named pipe for process stdout
-	pipe_name_out := filepath.Clean(filepath.Join(tmp_dir, "outpipe"))
+		err = os.Chmod(tmp_dir, 0755)
+		if err != nil {
+			return fmt.Errorf("unable to change permissions of temp dir: %s", err)
+		}
 
-	err = syscall.Mkfifo(pipe_name_out, 0600)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not create named pipe in temp folder: %s", err)
-	}
+		// create named pipe for process stdout
+		pipe_name_out := filepath.Clean(filepath.Join(tmp_dir, "outpipe"))
 
-	// create named pipe for process stdin
-	pipe_name_in := filepath.Clean(filepath.Join(tmp_dir, "inpipe"))
+		err = syscall.Mkfifo(pipe_name_out, 0600)
+		if err != nil {
+			return fmt.Errorf("could not create named pipe in temp folder: %s", err)
+		}
 
-	err = syscall.Mkfifo(pipe_name_in, 0600)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not create named pipe in temp folder: %s", err)
-	}
+		// create named pipe for process stdin
+		pipe_name_in := filepath.Clean(filepath.Join(tmp_dir, "inpipe"))
 
-	_, err = fmt.Fprintf(p.Input, "%d%s\n", user.Id, tmp_dir)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("unable to communicate with process: %s", err)
-	}
+		err = syscall.Mkfifo(pipe_name_in, 0600)
+		if err != nil {
+			return fmt.Errorf("could not create named pipe in temp folder: %s", err)
+		}
 
-	debugf("Debug: Opening pipes to child")
-	// note: open outpipe before inpipe to avoid deadlock
-	out, err := os.OpenFile(pipe_name_out, os.O_RDONLY, os.ModeNamedPipe)
+		_, err = fmt.Fprintf(p.Input, "%d%s\n", user.Id, tmp_dir)
+		if err != nil {
+			return fmt.Errorf("unable to communicate with process: %s", err)
+		}
+
+		debugf("Debug: Opening pipes to child")
+		// note: open outpipe before inpipe to avoid deadlock
+		out, err := os.OpenFile(pipe_name_out, os.O_RDONLY, os.ModeNamedPipe)
+		if err != nil {
+			return fmt.Errorf("could not open temp dir outpipe: %s", err)
+		}
+
+		in, err := os.OpenFile(pipe_name_in, os.O_WRONLY, os.ModeNamedPipe)
+		if err != nil {
+			return fmt.Errorf("could not open temp dir inpipe: %s", err)
+		}
+
+		result.User = user
+		result.Input = in
+		result.Outfile = out
+		result.TempDir = tmp_dir
+		return nil
+	})
 	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not open temp dir outpipe: %s", err)
+		return nil, 0.0, fmt.Errorf("unable to start child process: %s", err)
 	}
 
-	in, err := os.OpenFile(pipe_name_in, os.O_WRONLY, os.ModeNamedPipe)
+	debugf("Debug: Attempting to read from child")
+	err = result.Outfile.SetReadDeadline(time.Now().Add(MAXIMA_SPAWN_TIMEOUT))
 	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not open temp dir inpipe: %s", err)
+		return nil, 0.0, fmt.Errorf("unable to set read deadline: %s", err)
 	}
-
-	out.SetReadDeadline(time.Now().Add(10 * time.Second))
-	bufout := bufio.NewReader(out)
+	bufout := bufio.NewReader(result.Outfile)
 	_, err = fmt.Fscanf(bufout, "\nT")
 	if err != nil {
 		return nil, 0.0, fmt.Errorf("not able to find end marker: %s", err)
 	}
+	result.Output = bufout
+
 	total := time.Since(start)
+	total_ms := float64(total.Microseconds()) / 1000
 	debugf("Debug: child took %s for startup", total)
-	return &ChildProcess{
-		User:    user,
-		Input:   in,
-		Output:  bufout,
-		Outfile: out,
-		TempDir: tmp_dir,
-	}, float64(total.Microseconds()) / 1000, nil
+	return &result, total_ms, nil
 }
 
 type MaximaResponse struct {
@@ -198,15 +240,22 @@ func (p *ChildProcess) eval_command(command string, timeout uint64) MaximaRespon
 	in_err := make(chan error, 1)
 	// write to stdin in separate goroutine to prevent deadlocks
 	go func() {
-		p.Input.SetWriteDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
-		_, err := io.Copy(p.Input, strings.NewReader(command))
+		err := p.Input.SetWriteDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
+		if err != nil {
+			in_err <- err
+			return
+		}
+		_, err = io.Copy(p.Input, strings.NewReader(command))
 		p.Input.Close()
 		in_err <- err
 	}()
 	var outbuf bytes.Buffer
 	// read from stdout
-	p.Outfile.SetReadDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
-	_, err := io.Copy(&outbuf, p.Output)
+	err := p.Outfile.SetReadDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
+	if err != nil {
+		return MaximaResponse{nil, 0.0, err}
+	}
+	_, err = io.Copy(&outbuf, p.Output)
 	p.Outfile.Close()
 	input_err := <-in_err
 	if input_err != nil {
@@ -222,29 +271,36 @@ func (p *ChildProcess) eval_command(command string, timeout uint64) MaximaRespon
 
 // kills all processes of user and remove temporary directories
 func process_cleanup(user *User, user_queue chan<- *User, tmp_dir string) {
-	defer os.RemoveAll(tmp_dir)
-	defer func() { user_queue <- user }()
-
-	// TODO: replace with cgroups-v2 based freeze solution once docker support for cgroups2 lands
-	procs, err := ioutil.ReadDir("/proc")
-	if err != nil {
-		return
-	}
-	for _, dir := range procs {
-		pid, err := strconv.Atoi(dir.Name())
+	user.go_execute_as(func(err error) {
 		if err != nil {
-			continue
-		}
-		stat, ok := dir.Sys().(*syscall.Stat_t)
-		if !ok {
-			continue
+			log.Fatalf("Fatal: Error dropping privilege for process cleanup: %s", err)
+			return
 		}
-		if int(stat.Uid) != user.Uid {
-			continue
+		// we have a real uid of goemaxima-nobody in this context and an effective uid
+		// of the target user maxima-%d
+		//
+		// this allows us to kill all processes of the user:
+		// processes we are allowed to kill with our real uid:
+		// just ourselves, as long as goemaxima-nobody contains no other processes,
+		// but kill -1 doesn't kill the calling process itself on linux
+		// processes we are allowed to kill with our effective uid:
+		// all the processes that the target user maxima-%d contains
+		// any CAP_KILL capability is also not effective because we changed our effective uid
+		// to be non-zero
+		//
+		// note that we do not walk proc since that would not allow atomic killing of processes,
+		// which could allow someone to avoid getting killed by fork()ing very fast.
+		// Also, since we are in a docker container, using cgroups does not work well right now
+		//
+		// We ignore the error because we may not actually kill any processes
+		_ = syscall.Kill(-1, syscall.SIGKILL)
+		err = os.RemoveAll(tmp_dir)
+		if err != nil {
+			log.Printf("Warn: could not clean up directories of child: %s", err)
 		}
-		syscall.Kill(pid, syscall.SIGKILL)
-	}
-	debugf("Debug: Process %d cleand up", user.Id)
+		user_queue <- user
+		debugf("Debug: Process %d cleaned up", user.Id)
+	})
 }
 
 type MaximaRequest struct {
@@ -272,7 +328,7 @@ func (req *MaximaRequest) write_timeout_err() {
 	req.Metrics.NumTimeout.Inc()
 }
 
-func (req *MaximaRequest) respond_with_error(format string, a ...interface{}) {
+func (req *MaximaRequest) respond_with_log_error(format string, a ...interface{}) {
 	req.W.WriteHeader(http.StatusInternalServerError)
 	fmt.Fprint(req.W, "500 - internal server error\n")
 	req.Metrics.NumIntError.Inc()
@@ -281,7 +337,10 @@ func (req *MaximaRequest) respond_with_error(format string, a ...interface{}) {
 
 func (req *MaximaRequest) WriteResponseWithoutPlots(response MaximaResponse) {
 	req.W.Header().Set("Content-Type", "text/plain;charset=UTF-8")
-	response.Response.WriteTo(req.W)
+	_, err := response.Response.WriteTo(req.W)
+	if err != nil {
+		req.respond_with_log_error("could not write response body")
+	}
 	req.Metrics.ResponseTime.Observe(response.Time)
 	req.Metrics.NumSuccess.Inc()
 }
@@ -290,27 +349,36 @@ func (req *MaximaRequest) WriteResponseWithPlots(response MaximaResponse, output
 	debugf("Debug: output (%d) is zip, OUTPUT len %d", req.User.Id, response.Response.Len())
 	req.W.Header().Set("Content-Type", "application/zip;charset=UTF-8")
 	zipfile := zip.NewWriter(req.W)
+
 	out, err := zipfile.Create("OUTPUT")
 	if err != nil {
-		req.respond_with_error("Error: Could not add OUTPUT to zip archive: %s", err)
+		req.respond_with_log_error("Error: Could not add OUTPUT to zip archive: %s", err)
 		return
 	}
-	response.Response.WriteTo(out)
+
+	_, err = response.Response.WriteTo(out)
+	if err != nil {
+		req.respond_with_log_error("could not write response body")
+	}
+
 	// loop over all plots in the output directory and put them into the zip
 	// that is to be returned
 	for _, file := range plots_output {
 		ffilein, err := os.Open(filepath.Join(output_dir, file.Name()))
 		if err != nil {
-			req.respond_with_error("Error: could not open plot %s: %s", file.Name(), err)
+			req.respond_with_log_error("Error: could not open plot %s: %s", file.Name(), err)
 			return
 		}
 		defer ffilein.Close()
 		fzipput, err := zipfile.Create("/" + file.Name())
 		if err != nil {
-			req.respond_with_error("Error: could not add file %s to zip archive: %s", file.Name(), err)
+			req.respond_with_log_error("Error: could not add file %s to zip archive: %s", file.Name(), err)
 			return
 		}
-		io.Copy(fzipput, ffilein)
+		_, err = io.Copy(fzipput, ffilein)
+		if err != nil {
+			req.respond_with_log_error("could not write response body")
+		}
 	}
 	zipfile.Close()
 	req.Metrics.ResponseTime.Observe(response.Time)
@@ -325,19 +393,23 @@ func (req *MaximaRequest) WriteResponse(response MaximaResponse) {
 		return
 	}
 	if err != nil {
-		req.respond_with_error("Error: Communicating with maxima failed: %s", err)
+		req.respond_with_log_error("Error: Communicating with maxima failed: %s", err)
 		return
 	}
 	if req.Health {
 		if bytes.Contains(response.Response.Bytes(), []byte("healthcheck successful")) {
 			req.W.Header().Set("Content-Type", "text/plain;charset=UTF-8")
-			response.Response.WriteTo(req.W)
+			_, err = response.Response.WriteTo(req.W)
+			if err != nil {
+				req.respond_with_log_error("could not write response body")
+			}
+			debugf("Healthcheck passed")
 			// note: we don't update metrics here since they would get
 			// too polluted by the healthchecks
 			return
 		} else {
-			req.respond_with_error("Error: Healthcheck did not pass, output: %s", response.Response)
+			req.respond_with_log_error("Error: Healthcheck did not pass, output: %s", response.Response)
 			return
 		}
 	}
@@ -373,7 +445,7 @@ func (req *MaximaRequest) Respond() {
 	case outstr := <-proc_out:
 		req.WriteResponse(outstr)
 		return
-	case <-time.After(time.Duration(req.Timeout) * time.Millisecond):
+	case <-time.After(time.Duration(req.Timeout+10) * time.Millisecond):
 		debugf("Timeout with internal timer")
 		req.write_timeout_err()
 		return
@@ -450,25 +522,26 @@ func generate_maximas(binpath string, libs []string, queue chan<- *ChildProcess,
 	select {
 	case mom := <-mother_proc:
 		mother = mom
-	case <-time.After(10 * time.Second):
+	case <-time.After(MAXIMA_SPAWN_TIMEOUT):
 		log.Fatal("Fatal: Could not start the mother process, timed out")
 	}
 	fails := 0
 	for {
 		user := <-user_queue
-		new_proc, time, err := mother.spawn_new(user)
+		new_proc, tim, err := mother.spawn_new(user)
 		if err != nil {
 			fails += 1
 			log.Printf("Error: Could not spawn child process - fail %d, %s", fails, err)
 			if fails == 3 {
 				log.Fatal("Fatal: Failed to spawn child process 3 times in a row, giving up")
 			}
+			continue
 		} else {
 			fails = 0
 		}
 		debugf("Debug: Spawning process with id %d", user.Id)
 		metrics.QueueLen.Inc()
-		metrics.SpawnTime.Observe(time)
+		metrics.SpawnTime.Observe(tim)
 		queue <- new_proc
 		debugf("Debug: Spawned process with id %d", user.Id)
 	}
@@ -554,6 +627,11 @@ func main() {
 		log.Fatalf("Fatal: Cannot create %s: %s", tmp_prefix, err)
 	}
 
+	err = os.Chmod(tmp_prefix, 01777)
+	if err != nil {
+		log.Fatalf("Fatal: Cannot set permission on %s: %s", tmp_prefix, err)
+	}
+
 	// queue of ready maxima processes
 	queue := make(chan *ChildProcess, queue_len)
 	// queue of available user ids
@@ -582,6 +660,18 @@ func main() {
 		}
 	}
 
+	drop_queue := make(chan ExecutionInfo, user_number)
+	err = InitializeDropper(drop_queue)
+	if err != nil {
+		log.Fatalf("Fatal: cannot run privilege dropper: %s", err)
+	}
+	// run two droppers for more parallelism
+	err = InitializeDropper(drop_queue)
+	if err != nil {
+		log.Fatalf("Fatal: cannot run privilege dropper: %s", err)
+	}
+	privilege_drop_channel = drop_queue
+
 	libs := append(strings.Split(os.Getenv("GOEMAXIMA_EXTRA_PACKAGES"), ":"), os.Getenv("GOEMAXIMA_LIB_PATH"))
 	// spawn maxima processes in separate goroutine
 	go generate_maximas(os.Args[1], libs, queue, user_queue, &metrics)
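
Editorial note, not part of the patch: the essence of src/web/drop_privilege_linux.go above is per-thread credential switching. It only works because the goroutine is pinned to one kernel thread and the raw setresuid syscall is used; the usual library wrappers apply the change to every thread of the process. The following stand-alone sketch condenses that pattern; the package, function, and uid names are illustrative, and the matching gid handling (done the same way with SYS_SETRESGID) is omitted for brevity.

package privdropsketch

import (
	"fmt"
	"runtime"

	"golang.org/x/sys/unix"
)

// runAsUser pins the calling goroutine to one OS thread, switches only that
// thread's uids with the raw setresuid syscall (real = nobody so kill -1 from
// a child cannot reach us, effective = target user, saved = server so we can
// switch back), runs f, and then restores the server uid.
func runAsUser(serverUID, nobodyUID, targetUID int, f func() error) error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if _, _, errno := unix.Syscall(unix.SYS_SETRESUID,
		uintptr(nobodyUID), uintptr(targetUID), uintptr(serverUID)); errno != 0 {
		return fmt.Errorf("setresuid: %v", errno)
	}
	defer func() {
		// restore all three uids to the server uid; give up hard on failure,
		// since continuing with mixed credentials would be unsafe
		if _, _, errno := unix.Syscall(unix.SYS_SETRESUID,
			uintptr(serverUID), uintptr(serverUID), uintptr(serverUID)); errno != 0 {
			panic(errno)
		}
	}()
	return f()
}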
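
A second editorial note, also not part of the patch, on why the kill(-1) in process_cleanup reaches exactly the right processes: on Linux a sender may signal a target when the sender's real or effective uid matches the target's real or saved set-uid, and kill(-1, sig) never signals the calling process. With real uid goemaxima-nobody and effective uid maxima-N, one call therefore kills all maxima-N processes atomically, which a /proc walk cannot guarantee. A minimal illustrative sketch (names are not goemaxima identifiers):

package cleanupsketch

import (
	"log"
	"os"
	"syscall"
)

// cleanupAsMaximaUser must run on a thread whose real uid is the nobody user
// and whose effective uid is the maxima child user.
func cleanupAsMaximaUser(tmpDir string) {
	// the error is ignored on purpose: ESRCH just means nothing was left to kill
	_ = syscall.Kill(-1, syscall.SIGKILL)
	if err := os.RemoveAll(tmpDir); err != nil {
		log.Printf("Warn: could not clean up %s: %s", tmpDir, err)
	}
}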