From af49de988a77f557f4a19786710eb229f7aa0a5f Mon Sep 17 00:00:00 2001
From: Lennart Kramer <lennart.kramer@stud.uni-goettingen.de>
Date: Wed, 14 Dec 2022 20:40:15 +0100
Subject: [PATCH] change server to need less capabilities

there are now threads that can lower their privileges
to do things in the name of the maxima users. this both
increases security and is actually also somehow faster,
maybe because we don't scan /proc anymore for leftover
processes and instead use kill -1
---
 Dockerfile                      |   6 +-
 buildscript.sh                  |   2 +-
 docker-compose.yml              |   3 -
 helmmaxima/values.yaml          |   3 -
 src/maxima_fork.c               |  50 +++----
 src/web/drop_privilege_linux.go | 141 ++++++++++++++++++
 src/web/web.go                  | 248 ++++++++++++++++++++++----------
 7 files changed, 341 insertions(+), 112 deletions(-)
 create mode 100644 src/web/drop_privilege_linux.go

diff --git a/Dockerfile b/Dockerfile
index 0183d7e..79dcb52 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,9 +42,13 @@ RUN grep stackmaximaversion ${LIB}/stackmaxima.mac | grep -oP "\d+" >> /opt/maxi
            useradd -M "maxima-$i"; \
     done
 
+RUN useradd -M "goemaxima-nobody"
+
 # Add go webserver
 COPY ./bin/web ${BIN}/goweb
 
+RUN chmod 0700 "${BIN}/goweb" "${BIN}/maxima-optimised"
+
 ENV GOEMAXIMA_LIB_PATH=/opt/maxima/assets/maximalocal.mac
 ENV GOEMAXIMA_NUSER=$MAX_USER
 RUN sh -c 'echo $GOEMAXIMA_NUSER'
@@ -56,4 +60,4 @@ HEALTHCHECK --interval=1m --timeout=3s CMD curl -f 'http://localhost:8080/goemax
 
 # clear tmp because when kubernetes restarts a pod, it keeps the /tmp content even if it's tmpfs,
 # which means that on a restart caused by an overfull tmpfs, it will keep restarting in a loop
-CMD cd /tmp && rm --one-file-system -rf * && exec tini ${BIN}/goweb ${BIN}/maxima-optimised
+CMD cd /tmp && rm --one-file-system -rf * && exec tini ${BIN}/goweb ${BIN}/maxima-optimised || echo oh no no >&2
diff --git a/buildscript.sh b/buildscript.sh
index 4c813ac..80d9701 100644
--- a/buildscript.sh
+++ b/buildscript.sh
@@ -12,7 +12,7 @@ if [ $SBCL_ARCH = amd64 ]; then
 fi
 
 apt-get update
-apt-get install -y bzip2 make wget python3 gcc texinfo curl
+apt-get install -y bzip2 make wget python3 gcc texinfo curl libcap2-bin
 
 mkdir -p ${SRC}
 wget https://sourceforge.net/projects/maxima/files/Maxima-source/${MAXIMA_VERSION}-source/maxima-${MAXIMA_VERSION}.tar.gz -O ${SRC}/maxima-${MAXIMA_VERSION}.tar.gz
diff --git a/docker-compose.yml b/docker-compose.yml
index 52f709d..537495d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,11 +8,8 @@ services:
       - "/tmp"
     restart: unless-stopped
     cap_add:
-      - CHOWN
-      - DAC_OVERRIDE
       - SETGID
       - SETUID
-      - KILL
     cap_drop:
       - ALL
     read_only: true
diff --git a/helmmaxima/values.yaml b/helmmaxima/values.yaml
index ebdf182..b56de38 100644
--- a/helmmaxima/values.yaml
+++ b/helmmaxima/values.yaml
@@ -27,11 +27,8 @@ podSecurityContext: {}
 securityContext:
   capabilities:
     add:
-    - CHOWN
-    - DAC_OVERRIDE
     - SETGID
     - SETUID
-    - KILL
     drop:
     - ALL
   readOnlyRootFilesystem: true
diff --git a/src/maxima_fork.c b/src/maxima_fork.c
index 3fa1164..d95e06f 100644
--- a/src/maxima_fork.c
+++ b/src/maxima_fork.c
@@ -125,6 +125,29 @@ char *fork_new_process() {
 			continue;
 		}
 
+		uid_t uid = user_id[slot - 1];
+		gid_t gid = group_id[slot - 1];
+		// note: setgid should be executed before setuid when dropping from root
+		if (setgid(gid) == -1) {
+			perror("Could not set gid");
+			ret = NULL;
+			break;
+		}
+	
+		// remove all aux groups
+		if (setgroups(0, NULL)) {
+			perror("Could not remove aux groups");
+			ret = NULL;
+			break;
+		}
+	
+		// after this, we should be non-root
+		if (setuid(uid) == -1) {
+			perror("Could not set uid");
+			ret = NULL;
+			break;
+		}
+
 		if (chdir(tempdir) == -1) {
 			perror("Could not chdir to temporary directory");
 			ret = NULL;
@@ -157,36 +180,13 @@ char *fork_new_process() {
 			break;
 		}
 
-		uid_t uid = user_id[slot - 1];
-		gid_t gid = group_id[slot - 1];
-		// note: setgid should be executed before setuid when dropping from root
-		if (setgid(gid) == -1) {
-			perror("Could not set gid");
-			ret = NULL;
-			break;
-		}
-	
-		// remove all aux groups
-		if (setgroups(0, NULL)) {
-			perror("Could not remove aux groups");
-			ret = NULL;
-			break;
-		}
-	
-		// after this, we should be non-root
-		if (setuid(uid) == -1) {
-			perror("Could not set uid");
-			ret = NULL;
-			break;
-		}
-
 		// create temporary folders and files
-		if (mkdir("output", 0770) == -1) {
+		if (mkdir("output", 0755) == -1) {
 			perror("Could not create output directory");
 			ret = NULL;
 			break;
 		}
-		if (mkdir("work", 0770) == -1) {
+		if (mkdir("work", 0755) == -1) {
 			perror("Could not create work directory");
 			ret = NULL;
 			break;
diff --git a/src/web/drop_privilege_linux.go b/src/web/drop_privilege_linux.go
new file mode 100644
index 0000000..2181008
--- /dev/null
+++ b/src/web/drop_privilege_linux.go
@@ -0,0 +1,141 @@
+package main
+
+// This file drops the privileges of a single thread and executes functions in it.
+// The effective uid is set to that of the maxima user we want to interact with.
+// The real uid is set to goemaxima-nobody, which ensures that kill -1 does not
+// kill the maxima mother process. If the real uid was the maxima user, an evil
+// maxima process might ptrace our own process and use that to escalate privileges,
+// or send signals or interact in another way.
+
+import (
+	"fmt"
+	"os/user"
+	"runtime"
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+const goemaxima_nobody string = "goemaxima-nobody"
+
+type PrivilegeDropper struct {
+	server_uid       uint16
+	server_gid       uint16
+	nobody_uid       uint16
+	nobody_gid       uint16
+	execution_getter <-chan ExecutionInfo
+}
+
+type ExecutionInfo struct {
+	Uid uint16
+	Gid uint16
+	F   func(error)
+}
+
+func (dropper *PrivilegeDropper) thread_drop_euid(uid uint16) error {
+	// These functions use the raw syscall interface since unix/posix usually regards setresuid to set the UIDs of the whole process
+	// (usually implemented through some signal handlers) but we absolutely don't want that.
+	_, _, errno := unix.Syscall(unix.SYS_SETRESUID, uintptr(dropper.nobody_uid), uintptr(uid), uintptr(dropper.server_uid))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func (dropper *PrivilegeDropper) thread_restore_euid() {
+	// since we still have our saved-set-uid, we can restore our uids
+	_, _, errno := unix.Syscall(unix.SYS_SETRESUID, uintptr(dropper.server_uid), uintptr(dropper.server_uid), uintptr(dropper.server_uid))
+	if errno != 0 {
+		panic(errno)
+	}
+}
+
+func (dropper *PrivilegeDropper) thread_drop_egid(gid uint16) error {
+	_, _, errno := unix.Syscall(unix.SYS_SETRESGID, uintptr(dropper.nobody_gid), uintptr(gid), uintptr(dropper.server_gid))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func (dropper *PrivilegeDropper) thread_restore_egid() {
+	_, _, errno := unix.Syscall(unix.SYS_SETRESGID, uintptr(dropper.server_gid), uintptr(dropper.server_gid), uintptr(dropper.server_gid))
+	if errno != 0 {
+		panic(errno)
+	}
+}
+
+func (dropper *PrivilegeDropper) run_as_user(uid, gid uint16, f func(error)) {
+	if err := dropper.thread_drop_egid(gid); err != nil {
+		f(err)
+		return
+	}
+	defer dropper.thread_restore_egid()
+
+	if err := dropper.thread_drop_euid(uid); err != nil {
+		f(err)
+		return
+	}
+	defer dropper.thread_restore_euid()
+
+	f(nil)
+}
+
+func (dropper *PrivilegeDropper) run() {
+	// since goroutines may switch between machine threads at any time,
+	// we lock this routine to always stay on the same machine thread
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+	for ex := range dropper.execution_getter {
+		dropper.run_as_user(ex.Uid, ex.Gid, ex.F)
+	}
+}
+
+func InitializeDropper(execution_channel chan ExecutionInfo) error {
+	server, err := user.Current()
+	if err != nil {
+		return err
+	}
+
+	server_uid, err := strconv.Atoi(server.Uid)
+	if err != nil {
+		return err
+	}
+
+	server_gid, err := strconv.Atoi(server.Gid)
+	if err != nil {
+		return err
+	}
+
+	nobody, err := user.Lookup(goemaxima_nobody)
+	if err != nil {
+		return fmt.Errorf("no %s user found, please make sure it exists and is not the user used by the server; error: %s", goemaxima_nobody, err)
+	}
+
+	nobody_uid, err := strconv.Atoi(nobody.Uid)
+	if err != nil {
+		return err
+	}
+
+	nobody_gid, err := strconv.Atoi(nobody.Gid)
+	if err != nil {
+		return err
+	}
+
+	if nobody_uid == server_uid {
+		return fmt.Errorf("server uid %d is same as %s uid %d. Please make sure that the server does not run as %s",
+			server_uid, goemaxima_nobody, nobody_uid, goemaxima_nobody)
+	}
+
+	dropper := PrivilegeDropper{
+		server_uid:       uint16(server_uid),
+		server_gid:       uint16(server_gid),
+		nobody_uid:       uint16(nobody_uid),
+		nobody_gid:       uint16(nobody_gid),
+		execution_getter: execution_channel,
+	}
+
+	go dropper.run()
+
+	return nil
+}
diff --git a/src/web/web.go b/src/web/web.go
index 7091f17..a27ed4f 100644
--- a/src/web/web.go
+++ b/src/web/web.go
@@ -25,6 +25,9 @@ import (
 
 var tmp_prefix string
 var debug uint
+var privilege_drop_channel chan<- ExecutionInfo
+
+const MAXIMA_SPAWN_TIMEOUT = time.Duration(10) * time.Second
 
 type User struct {
 	Id   uint
@@ -33,6 +36,29 @@ type User struct {
 	Gid  int
 }
 
+func (user *User) go_execute_as(f func(error)) {
+	uid16 := uint16(user.Uid)
+	gid16 := uint16(user.Gid)
+	privilege_drop_channel <- ExecutionInfo{
+		Uid: uid16,
+		Gid: gid16,
+		F:   f,
+	}
+}
+
+func (user *User) sync_execute_as(timeout time.Duration, f func(error) error) error {
+	err_chan := make(chan error)
+	user.go_execute_as(func(err error) {
+		err_chan <- f(err)
+	})
+	select {
+	case result := <-err_chan:
+		return result
+	case <-time.After(timeout):
+		return fmt.Errorf("timed out executing function with lowered privileges")
+	}
+}
+
 type Metrics struct {
 	ResponseTime prometheus.Histogram
 	SpawnTime    prometheus.Histogram
@@ -126,64 +152,80 @@ func new_mother_proc(binpath string, libs []string) (*MotherProcess, error) {
 
 func (p *MotherProcess) spawn_new(user *User) (*ChildProcess, float64, error) {
 	start := time.Now()
-	tmp_dir, err := ioutil.TempDir(tmp_prefix, "maxima-plot-")
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("unable to create temp dir: %s", err)
-	}
-	// right permission for folders
-	err = os.Chown(tmp_dir, user.Uid, user.Gid)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("not able to change tempdir permission: %s", err)
-	}
+	var result ChildProcess
+	err := user.sync_execute_as(MAXIMA_SPAWN_TIMEOUT, func(err error) error {
+		if err != nil {
+			return err
+		}
+		tmp_dir, err := ioutil.TempDir(tmp_prefix, "maxima-plot-")
+		if err != nil {
+			return fmt.Errorf("unable to create temp dir: %s", err)
+		}
 
-	// create named pipe for process stdout
-	pipe_name_out := filepath.Clean(filepath.Join(tmp_dir, "outpipe"))
+		err = os.Chmod(tmp_dir, 0755)
+		if err != nil {
+			return fmt.Errorf("unable to change permissions of temp dir: %s", err)
+		}
 
-	err = syscall.Mkfifo(pipe_name_out, 0600)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not create named pipe in temp folder: %s", err)
-	}
+		// create named pipe for process stdout
+		pipe_name_out := filepath.Clean(filepath.Join(tmp_dir, "outpipe"))
 
-	// create named pipe for process stdin
-	pipe_name_in := filepath.Clean(filepath.Join(tmp_dir, "inpipe"))
+		err = syscall.Mkfifo(pipe_name_out, 0600)
+		if err != nil {
+			return fmt.Errorf("could not create named pipe in temp folder: %s", err)
+		}
 
-	err = syscall.Mkfifo(pipe_name_in, 0600)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not create named pipe in temp folder: %s", err)
-	}
+		// create named pipe for process stdin
+		pipe_name_in := filepath.Clean(filepath.Join(tmp_dir, "inpipe"))
 
-	_, err = fmt.Fprintf(p.Input, "%d%s\n", user.Id, tmp_dir)
-	if err != nil {
-		return nil, 0.0, fmt.Errorf("unable to communicate with process: %s", err)
-	}
+		err = syscall.Mkfifo(pipe_name_in, 0600)
+		if err != nil {
+			return fmt.Errorf("could not create named pipe in temp folder: %s", err)
+		}
 
-	debugf("Debug: Opening pipes to child")
-	// note: open outpipe before inpipe to avoid deadlock
-	out, err := os.OpenFile(pipe_name_out, os.O_RDONLY, os.ModeNamedPipe)
+		_, err = fmt.Fprintf(p.Input, "%d%s\n", user.Id, tmp_dir)
+		if err != nil {
+			return fmt.Errorf("unable to communicate with process: %s", err)
+		}
+
+		debugf("Debug: Opening pipes to child")
+		// note: open outpipe before inpipe to avoid deadlock
+		out, err := os.OpenFile(pipe_name_out, os.O_RDONLY, os.ModeNamedPipe)
+		if err != nil {
+			return fmt.Errorf("could not open temp dir outpipe: %s", err)
+		}
+
+		in, err := os.OpenFile(pipe_name_in, os.O_WRONLY, os.ModeNamedPipe)
+		if err != nil {
+			return fmt.Errorf("could not open temp dir inpipe: %s", err)
+		}
+
+		result.User = user
+		result.Input = in
+		result.Outfile = out
+		result.TempDir = tmp_dir
+		return nil
+	})
 	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not open temp dir outpipe: %s", err)
+		return nil, 0.0, fmt.Errorf("unable to start child process: %s", err)
 	}
 
-	in, err := os.OpenFile(pipe_name_in, os.O_WRONLY, os.ModeNamedPipe)
+	debugf("Debug: Attempting to read from child")
+	err = result.Outfile.SetReadDeadline(time.Now().Add(MAXIMA_SPAWN_TIMEOUT))
 	if err != nil {
-		return nil, 0.0, fmt.Errorf("could not open temp dir inpipe: %s", err)
+		return nil, 0.0, fmt.Errorf("unable to set read deadline: %s", err)
 	}
-
-	out.SetReadDeadline(time.Now().Add(10 * time.Second))
-	bufout := bufio.NewReader(out)
+	bufout := bufio.NewReader(result.Outfile)
 	_, err = fmt.Fscanf(bufout, "\nT")
 	if err != nil {
 		return nil, 0.0, fmt.Errorf("not able to find end marker: %s", err)
 	}
+	result.Output = bufout
+
 	total := time.Since(start)
+	total_ms := float64(total.Microseconds()) / 1000
 	debugf("Debug: child took %s for startup", total)
-	return &ChildProcess{
-		User:    user,
-		Input:   in,
-		Output:  bufout,
-		Outfile: out,
-		TempDir: tmp_dir,
-	}, float64(total.Microseconds()) / 1000, nil
+	return &result, total_ms, nil
 }
 
 type MaximaResponse struct {
@@ -198,15 +240,22 @@ func (p *ChildProcess) eval_command(command string, timeout uint64) MaximaRespon
 	in_err := make(chan error, 1)
 	// write to stdin in separate goroutine to prevent deadlocks
 	go func() {
-		p.Input.SetWriteDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
-		_, err := io.Copy(p.Input, strings.NewReader(command))
+		err := p.Input.SetWriteDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
+		if err != nil {
+			in_err <- err
+			return
+		}
+		_, err = io.Copy(p.Input, strings.NewReader(command))
 		p.Input.Close()
 		in_err <- err
 	}()
 	var outbuf bytes.Buffer
 	// read from stdout
-	p.Outfile.SetReadDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
-	_, err := io.Copy(&outbuf, p.Output)
+	err := p.Outfile.SetReadDeadline(time.Now().Add(time.Duration(timeout+10) * time.Millisecond))
+	if err != nil {
+		return MaximaResponse{nil, 0.0, err}
+	}
+	_, err = io.Copy(&outbuf, p.Output)
 	p.Outfile.Close()
 	input_err := <-in_err
 	if input_err != nil {
@@ -222,29 +271,36 @@ func (p *ChildProcess) eval_command(command string, timeout uint64) MaximaRespon
 
 // kills all processes of user and remove temporary directories
 func process_cleanup(user *User, user_queue chan<- *User, tmp_dir string) {
-	defer os.RemoveAll(tmp_dir)
-	defer func() { user_queue <- user }()
-
-	// TODO: replace with cgroups-v2 based freeze solution once docker support for cgroups2 lands
-	procs, err := ioutil.ReadDir("/proc")
-	if err != nil {
-		return
-	}
-	for _, dir := range procs {
-		pid, err := strconv.Atoi(dir.Name())
+	user.go_execute_as(func(err error) {
 		if err != nil {
-			continue
-		}
-		stat, ok := dir.Sys().(*syscall.Stat_t)
-		if !ok {
-			continue
+			log.Fatalf("Fatal: Error dropping privilege for process cleanup: %s", err)
+			return
 		}
-		if int(stat.Uid) != user.Uid {
-			continue
+		// we have an real uid of goemaxima-nobody in this context and an effective id
+		// of the target user maxima-%d
+		//
+		// this allows us to kill all processes of the user:
+		// processes we are allowed to kill with our real uid:
+		// 		just ourselves as long as goemaxima-nobody contains no other processes
+		//      but kill -1 doesn't kill the process itself on linux
+		// processes we are allowed to kill with our effective uid:
+		//      all the processes that the target user maxima-%d contains
+		// any CAP_KILL capability is also not effective because we changed our effective uid
+		// to be non-zero
+		//
+		// note that we do not walk proc since that would not allow atomic killing of processes
+		// which could allow someone to avoid getting killed by fork()ing very fast.
+		// Also, since we are in a docker container, using cgroups does not work well right now
+		//
+		// We ignore the error because we may not actually kill any processes
+		_ = syscall.Kill(-1, syscall.SIGKILL)
+		err = os.RemoveAll(tmp_dir)
+		if err != nil {
+			log.Printf("Warn: could not clean up directories of child: %s", err)
 		}
-		syscall.Kill(pid, syscall.SIGKILL)
-	}
-	debugf("Debug: Process %d cleand up", user.Id)
+		user_queue <- user
+		debugf("Debug: Process %d cleaned up", user.Id)
+	})
 }
 
 type MaximaRequest struct {
@@ -272,7 +328,7 @@ func (req *MaximaRequest) write_timeout_err() {
 	req.Metrics.NumTimeout.Inc()
 }
 
-func (req *MaximaRequest) respond_with_error(format string, a ...interface{}) {
+func (req *MaximaRequest) respond_with_log_error(format string, a ...interface{}) {
 	req.W.WriteHeader(http.StatusInternalServerError)
 	fmt.Fprint(req.W, "500 - internal server error\n")
 	req.Metrics.NumIntError.Inc()
@@ -281,7 +337,10 @@ func (req *MaximaRequest) respond_with_error(format string, a ...interface{}) {
 
 func (req *MaximaRequest) WriteResponseWithoutPlots(response MaximaResponse) {
 	req.W.Header().Set("Content-Type", "text/plain;charset=UTF-8")
-	response.Response.WriteTo(req.W)
+	_, err := response.Response.WriteTo(req.W)
+	if err != nil {
+		req.respond_with_log_error("could not write response body")
+	}
 	req.Metrics.ResponseTime.Observe(response.Time)
 	req.Metrics.NumSuccess.Inc()
 }
@@ -290,27 +349,36 @@ func (req *MaximaRequest) WriteResponseWithPlots(response MaximaResponse, output
 	debugf("Debug: output (%d) is zip, OUTPUT len %d", req.User.Id, response.Response.Len())
 	req.W.Header().Set("Content-Type", "application/zip;charset=UTF-8")
 	zipfile := zip.NewWriter(req.W)
+
 	out, err := zipfile.Create("OUTPUT")
 	if err != nil {
-		req.respond_with_error("Error: Could not add OUTPUT to zip archive: %s", err)
+		req.respond_with_log_error("Error: Could not add OUTPUT to zip archive: %s", err)
 		return
 	}
-	response.Response.WriteTo(out)
+
+	_, err = response.Response.WriteTo(out)
+	if err != nil {
+		req.respond_with_log_error("could not write response body")
+	}
+
 	// loop over all plots in the output directory and put them into the zip
 	// that is to be returned
 	for _, file := range plots_output {
 		ffilein, err := os.Open(filepath.Join(output_dir, file.Name()))
 		if err != nil {
-			req.respond_with_error("Error: could not open plot %s: %s", file.Name(), err)
+			req.respond_with_log_error("Error: could not open plot %s: %s", file.Name(), err)
 			return
 		}
 		defer ffilein.Close()
 		fzipput, err := zipfile.Create("/" + file.Name())
 		if err != nil {
-			req.respond_with_error("Error: could not add file %s to zip archive: %s", file.Name(), err)
+			req.respond_with_log_error("Error: could not add file %s to zip archive: %s", file.Name(), err)
 			return
 		}
-		io.Copy(fzipput, ffilein)
+		_, err = io.Copy(fzipput, ffilein)
+		if err != nil {
+			req.respond_with_log_error("could not write response body")
+		}
 	}
 	zipfile.Close()
 	req.Metrics.ResponseTime.Observe(response.Time)
@@ -325,19 +393,23 @@ func (req *MaximaRequest) WriteResponse(response MaximaResponse) {
 		return
 	}
 	if err != nil {
-		req.respond_with_error("Error: Communicating with maxima failed: %s", err)
+		req.respond_with_log_error("Error: Communicating with maxima failed: %s", err)
 		return
 	}
 	if req.Health {
 		if bytes.Contains(response.Response.Bytes(), []byte("healthcheck successful")) {
 			req.W.Header().Set("Content-Type", "text/plain;charset=UTF-8")
-			response.Response.WriteTo(req.W)
+			_, err = response.Response.WriteTo(req.W)
+			if err != nil {
+				req.respond_with_log_error("could not write response body")
+			}
+
 			debugf("Healthcheck passed")
 			// note: we don't update metrics here since they would get
 			// too polluted by the healthchecks
 			return
 		} else {
-			req.respond_with_error("Error: Healthcheck did not pass, output: %s", response.Response)
+			req.respond_with_log_error("Error: Healthcheck did not pass, output: %s", response.Response)
 			return
 		}
 	}
@@ -373,7 +445,7 @@ func (req *MaximaRequest) Respond() {
 	case outstr := <-proc_out:
 		req.WriteResponse(outstr)
 		return
-	case <-time.After(time.Duration(req.Timeout) * time.Millisecond):
+	case <-time.After(time.Duration(req.Timeout+10) * time.Millisecond):
 		debugf("Timeout with internal timer")
 		req.write_timeout_err()
 		return
@@ -450,25 +522,26 @@ func generate_maximas(binpath string, libs []string, queue chan<- *ChildProcess,
 	select {
 	case mom := <-mother_proc:
 		mother = mom
-	case <-time.After(10 * time.Second):
+	case <-time.After(MAXIMA_SPAWN_TIMEOUT):
 		log.Fatal("Fatal: Could not start the mother process, timed out")
 	}
 	fails := 0
 	for {
 		user := <-user_queue
-		new_proc, time, err := mother.spawn_new(user)
+		new_proc, tim, err := mother.spawn_new(user)
 		if err != nil {
 			fails += 1
 			log.Printf("Error: Could not spawn child process - fail %d, %s", fails, err)
 			if fails == 3 {
 				log.Fatal("Fatal: Failed to spawn child process 3 times in a row, giving up")
 			}
+			continue
 		} else {
 			fails = 0
 		}
 		debugf("Debug: Spawning process with id %d", user.Id)
 		metrics.QueueLen.Inc()
-		metrics.SpawnTime.Observe(time)
+		metrics.SpawnTime.Observe(tim)
 		queue <- new_proc
 		debugf("Debug: Spawned process with id %d", user.Id)
 	}
@@ -554,6 +627,11 @@ func main() {
 		log.Fatalf("Fatal: Cannot create %s: %s", tmp_prefix, err)
 	}
 
+	err = os.Chmod(tmp_prefix, 01777)
+	if err != nil {
+		log.Fatalf("Fatal: Cannot set permission on %s: %s", tmp_prefix, err)
+	}
+
 	// queue of ready maxima processes
 	queue := make(chan *ChildProcess, queue_len)
 	// queue of available user ids
@@ -582,6 +660,18 @@ func main() {
 		}
 	}
 
+	drop_queue := make(chan ExecutionInfo, user_number)
+	err = InitializeDropper(drop_queue)
+	if err != nil {
+		log.Fatalf("Fatal: cannot run privilege dropper: %s", err)
+	}
+	// run two droppers for more parallelism
+	err = InitializeDropper(drop_queue)
+	if err != nil {
+		log.Fatalf("Fatal: cannot run privilege dropper: %s", err)
+	}
+	privilege_drop_channel = drop_queue
+
 	libs := append(strings.Split(os.Getenv("GOEMAXIMA_EXTRA_PACKAGES"), ":"), os.Getenv("GOEMAXIMA_LIB_PATH"))
 	// spawn maxima processes in separate goroutine
 	go generate_maximas(os.Args[1], libs, queue, user_queue, &metrics)
-- 
GitLab