quark

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 837fede8ab3a4bff12d4390880d4000066859e34
Author: Kris Yotam <krisyotam@protonmail.com>
Date:   Thu, 29 Jan 2026 16:18:30 -0600

Initial commit: quark - tiny HTTP server

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Diffstat:
ALICENSE | 27+++++++++++++++++++++++++++
AMakefile | 46++++++++++++++++++++++++++++++++++++++++++++++
AREADME.md | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aarg.h | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Aconfig.def.h | 39+++++++++++++++++++++++++++++++++++++++
Aconfig.mk | 16++++++++++++++++
Aconnection.c | 315+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aconnection.h | 32++++++++++++++++++++++++++++++++
Adata.c | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adata.h | 18++++++++++++++++++
Ahttp.c | 1046+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahttp.h | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain.c | 364+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aquark.1 | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aqueue.c | 217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aqueue.h | 33+++++++++++++++++++++++++++++++++
Aserver.c | 177+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aserver.h | 35+++++++++++++++++++++++++++++++++++
Asock.c | 209+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asock.h | 18++++++++++++++++++
Autil.c | 281+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autil.h | 42++++++++++++++++++++++++++++++++++++++++++
22 files changed, 3504 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,27 @@ +ISC-License + +Copyright 2016-2021 Laslo Hunhold <dev@frign.de> + +Copyright 2004 Ted Unangst <tedu@openbsd.org> +Copyright 2004 Todd C. Miller <Todd.Miller@courtesan.com> +Copyright 2008 Otto Moerbeek <otto@drijf.net> +Copyright 2017-2018 Hiltjo Posthuma <hiltjo@codemadness.org> +Copyright 2017-2021 Quentin Rameau <quinq@fifth.space> +Copyright 2018 Josuah Demangeon <mail@josuah.net> +Copyright 2018 Dominik Schmidt <domischmidt@swissonline.ch> +Copyright 2018 Aaron Burrow <burrows@charstarstar.com> +Copyright 2020 Nihal Jere <nihal@nihaljere.xyz> +Copyright 2020 Rainer Holzner <rholzner@web.de> +Copyright 2020 Jeremy Bobbin <jer@jer.cx> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/Makefile b/Makefile @@ -0,0 +1,46 @@ +# See LICENSE file for copyright and license details +# quark - simple web server +.POSIX: + +include config.mk + +COMPONENTS = connection data http queue server sock util + +all: quark + +connection.o: connection.c config.h connection.h data.h http.h server.h sock.h util.h config.mk +data.o: data.c config.h data.h http.h server.h util.h config.mk +http.o: http.c config.h http.h server.h util.h config.mk +main.o: main.c arg.h config.h server.h sock.h util.h config.mk +server.o: server.c config.h connection.h http.h queue.h server.h util.h config.mk +sock.o: sock.c config.h sock.h util.h config.mk +util.o: util.c config.h util.h config.mk + +quark: config.h $(COMPONENTS:=.o) $(COMPONENTS:=.h) main.o config.mk + $(CC) -o $@ $(CPPFLAGS) $(CFLAGS) $(COMPONENTS:=.o) main.o $(LDFLAGS) + +config.h: + cp config.def.h $@ + +clean: + rm -f quark main.o $(COMPONENTS:=.o) + +dist: + rm -rf "quark-$(VERSION)" + mkdir -p "quark-$(VERSION)" + cp -R LICENSE Makefile arg.h config.def.h config.mk quark.1 \ + $(COMPONENTS:=.c) $(COMPONENTS:=.h) main.c "quark-$(VERSION)" + tar -cf - "quark-$(VERSION)" | gzip -c > "quark-$(VERSION).tar.gz" + rm -rf "quark-$(VERSION)" + +install: all + mkdir -p "$(DESTDIR)$(PREFIX)/bin" + cp -f quark "$(DESTDIR)$(PREFIX)/bin" + chmod 755 "$(DESTDIR)$(PREFIX)/bin/quark" + mkdir -p "$(DESTDIR)$(MANPREFIX)/man1" + cp quark.1 "$(DESTDIR)$(MANPREFIX)/man1/quark.1" + chmod 644 "$(DESTDIR)$(MANPREFIX)/man1/quark.1" + +uninstall: + rm -f "$(DESTDIR)$(PREFIX)/bin/quark" + rm -f "$(DESTDIR)$(MANPREFIX)/man1/quark.1" diff --git a/README.md b/README.md @@ -0,0 +1,74 @@ +# Kris's build of quark + +My build of [quark](https://tools.suckless.org/quark/), an extremely small and simple HTTP GET/HEAD-only web server for static content. + +--- + +## About + +quark is a tiny, secure HTTP server from suckless.org designed to serve static files with minimal attack surface. 
+ +--- + +## Features + +- **Minimal**: Tiny codebase, easy to audit +- **Secure**: Privilege separation, chroot support +- **Fast**: Event-driven architecture +- **Standards**: HTTP/1.1 compliant (GET/HEAD only) +- **Virtual hosts**: Support for multiple domains +- **TLS**: Optional TLS support + +--- + +## Usage + +```bash +# Serve /var/www/html on port 8080 +quark -p 8080 -d /var/www/html + +# With virtual hosts +quark -p 80 -d /var/www -v vhosts.conf + +# With chroot (as root) +quark -p 80 -d /var/www -u nobody -g nogroup +``` + +### Options + +| Flag | Description | +|------|-------------| +| `-d` | Document root directory | +| `-p` | Port to listen on | +| `-h` | Host/IP to bind to | +| `-u` | User to drop privileges to | +| `-g` | Group to drop privileges to | +| `-v` | Virtual hosts config file | +| `-l` | Enable directory listing | + +--- + +## Installation + +```bash +git clone https://github.com/krisyotam/quark +cd quark +sudo make install +``` + +--- + +## Other Suckless Repos + +- [dwm](https://github.com/krisyotam/dwm) - dynamic window manager +- [st](https://github.com/krisyotam/st) - simple terminal +- [dmenu](https://github.com/krisyotam/dmenu) - dynamic menu +- [dwmblocks](https://github.com/krisyotam/dwmblocks) - modular status bar +- [scron](https://github.com/krisyotam/scron) - simple cron daemon + +--- + +## Contact + +- Kris Yotam <krisyotam@protonmail.com> +- [https://krisyotam.com](https://krisyotam.com) diff --git a/arg.h b/arg.h @@ -0,0 +1,50 @@ +/* + * ISC-License + * + * Copyright 2004-2017 Christoph Lohmann <20h@r-36.net> + * Copyright 2017-2018 Laslo Hunhold <dev@frign.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef ARG_H +#define ARG_H + +extern char *argv0; + +/* int main(int argc, char *argv[]) */ +#define ARGBEGIN for (argv0 = *argv, *argv ? (argc--, argv++) : ((void *)0); \ + *argv && (*argv)[0] == '-' && (*argv)[1]; argc--, argv++) { \ + int i_, argused_; \ + if ((*argv)[1] == '-' && !(*argv)[2]) { \ + argc--, argv++; \ + break; \ + } \ + for (i_ = 1, argused_ = 0; (*argv)[i_]; i_++) { \ + switch((*argv)[i_]) +#define ARGEND if (argused_) { \ + if ((*argv)[i_ + 1]) { \ + break; \ + } else { \ + argc--, argv++; \ + break; \ + } \ + } \ + } \ + } +#define ARGC() ((*argv)[i_]) +#define ARGF_(x) (((*argv)[i_ + 1]) ? (argused_ = 1, &((*argv)[i_ + 1])) : \ + (*(argv + 1)) ? 
(argused_ = 1, *(argv + 1)) : (x)) +#define EARGF(x) ARGF_(((x), exit(1), (char *)0)) +#define ARGF() ARGF_((char *)0) + +#endif diff --git a/config.def.h b/config.def.h @@ -0,0 +1,39 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#define BUFFER_SIZE 4096 +#define FIELD_MAX 200 + +/* mime-types */ +static const struct { + char *ext; + char *type; +} mimes[] = { + { "xml", "application/xml; charset=utf-8" }, + { "xhtml", "application/xhtml+xml; charset=utf-8" }, + { "html", "text/html; charset=utf-8" }, + { "htm", "text/html; charset=utf-8" }, + { "css", "text/css; charset=utf-8" }, + { "txt", "text/plain; charset=utf-8" }, + { "md", "text/plain; charset=utf-8" }, + { "c", "text/plain; charset=utf-8" }, + { "h", "text/plain; charset=utf-8" }, + { "gz", "application/x-gtar" }, + { "tar", "application/tar" }, + { "pdf", "application/x-pdf" }, + { "png", "image/png" }, + { "gif", "image/gif" }, + { "jpeg", "image/jpg" }, + { "jpg", "image/jpg" }, + { "iso", "application/x-iso9660-image" }, + { "webp", "image/webp" }, + { "svg", "image/svg+xml; charset=utf-8" }, + { "flac", "audio/flac" }, + { "mp3", "audio/mpeg" }, + { "ogg", "audio/ogg" }, + { "mp4", "video/mp4" }, + { "ogv", "video/ogg" }, + { "webm", "video/webm" }, +}; + +#endif /* CONFIG_H */ diff --git a/config.mk b/config.mk @@ -0,0 +1,16 @@ +# quark version +VERSION = 0 + +# Customize below to fit your system + +# paths +PREFIX = /usr/local +MANPREFIX = $(PREFIX)/share/man + +# flags +CPPFLAGS = -DVERSION=\"$(VERSION)\" -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE +CFLAGS = -std=c99 -pedantic -Wall -Wextra -Os +LDFLAGS = -lpthread -s + +# compiler and linker +CC = cc diff --git a/connection.c b/connection.c @@ -0,0 +1,315 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <errno.h> +#include <netinet/in.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "connection.h" +#include "data.h" +#include "http.h" +#include "server.h" +#include "sock.h" +#include "util.h" + +struct worker_data { + int insock; + size_t nslots; + const struct server *srv; +}; + +void +connection_log(const struct connection *c) +{ + char inaddr_str[INET6_ADDRSTRLEN /* > INET_ADDRSTRLEN */]; + char tstmp[21]; + + /* create timestamp */ + if (!strftime(tstmp, sizeof(tstmp), "%Y-%m-%dT%H:%M:%SZ", + gmtime(&(time_t){time(NULL)}))) { + warn("strftime: Exceeded buffer capacity"); + tstmp[0] = '\0'; /* tstmp contents are undefined on failure */ + /* continue anyway */ + } + + /* generate address-string */ + if (sock_get_inaddr_str(&c->ia, inaddr_str, LEN(inaddr_str))) { + warn("sock_get_inaddr_str: Couldn't generate adress-string"); + inaddr_str[0] = '\0'; + } + + printf("%s\t%s\t%s%.*d\t%s\t%s%s%s%s%s\n", + tstmp, + inaddr_str, + (c->res.status == 0) ? "dropped" : "", + (c->res.status == 0) ? 0 : 3, + c->res.status, + c->req.field[REQ_HOST][0] ? c->req.field[REQ_HOST] : "-", + c->req.path[0] ? c->req.path : "-", + c->req.query[0] ? "?" : "", + c->req.query, + c->req.fragment[0] ? 
"#" : "", + c->req.fragment); +} + +void +connection_reset(struct connection *c) +{ + if (c != NULL) { + shutdown(c->fd, SHUT_RDWR); + close(c->fd); + memset(c, 0, sizeof(*c)); + } +} + +void +connection_serve(struct connection *c, const struct server *srv) +{ + enum status s; + int done; + + switch (c->state) { + case C_VACANT: + /* + * we were passed a "fresh" connection which should now + * try to receive the header, reset buf beforehand + */ + memset(&c->buf, 0, sizeof(c->buf)); + + c->state = C_RECV_HEADER; + /* fallthrough */ + case C_RECV_HEADER: + /* receive header */ + done = 0; + if ((s = http_recv_header(c->fd, &c->buf, &done))) { + http_prepare_error_response(&c->req, &c->res, s); + goto response; + } + if (!done) { + /* not done yet */ + return; + } + + /* parse header */ + if ((s = http_parse_header(c->buf.data, &c->req))) { + http_prepare_error_response(&c->req, &c->res, s); + goto response; + } + + /* prepare response struct */ + http_prepare_response(&c->req, &c->res, srv); +response: + /* generate response header */ + if ((s = http_prepare_header_buf(&c->res, &c->buf))) { + http_prepare_error_response(&c->req, &c->res, s); + if ((s = http_prepare_header_buf(&c->res, &c->buf))) { + /* couldn't generate the header, we failed for good */ + c->res.status = s; + goto err; + } + } + + c->state = C_SEND_HEADER; + /* fallthrough */ + case C_SEND_HEADER: + if ((s = http_send_buf(c->fd, &c->buf))) { + c->res.status = s; + goto err; + } + if (c->buf.len > 0) { + /* not done yet */ + return; + } + + c->state = C_SEND_BODY; + /* fallthrough */ + case C_SEND_BODY: + if (c->req.method == M_GET) { + if (c->buf.len == 0) { + /* fill buffer with body data */ + if ((s = data_fct[c->res.type](&c->res, &c->buf, + &c->progress))) { + /* too late to do any real error handling */ + c->res.status = s; + goto err; + } + + /* if the buffer remains empty, we are done */ + if (c->buf.len == 0) { + break; + } + } else { + /* send buffer */ + if ((s = http_send_buf(c->fd, 
&c->buf))) { + /* too late to do any real error handling */ + c->res.status = s; + goto err; + } + } + return; + } + break; + default: + warn("serve: invalid connection state"); + return; + } +err: + connection_log(c); + connection_reset(c); +} + +static struct connection * +connection_get_drop_candidate(struct connection *connection, size_t nslots) +{ + struct connection *c, *minc; + size_t i, j, maxcnt, cnt; + + /* + * determine the most-unimportant connection 'minc' of the in-address + * with most connections; this algorithm has a complexity of O(n²) + * in time but is O(1) in space; there are algorithms with O(n) in + * time and space, but this would require memory allocation, + * which we avoid. Given the simplicity of the inner loop and + * relatively small number of slots per thread, this is fine. + */ + for (i = 0, minc = NULL, maxcnt = 0; i < nslots; i++) { + /* + * we determine how many connections have the same + * in-address as connection[i], but also minimize over + * that set with other criteria, yielding a general + * minimizer c. We first set it to connection[i] and + * update it, if a better candidate shows up, in the inner + * loop + */ + c = &connection[i]; + + for (j = 0, cnt = 0; j < nslots; j++) { + if (!sock_same_addr(&connection[i].ia, + &connection[j].ia)) { + continue; + } + cnt++; + + /* minimize over state */ + if (connection[j].state < c->state) { + c = &connection[j]; + } else if (connection[j].state == c->state) { + /* minimize over progress */ + if (c->state == C_SEND_BODY && + connection[i].res.type != c->res.type) { + /* + * mixed response types; progress + * is not comparable + * + * the res-type-enum is ordered as + * DIRLISTING, ERROR, FILE, i.e. + * in rising priority, because a + * file transfer is most important, + * followed by error-messages. 
+ * Dirlistings as an "interactive" + * feature (that take up lots of + * resources) have the lowest + * priority + */ + if (connection[i].res.type < + c->res.type) { + c = &connection[j]; + } + } else if (connection[j].progress < + c->progress) { + /* + * for C_SEND_BODY with same response + * type, C_RECV_HEADER and C_SEND_BODY + * it is sufficient to compare the + * raw progress + */ + c = &connection[j]; + } + } + } + + if (cnt > maxcnt) { + /* this run yielded an even greedier in-address */ + minc = c; + maxcnt = cnt; + } + } + + return minc; +} + +struct connection * +connection_accept(int insock, struct connection *connection, size_t nslots) +{ + struct connection *c = NULL; + size_t i; + + /* find vacant connection (i.e. one with no fd assigned to it) */ + for (i = 0; i < nslots; i++) { + if (connection[i].fd == 0) { + c = &connection[i]; + break; + } + } + if (i == nslots) { + /* + * all our connection-slots are occupied and the only + * way out is to drop another connection, because not + * accepting this connection just kicks this can further + * down the road (to the next queue_wait()) without + * solving anything. + * + * This may sound bad, but this case can only be hit + * either when there's a (D)DoS-attack or a massive + * influx of requests. The latter is impossible to solve + * at this moment without expanding resources, but the + * former has certain characteristics allowing us to + * handle this gracefully. + * + * During an attack (e.g. Slowloris, R-U-Dead-Yet, Slow + * Read or just plain flooding) we can not see who is + * waiting to be accept()ed. + * However, an attacker usually already has many + * connections open (while well-behaved clients could + * do everything with just one connection using + * keep-alive). Inferring a likely attacker-connection + * is an educated guess based on which in-address is + * occupying the most connection slots. 
Among those, + * connections in early stages (receiving or sending + * headers) are preferred over connections in late + * stages (sending body). + * + * This quantitative approach effectively drops malicious + * connections while preserving even long-running + * benevolent connections like downloads. + */ + c = connection_get_drop_candidate(connection, nslots); + c->res.status = 0; + connection_log(c); + connection_reset(c); + } + + /* accept connection */ + if ((c->fd = accept(insock, (struct sockaddr *)&c->ia, + &(socklen_t){sizeof(c->ia)})) < 0) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + /* + * this should not happen, as we received the + * event that there are pending connections here + */ + warn("accept:"); + } + return NULL; + } + + /* set socket to non-blocking mode */ + if (sock_set_nonblocking(c->fd)) { + /* we can't allow blocking sockets */ + return NULL; + } + + return c; +} diff --git a/connection.h b/connection.h @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details. */ +#ifndef CONNECTION_H +#define CONNECTION_H + +#include "http.h" +#include "server.h" +#include "util.h" + +enum connection_state { + C_VACANT, + C_RECV_HEADER, + C_SEND_HEADER, + C_SEND_BODY, + NUM_CONN_STATES, +}; + +struct connection { + enum connection_state state; + int fd; + struct sockaddr_storage ia; + struct request req; + struct response res; + struct buffer buf; + size_t progress; +}; + +struct connection *connection_accept(int, struct connection *, size_t); +void connection_log(const struct connection *); +void connection_reset(struct connection *); +void connection_serve(struct connection *, const struct server *); + +#endif /* CONNECTION_H */ diff --git a/data.c b/data.c @@ -0,0 +1,231 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> + +#include "data.h" +#include "http.h" +#include "util.h" + +enum status (* const data_fct[])(const struct response *, + struct buffer *, size_t *) = { + [RESTYPE_DIRLISTING] = data_prepare_dirlisting_buf, + [RESTYPE_ERROR] = data_prepare_error_buf, + [RESTYPE_FILE] = data_prepare_file_buf, +}; + +static int +compareent(const struct dirent **d1, const struct dirent **d2) +{ + int v; + + v = ((*d2)->d_type == DT_DIR ? 1 : -1) - + ((*d1)->d_type == DT_DIR ? 1 : -1); + if (v) { + return v; + } + + return strcmp((*d1)->d_name, (*d2)->d_name); +} + +static char * +suffix(int t) +{ + switch (t) { + case DT_FIFO: return "|"; + case DT_DIR: return "/"; + case DT_LNK: return "@"; + case DT_SOCK: return "="; + } + + return ""; +} + +static void +html_escape(const char *src, char *dst, size_t dst_siz) +{ + const struct { + char c; + char *s; + } escape[] = { + { '&', "&amp;" }, + { '<', "&lt;" }, + { '>', "&gt;" }, + { '"', "&quot;" }, + { '\'', "&#x27;" }, + }; + size_t i, j, k, esclen; + + for (i = 0, j = 0; src[i] != '\0'; i++) { + for (k = 0; k < LEN(escape); k++) { + if (src[i] == escape[k].c) { + break; + } + } + if (k == LEN(escape)) { + /* no escape char at src[i] */ + if (j == dst_siz - 1) { + /* silent truncation */ + break; + } else { + dst[j++] = src[i]; + } + } else { + /* escape char at src[i] */ + esclen = strlen(escape[k].s); + + if (j >= dst_siz - esclen) { + /* silent truncation */ + break; + } else { + memcpy(&dst[j], escape[k].s, esclen); + j += esclen; + } + } + } + dst[j] = '\0'; +} + +enum status +data_prepare_dirlisting_buf(const struct response *res, + struct buffer *buf, size_t *progress) +{ + enum status s = 0; + struct dirent **e; + size_t i; + int dirlen; + char esc[PATH_MAX /* > NAME_MAX */ * 6]; /* strlen("&...;") <= 6 */ + + /* reset buffer */ + memset(buf, 0, sizeof(*buf)); + + /* read directory 
*/ + if ((dirlen = scandir(res->internal_path, &e, NULL, compareent)) < 0) { + return S_FORBIDDEN; + } + + if (*progress == 0) { + /* write listing header (sizeof(esc) >= PATH_MAX) */ + html_escape(res->path, esc, MIN(PATH_MAX, sizeof(esc))); + if (buffer_appendf(buf, + "<!DOCTYPE html>\n<html>\n\t<head>" + "<title>Index of %s</title></head>\n" + "\t<body>\n\t\t<a href=\"..\">..</a>", + esc) < 0) { + s = S_REQUEST_TIMEOUT; + goto cleanup; + } + } + + /* listing entries */ + for (i = *progress; i < (size_t)dirlen; i++) { + /* skip hidden files, "." and ".." */ + if (e[i]->d_name[0] == '.') { + continue; + } + + /* entry line */ + html_escape(e[i]->d_name, esc, sizeof(esc)); + if (buffer_appendf(buf, + "<br />\n\t\t<a href=\"%s%s\">%s%s</a>", + esc, + (e[i]->d_type == DT_DIR) ? "/" : "", + esc, + suffix(e[i]->d_type))) { + /* buffer full */ + break; + } + } + *progress = i; + + if (*progress == (size_t)dirlen) { + /* listing footer */ + if (buffer_appendf(buf, "\n\t</body>\n</html>\n") < 0) { + s = S_REQUEST_TIMEOUT; + goto cleanup; + } + (*progress)++; + } + +cleanup: + while (dirlen--) { + free(e[dirlen]); + } + free(e); + + return s; +} + +enum status +data_prepare_error_buf(const struct response *res, struct buffer *buf, + size_t *progress) +{ + /* reset buffer */ + memset(buf, 0, sizeof(*buf)); + + if (*progress == 0) { + /* write error body */ + if (buffer_appendf(buf, + "<!DOCTYPE html>\n<html>\n\t<head>\n" + "\t\t<title>%d %s</title>\n\t</head>\n" + "\t<body>\n\t\t<h1>%d %s</h1>\n" + "\t</body>\n</html>\n", + res->status, status_str[res->status], + res->status, status_str[res->status])) { + return S_INTERNAL_SERVER_ERROR; + } + (*progress)++; + } + + return 0; +} + +enum status +data_prepare_file_buf(const struct response *res, struct buffer *buf, + size_t *progress) +{ + FILE *fp; + enum status s = 0; + ssize_t r; + size_t remaining; + + /* reset buffer */ + memset(buf, 0, sizeof(*buf)); + + /* open file */ + if (!(fp = fopen(res->internal_path, "r"))) { + s 
= S_FORBIDDEN; + goto cleanup; + } + + /* seek to lower bound + progress */ + if (fseek(fp, res->file.lower + *progress, SEEK_SET)) { + s = S_INTERNAL_SERVER_ERROR; + goto cleanup; + } + + /* read data into buf */ + remaining = res->file.upper - res->file.lower + 1 - *progress; + while ((r = fread(buf->data + buf->len, 1, + MIN(sizeof(buf->data) - buf->len, + remaining), fp))) { + if (r < 0) { + s = S_INTERNAL_SERVER_ERROR; + goto cleanup; + } + buf->len += r; + *progress += r; + remaining -= r; + } + +cleanup: + if (fp) { + fclose(fp); + } + + return s; +} diff --git a/data.h b/data.h @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#ifndef DATA_H +#define DATA_H + +#include "http.h" +#include "util.h" + +extern enum status (* const data_fct[])(const struct response *, + struct buffer *, size_t *); + +enum status data_prepare_dirlisting_buf(const struct response *, + struct buffer *, size_t *); +enum status data_prepare_error_buf(const struct response *, + struct buffer *, size_t *); +enum status data_prepare_file_buf(const struct response *, + struct buffer *, size_t *); + +#endif /* DATA_H */ diff --git a/http.c b/http.c @@ -0,0 +1,1046 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <arpa/inet.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <netinet/in.h> +#include <regex.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "config.h" +#include "http.h" +#include "util.h" + +const char *req_field_str[] = { + [REQ_HOST] = "Host", + [REQ_RANGE] = "Range", + [REQ_IF_MODIFIED_SINCE] = "If-Modified-Since", +}; + +const char *req_method_str[] = { + [M_GET] = "GET", + [M_HEAD] = "HEAD", +}; + +const char *status_str[] = { + [S_OK] = "OK", + [S_PARTIAL_CONTENT] = "Partial Content", + [S_MOVED_PERMANENTLY] = "Moved Permanently", + [S_NOT_MODIFIED] = "Not Modified", + [S_BAD_REQUEST] = "Bad Request", + [S_FORBIDDEN] = "Forbidden", + [S_NOT_FOUND] = "Not Found", + [S_METHOD_NOT_ALLOWED] = "Method Not Allowed", + [S_REQUEST_TIMEOUT] = "Request Time-out", + [S_RANGE_NOT_SATISFIABLE] = "Range Not Satisfiable", + [S_REQUEST_TOO_LARGE] = "Request Header Fields Too Large", + [S_INTERNAL_SERVER_ERROR] = "Internal Server Error", + [S_VERSION_NOT_SUPPORTED] = "HTTP Version not supported", +}; + +const char *res_field_str[] = { + [RES_ACCEPT_RANGES] = "Accept-Ranges", + [RES_ALLOW] = "Allow", + [RES_LOCATION] = "Location", + [RES_LAST_MODIFIED] = "Last-Modified", + [RES_CONTENT_LENGTH] = "Content-Length", + [RES_CONTENT_RANGE] = "Content-Range", + [RES_CONTENT_TYPE] = "Content-Type", +}; + +enum status +http_prepare_header_buf(const struct response *res, struct buffer *buf) +{ + char tstmp[FIELD_MAX]; + size_t i; + + /* reset buffer */ + memset(buf, 0, sizeof(*buf)); + + /* generate timestamp */ + if (timestamp(tstmp, sizeof(tstmp), time(NULL))) { + goto err; + } + + /* write data */ + if (buffer_appendf(buf, + "HTTP/1.1 %d %s\r\n" + "Date: %s\r\n" + "Connection: close\r\n", + res->status, status_str[res->status], tstmp)) { + goto err; + } + + for (i 
= 0; i < NUM_RES_FIELDS; i++) { + if (res->field[i][0] != '\0' && + buffer_appendf(buf, "%s: %s\r\n", res_field_str[i], + res->field[i])) { + goto err; + } + } + + if (buffer_appendf(buf, "\r\n")) { + goto err; + } + + return 0; +err: + memset(buf, 0, sizeof(*buf)); + return S_INTERNAL_SERVER_ERROR; +} + +enum status +http_send_buf(int fd, struct buffer *buf) +{ + ssize_t r; + + if (buf == NULL) { + return S_INTERNAL_SERVER_ERROR; + } + + while (buf->len > 0) { + if ((r = write(fd, buf->data, buf->len)) <= 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + /* + * socket is blocking, return normally. + * given the buffer still contains data, + * this indicates to the caller that we + * have been interrupted. + */ + return 0; + } else { + return S_REQUEST_TIMEOUT; + } + } + memmove(buf->data, buf->data + r, buf->len - r); + buf->len -= r; + } + + return 0; +} + +static void +decode(const char src[PATH_MAX], char dest[PATH_MAX]) +{ + size_t i; + uint8_t n; + const char *s; + + for (s = src, i = 0; *s; i++) { + if (*s == '%' && isxdigit((unsigned char)s[1]) && + isxdigit((unsigned char)s[2])) { + sscanf(s + 1, "%2hhx", &n); + dest[i] = n; + s += 3; + } else { + dest[i] = *s++; + } + } + dest[i] = '\0'; +} + +enum status +http_recv_header(int fd, struct buffer *buf, int *done) +{ + enum status s; + ssize_t r; + + while (1) { + if ((r = read(fd, buf->data + buf->len, + sizeof(buf->data) - buf->len)) < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + /* + * socket is drained, return normally, + * but set done to zero + */ + *done = 0; + return 0; + } else { + s = S_REQUEST_TIMEOUT; + goto err; + } + } else if (r == 0) { + /* + * unexpected EOF because the client probably + * hung up. 
This is technically a bad request, + * because it's incomplete + */ + s = S_BAD_REQUEST; + goto err; + } + buf->len += r; + + /* check if we are done (header terminated) */ + if (buf->len >= 4 && !memcmp(buf->data + buf->len - 4, + "\r\n\r\n", 4)) { + break; + } + + /* buffer is full or read over, but header is not terminated */ + if (r == 0 || buf->len == sizeof(buf->data)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + } + + /* header is complete, remove last \r\n and set done */ + buf->len -= 2; + *done = 1; + + return 0; +err: + memset(buf, 0, sizeof(*buf)); + return s; +} + +enum status +http_parse_header(const char *h, struct request *req) +{ + struct in6_addr addr; + size_t i, mlen; + const char *p, *q, *r, *s, *t; + char *m, *n; + + /* empty the request struct */ + memset(req, 0, sizeof(*req)); + + /* + * parse request line + */ + + /* METHOD */ + for (i = 0; i < NUM_REQ_METHODS; i++) { + mlen = strlen(req_method_str[i]); + if (!strncmp(req_method_str[i], h, mlen)) { + req->method = i; + break; + } + } + if (i == NUM_REQ_METHODS) { + return S_METHOD_NOT_ALLOWED; + } + + /* a single space must follow the method */ + if (h[mlen] != ' ') { + return S_BAD_REQUEST; + } + + /* basis for next step */ + p = h + mlen + 1; + + /* RESOURCE */ + + /* + * path?query#fragment + * ^ ^ ^ ^ + * | | | | + * p r s q + * + */ + if (!(q = strchr(p, ' '))) { + return S_BAD_REQUEST; + } + + /* search for first '?' */ + for (r = p; r < q; r++) { + if (!isprint(*r)) { + return S_BAD_REQUEST; + } + if (*r == '?') { + break; + } + } + if (r == q) { + /* not found */ + r = NULL; + } + + /* search for first '#' */ + for (s = p; s < q; s++) { + if (!isprint(*s)) { + return S_BAD_REQUEST; + } + if (*s == '#') { + break; + } + } + if (s == q) { + /* not found */ + s = NULL; + } + + if (r != NULL && s != NULL && s < r) { + /* + * '#' comes before '?' and thus the '?' 
is literal, + * because the query must come before the fragment + */ + r = NULL; + } + + /* write path using temporary endpointer t */ + if (r != NULL) { + /* resource contains a query, path ends at r */ + t = r; + } else if (s != NULL) { + /* resource contains only a fragment, path ends at s */ + t = s; + } else { + /* resource contains no queries, path ends at q */ + t = q; + } + if ((size_t)(t - p + 1) > LEN(req->path)) { + return S_REQUEST_TOO_LARGE; + } + memcpy(req->path, p, t - p); + req->path[t - p] = '\0'; + decode(req->path, req->path); + + /* write query if present */ + if (r != NULL) { + /* query ends either at s (if fragment present) or q */ + t = (s != NULL) ? s : q; + + if ((size_t)(t - (r + 1) + 1) > LEN(req->query)) { + return S_REQUEST_TOO_LARGE; + } + memcpy(req->query, r + 1, t - (r + 1)); + req->query[t - (r + 1)] = '\0'; + } + + /* write fragment if present */ + if (s != NULL) { + /* the fragment always starts at s + 1 and ends at q */ + if ((size_t)(q - (s + 1) + 1) > LEN(req->fragment)) { + return S_REQUEST_TOO_LARGE; + } + memcpy(req->fragment, s + 1, q - (s + 1)); + req->fragment[q - (s + 1)] = '\0'; + } + + /* basis for next step */ + p = q + 1; + + /* HTTP-VERSION */ + if (strncmp(p, "HTTP/", sizeof("HTTP/") - 1)) { + return S_BAD_REQUEST; + } + p += sizeof("HTTP/") - 1; + if (strncmp(p, "1.0", sizeof("1.0") - 1) && + strncmp(p, "1.1", sizeof("1.1") - 1)) { + return S_VERSION_NOT_SUPPORTED; + } + p += sizeof("1.*") - 1; + + /* check terminator */ + if (strncmp(p, "\r\n", sizeof("\r\n") - 1)) { + return S_BAD_REQUEST; + } + + /* basis for next step */ + p += sizeof("\r\n") - 1; + + /* + * parse request-fields + */ + + /* match field type */ + for (; *p != '\0';) { + for (i = 0; i < NUM_REQ_FIELDS; i++) { + if (!strncasecmp(p, req_field_str[i], + strlen(req_field_str[i]))) { + break; + } + } + if (i == NUM_REQ_FIELDS) { + /* unmatched field, skip this line */ + if (!(q = strstr(p, "\r\n"))) { + return S_BAD_REQUEST; + } + p = q + 
(sizeof("\r\n") - 1); + continue; + } + + p += strlen(req_field_str[i]); + + /* a single colon must follow the field name */ + if (*p != ':') { + return S_BAD_REQUEST; + } + + /* skip whitespace */ + for (++p; *p == ' ' || *p == '\t'; p++) + ; + + /* extract field content */ + if (!(q = strstr(p, "\r\n"))) { + return S_BAD_REQUEST; + } + if ((size_t)(q - p + 1) > LEN(req->field[i])) { + return S_REQUEST_TOO_LARGE; + } + memcpy(req->field[i], p, q - p); + req->field[i][q - p] = '\0'; + + /* go to next line */ + p = q + (sizeof("\r\n") - 1); + } + + /* + * clean up host + */ + + m = strrchr(req->field[REQ_HOST], ':'); + n = strrchr(req->field[REQ_HOST], ']'); + + /* strip port suffix but don't interfere with IPv6 bracket notation + * as per RFC 2732 */ + if (m && (!n || m > n)) { + /* port suffix must not be empty */ + if (*(m + 1) == '\0') { + return S_BAD_REQUEST; + } + *m = '\0'; + } + + /* strip the brackets from the IPv6 notation and validate the address */ + if (n) { + /* brackets must be on the outside */ + if (req->field[REQ_HOST][0] != '[' || *(n + 1) != '\0') { + return S_BAD_REQUEST; + } + + /* remove the right bracket */ + *n = '\0'; + m = req->field[REQ_HOST] + 1; + + /* validate the contained IPv6 address */ + if (inet_pton(AF_INET6, m, &addr) != 1) { + return S_BAD_REQUEST; + } + + /* copy it into the host field */ + memmove(req->field[REQ_HOST], m, n - m + 1); + } + + return 0; +} + +static void +encode(const char src[PATH_MAX], char dest[PATH_MAX]) +{ + size_t i; + const char *s; + + for (s = src, i = 0; *s && i < (PATH_MAX - 4); s++) { + if (iscntrl(*s) || (unsigned char)*s > 127) { + i += snprintf(dest + i, PATH_MAX - i, "%%%02X", + (unsigned char)*s); + } else { + dest[i] = *s; + i++; + } + } + dest[i] = '\0'; +} + +static enum status +path_normalize(char *uri, int *redirect) +{ + size_t len; + int last = 0; + char *p, *q; + + /* require and skip first slash */ + if (uri[0] != '/') { + return S_BAD_REQUEST; + } + p = uri + 1; + + /* get length of 
URI */ + len = strlen(p); + + for (; !last; ) { + /* bound uri component within (p,q) */ + if (!(q = strchr(p, '/'))) { + q = strchr(p, '\0'); + last = 1; + } + + if (*p == '\0') { + break; + } else if (p == q || (q - p == 1 && p[0] == '.')) { + /* "/" or "./" */ + goto squash; + } else if (q - p == 2 && p[0] == '.' && p[1] == '.') { + /* "../" */ + if (p != uri + 1) { + /* place p right after the previous / */ + for (p -= 2; p > uri && *p != '/'; p--); + p++; + } + goto squash; + } else { + /* move on */ + p = q + 1; + continue; + } +squash: + /* squash (p,q) into void */ + if (last) { + *p = '\0'; + len = p - uri; + } else { + memmove(p, q + 1, len - ((q + 1) - uri) + 2); + len -= (q + 1) - p; + } + if (redirect != NULL) { + *redirect = 1; + } + } + + return 0; +} + +static enum status +path_add_vhost_prefix(char uri[PATH_MAX], int *redirect, + const struct server *srv, const struct response *res) +{ + if (srv->vhost && res->vhost && res->vhost->prefix) { + if (prepend(uri, PATH_MAX, res->vhost->prefix)) { + return S_REQUEST_TOO_LARGE; + } + if (redirect != NULL) { + *redirect = 1; + } + } + + return 0; +} + +static enum status +path_apply_prefix_mapping(char uri[PATH_MAX], int *redirect, + const struct server *srv, const struct response *res) +{ + size_t i, len; + + for (i = 0; i < srv->map_len; i++) { + len = strlen(srv->map[i].from); + if (!strncmp(uri, srv->map[i].from, len)) { + /* + * if vhosts are enabled only apply mappings + * defined for the current canonical host + */ + if (srv->vhost && res->vhost && srv->map[i].chost && + strcmp(srv->map[i].chost, res->vhost->chost)) { + continue; + } + + /* swap out URI prefix */ + memmove(uri, uri + len, strlen(uri) + 1); + if (prepend(uri, PATH_MAX, srv->map[i].to)) { + return S_REQUEST_TOO_LARGE; + } + + if (redirect != NULL) { + *redirect = 1; + } + + /* break so we don't possibly hit an infinite loop */ + break; + } + } + + return 0; +} + +static enum status +path_ensure_dirslash(char uri[PATH_MAX], int 
*redirect) +{ + size_t len; + + /* append '/' to URI if not present */ + len = strlen(uri); + if (len + 1 + 1 > PATH_MAX) { + return S_REQUEST_TOO_LARGE; + } + if (len > 0 && uri[len - 1] != '/') { + uri[len] = '/'; + uri[len + 1] = '\0'; + if (redirect != NULL) { + *redirect = 1; + } + } + + return 0; +} + +static enum status +parse_range(const char *str, size_t size, size_t *lower, size_t *upper) +{ + char first[FIELD_MAX], last[FIELD_MAX]; + const char *p, *q, *r, *err; + + /* default to the complete range */ + *lower = 0; + *upper = size - 1; + + /* done if no range-string is given */ + if (str == NULL || *str == '\0') { + return 0; + } + + /* skip opening statement */ + if (strncmp(str, "bytes=", sizeof("bytes=") - 1)) { + return S_BAD_REQUEST; + } + p = str + (sizeof("bytes=") - 1); + + /* check string (should only contain numbers and a hyphen) */ + for (r = p, q = NULL; *r != '\0'; r++) { + if (*r < '0' || *r > '9') { + if (*r == '-') { + if (q != NULL) { + /* we have already seen a hyphen */ + return S_BAD_REQUEST; + } else { + /* place q after the hyphen */ + q = r + 1; + } + } else if (*r == ',' && r > p) { + /* + * we refuse to accept range-lists out + * of spite towards this horrible part + * of the spec + */ + return S_RANGE_NOT_SATISFIABLE; + } else { + return S_BAD_REQUEST; + } + } + } + if (q == NULL) { + /* the input string must contain a hyphen */ + return S_BAD_REQUEST; + } + r = q + strlen(q); + + /* + * byte-range=first-last\0 + * ^ ^ ^ + * | | | + * p q r + */ + + /* copy 'first' and 'last' to their respective arrays */ + if ((size_t)((q - 1) - p + 1) > sizeof(first) || + (size_t)(r - q + 1) > sizeof(last)) { + return S_REQUEST_TOO_LARGE; + } + memcpy(first, p, (q - 1) - p); + first[(q - 1) - p] = '\0'; + memcpy(last, q, r - q); + last[r - q] = '\0'; + + if (first[0] != '\0') { + /* + * range has format "first-last" or "first-", + * i.e. 
return bytes 'first' to 'last' (or the + * last byte if 'last' is not given), + * inclusively, and byte-numbering beginning at 0 + */ + *lower = strtonum(first, 0, MIN(SIZE_MAX, LLONG_MAX), + &err); + if (!err) { + if (last[0] != '\0') { + *upper = strtonum(last, 0, + MIN(SIZE_MAX, LLONG_MAX), + &err); + } else { + *upper = size - 1; + } + } + if (err) { + /* one of the strtonum()'s failed */ + return S_BAD_REQUEST; + } + + /* check ranges */ + if (*lower > *upper || *lower >= size) { + return S_RANGE_NOT_SATISFIABLE; + } + + /* adjust upper limit to be at most the last byte */ + *upper = MIN(*upper, size - 1); + } else { + /* last must not also be empty */ + if (last[0] == '\0') { + return S_BAD_REQUEST; + } + + /* + * Range has format "-num", i.e. return the 'num' + * last bytes + */ + + /* + * use upper as a temporary storage for 'num', + * as we know 'upper' is size - 1 + */ + *upper = strtonum(last, 0, MIN(SIZE_MAX, LLONG_MAX), &err); + if (err) { + return S_BAD_REQUEST; + } + + /* determine lower */ + if (*upper > size) { + /* more bytes requested than we have */ + *lower = 0; + } else { + *lower = size - *upper; + } + + /* set upper to the correct value */ + *upper = size - 1; + } + + return 0; +} + +void +http_prepare_response(const struct request *req, struct response *res, + const struct server *srv) +{ + enum status s, tmps; + struct in6_addr addr; + struct stat st; + struct tm tm = { 0 }; + size_t i; + int redirect, hasport, ipv6host; + static char tmppath[PATH_MAX]; + char *p, *mime; + + /* empty all response fields */ + memset(res, 0, sizeof(*res)); + + /* determine virtual host */ + if (srv->vhost) { + for (i = 0; i < srv->vhost_len; i++) { + if (!regexec(&(srv->vhost[i].re), + req->field[REQ_HOST], 0, NULL, 0)) { + /* we have a matching vhost */ + res->vhost = &(srv->vhost[i]); + break; + } + } + if (i == srv->vhost_len) { + s = S_NOT_FOUND; + goto err; + } + } + + /* copy request-path to response-path and clean it up */ + redirect = 0; + 
memcpy(res->path, req->path, MIN(sizeof(res->path), sizeof(req->path))); + if ((tmps = path_normalize(res->path, &redirect)) || + (tmps = path_add_vhost_prefix(res->path, &redirect, srv, res)) || + (tmps = path_apply_prefix_mapping(res->path, &redirect, srv, res)) || + (tmps = path_normalize(res->path, &redirect))) { + s = tmps; + goto err; + } + + /* redirect all non-canonical hosts to their canonical forms */ + if (srv->vhost && res->vhost && + strcmp(req->field[REQ_HOST], res->vhost->chost)) { + redirect = 1; + } + + /* reject all non-well-known hidden targets (see RFC 8615) */ + if (strstr(res->path, "/.") && strncmp(res->path, "/.well-known/", + sizeof("/.well-known/") - 1)) { + s = S_FORBIDDEN; + goto err; + } + + /* + * generate and stat internal path based on the cleaned up request + * path and the virtual host while ignoring query and fragment + * (valid according to RFC 3986) + */ + if (esnprintf(res->internal_path, sizeof(res->internal_path), "/%s/%s", + (srv->vhost && res->vhost) ? res->vhost->dir : "", + res->path)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + if ((tmps = path_normalize(res->internal_path, NULL))) { + s = tmps; + goto err; + } + if (stat(res->internal_path, &st) < 0) { + s = (errno == EACCES) ? S_FORBIDDEN : S_NOT_FOUND; + goto err; + } + + /* + * if the path points at a directory, make sure both the path + * and internal path have a trailing slash + */ + if (S_ISDIR(st.st_mode)) { + if ((tmps = path_ensure_dirslash(res->path, &redirect)) || + (tmps = path_ensure_dirslash(res->internal_path, NULL))) { + s = tmps; + goto err; + } + } + + /* redirect if the path-cleanup necessitated it earlier */ + if (redirect) { + res->status = S_MOVED_PERMANENTLY; + + /* encode path */ + encode(res->path, tmppath); + + /* determine target location */ + if (srv->vhost && res->vhost) { + /* absolute redirection URL */ + + /* do we need to add a port to the Location? 
*/ + hasport = srv->port && strcmp(srv->port, "80"); + + /* RFC 2732 specifies to use brackets for IPv6-addresses + * in URLs, so we need to check if our host is one and + * honor that later when we fill the "Location"-field */ + if ((ipv6host = inet_pton(AF_INET6, res->vhost->chost, + &addr)) < 0) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + + /* + * write location to response struct (re-including + * the query and fragment, if present) + */ + if (esnprintf(res->field[RES_LOCATION], + sizeof(res->field[RES_LOCATION]), + "//%s%s%s%s%s%s%s%s%s%s", + ipv6host ? "[" : "", + res->vhost->chost, + ipv6host ? "]" : "", + hasport ? ":" : "", + hasport ? srv->port : "", + tmppath, + req->query[0] ? "?" : "", + req->query, + req->fragment[0] ? "#" : "", + req->fragment)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + } else { + /* + * write relative redirection URI to response struct + * (re-including the query and fragment, if present) + */ + if (esnprintf(res->field[RES_LOCATION], + sizeof(res->field[RES_LOCATION]), + "%s%s%s%s%s", + tmppath, + req->query[0] ? "?" : "", + req->query, + req->fragment[0] ? "#" : "", + req->fragment)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + } + + return; + } + + if (S_ISDIR(st.st_mode)) { + /* + * when we serve a directory, we first check if there + * exists a directory index. 
If not, we either make + * a directory listing (if enabled) or send an error + */ + + /* + * append docindex to internal_path temporarily + * (internal_path is guaranteed to end with '/') + */ + if (esnprintf(tmppath, sizeof(tmppath), "%s%s", + res->internal_path, srv->docindex)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + + /* stat the temporary path, which must be a regular file */ + if (stat(tmppath, &st) < 0 || !S_ISREG(st.st_mode)) { + if (srv->listdirs) { + /* serve directory listing */ + + /* check if directory is accessible */ + if (access(res->internal_path, R_OK) != 0) { + s = S_FORBIDDEN; + goto err; + } else { + res->status = S_OK; + } + res->type = RESTYPE_DIRLISTING; + + if (esnprintf(res->field[RES_CONTENT_TYPE], + sizeof(res->field[RES_CONTENT_TYPE]), + "%s", "text/html; charset=utf-8")) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + + return; + } else { + /* reject */ + s = (!S_ISREG(st.st_mode) || errno == EACCES) ? + S_FORBIDDEN : S_NOT_FOUND; + goto err; + } + } else { + /* the docindex exists; copy tmppath to internal path */ + if (esnprintf(res->internal_path, + sizeof(res->internal_path), "%s", + tmppath)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + } + } + + /* modified since */ + if (req->field[REQ_IF_MODIFIED_SINCE][0]) { + /* parse field */ + if (!strptime(req->field[REQ_IF_MODIFIED_SINCE], + "%a, %d %b %Y %T GMT", &tm)) { + s = S_BAD_REQUEST; + goto err; + } + + /* compare with last modification date of the file */ + if (difftime(st.st_mtim.tv_sec, timegm(&tm)) <= 0) { + res->status = S_NOT_MODIFIED; + return; + } + } + + /* range */ + if ((s = parse_range(req->field[REQ_RANGE], st.st_size, + &(res->file.lower), &(res->file.upper)))) { + if (s == S_RANGE_NOT_SATISFIABLE) { + res->status = S_RANGE_NOT_SATISFIABLE; + + if (esnprintf(res->field[RES_CONTENT_RANGE], + sizeof(res->field[RES_CONTENT_RANGE]), + "bytes */%zu", st.st_size)) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + + return; + } else { + goto err; + } + } + + /* 
mime */ + mime = "application/octet-stream"; + if ((p = strrchr(res->internal_path, '.'))) { + for (i = 0; i < LEN(mimes); i++) { + if (!strcmp(mimes[i].ext, p + 1)) { + mime = mimes[i].type; + break; + } + } + } + + /* fill response struct */ + res->type = RESTYPE_FILE; + + /* check if file is readable */ + res->status = (access(res->internal_path, R_OK)) ? S_FORBIDDEN : + (req->field[REQ_RANGE][0] != '\0') ? + S_PARTIAL_CONTENT : S_OK; + + if (esnprintf(res->field[RES_ACCEPT_RANGES], + sizeof(res->field[RES_ACCEPT_RANGES]), + "%s", "bytes")) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + + if (esnprintf(res->field[RES_CONTENT_LENGTH], + sizeof(res->field[RES_CONTENT_LENGTH]), + "%zu", res->file.upper - res->file.lower + 1)) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + if (req->field[REQ_RANGE][0] != '\0') { + if (esnprintf(res->field[RES_CONTENT_RANGE], + sizeof(res->field[RES_CONTENT_RANGE]), + "bytes %zd-%zd/%zu", res->file.lower, + res->file.upper, st.st_size)) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + } + if (esnprintf(res->field[RES_CONTENT_TYPE], + sizeof(res->field[RES_CONTENT_TYPE]), + "%s", mime)) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + if (timestamp(res->field[RES_LAST_MODIFIED], + sizeof(res->field[RES_LAST_MODIFIED]), + st.st_mtim.tv_sec)) { + s = S_INTERNAL_SERVER_ERROR; + goto err; + } + + return; +err: + http_prepare_error_response(req, res, s); +} + +void +http_prepare_error_response(const struct request *req, + struct response *res, enum status s) +{ + /* used later */ + (void)req; + + /* empty all response fields */ + memset(res, 0, sizeof(*res)); + + res->type = RESTYPE_ERROR; + res->status = s; + + if (esnprintf(res->field[RES_CONTENT_TYPE], + sizeof(res->field[RES_CONTENT_TYPE]), + "text/html; charset=utf-8")) { + res->status = S_INTERNAL_SERVER_ERROR; + } + + if (res->status == S_METHOD_NOT_ALLOWED) { + if (esnprintf(res->field[RES_ALLOW], + sizeof(res->field[RES_ALLOW]), + "Allow: GET, HEAD")) { + res->status = 
S_INTERNAL_SERVER_ERROR; + } + } +} diff --git a/http.h b/http.h @@ -0,0 +1,97 @@ +/* See LICENSE file for copyright and license details. */ +#ifndef HTTP_H +#define HTTP_H + +#include <limits.h> +#include <sys/socket.h> + +#include "config.h" +#include "server.h" +#include "util.h" + +enum req_field { + REQ_HOST, + REQ_RANGE, + REQ_IF_MODIFIED_SINCE, + NUM_REQ_FIELDS, +}; + +extern const char *req_field_str[]; + +enum req_method { + M_GET, + M_HEAD, + NUM_REQ_METHODS, +}; + +extern const char *req_method_str[]; + +struct request { + enum req_method method; + char path[PATH_MAX]; + char query[FIELD_MAX]; + char fragment[FIELD_MAX]; + char field[NUM_REQ_FIELDS][FIELD_MAX]; +}; + +enum status { + S_OK = 200, + S_PARTIAL_CONTENT = 206, + S_MOVED_PERMANENTLY = 301, + S_NOT_MODIFIED = 304, + S_BAD_REQUEST = 400, + S_FORBIDDEN = 403, + S_NOT_FOUND = 404, + S_METHOD_NOT_ALLOWED = 405, + S_REQUEST_TIMEOUT = 408, + S_RANGE_NOT_SATISFIABLE = 416, + S_REQUEST_TOO_LARGE = 431, + S_INTERNAL_SERVER_ERROR = 500, + S_VERSION_NOT_SUPPORTED = 505, +}; + +extern const char *status_str[]; + +enum res_field { + RES_ACCEPT_RANGES, + RES_ALLOW, + RES_LOCATION, + RES_LAST_MODIFIED, + RES_CONTENT_LENGTH, + RES_CONTENT_RANGE, + RES_CONTENT_TYPE, + NUM_RES_FIELDS, +}; + +extern const char *res_field_str[]; + +enum res_type { + RESTYPE_DIRLISTING, + RESTYPE_ERROR, + RESTYPE_FILE, + NUM_RES_TYPES, +}; + +struct response { + enum res_type type; + enum status status; + char field[NUM_RES_FIELDS][FIELD_MAX]; + char path[PATH_MAX]; + char internal_path[PATH_MAX]; + struct vhost *vhost; + struct { + size_t lower; + size_t upper; + } file; +}; + +enum status http_prepare_header_buf(const struct response *, struct buffer *); +enum status http_send_buf(int, struct buffer *); +enum status http_recv_header(int, struct buffer *, int *); +enum status http_parse_header(const char *, struct request *); +void http_prepare_response(const struct request *, struct response *, + const struct server *); +void 
http_prepare_error_response(const struct request *, + struct response *, enum status); + +#endif /* HTTP_H */ diff --git a/main.c b/main.c @@ -0,0 +1,364 @@ +/* See LICENSE file for copyright and license details. */ +#include <errno.h> +#include <grp.h> +#include <limits.h> +#include <pwd.h> +#include <regex.h> +#include <signal.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "arg.h" +#include "server.h" +#include "sock.h" +#include "util.h" + +static char *udsname; + +static void +cleanup(void) +{ + if (udsname) { + sock_rem_uds(udsname); + } +} + +static void +sigcleanup(int sig) +{ + cleanup(); + kill(0, sig); + _exit(1); +} + +static void +handlesignals(void(*hdl)(int)) +{ + struct sigaction sa = { + .sa_handler = hdl, + }; + + sigemptyset(&sa.sa_mask); + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGHUP, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + sigaction(SIGQUIT, &sa, NULL); +} + +static void +usage(void) +{ + const char *opts = "[-u user] [-g group] [-n num] [-d dir] [-l] " + "[-i file] [-v vhost] ... 
[-m map] ..."; + + die("usage: %s -p port [-h host] %s\n" + " %s -U file [-p port] %s", argv0, + opts, argv0, opts); +} + +int +main(int argc, char *argv[]) +{ + struct group *grp = NULL; + struct passwd *pwd = NULL; + struct rlimit rlim; + struct server srv = { + .docindex = "index.html", + }; + size_t i; + int insock, status = 0; + const char *err; + char *tok[4]; + + /* defaults */ + size_t nthreads = 4; + size_t nslots = 64; + char *servedir = "."; + char *user = "nobody"; + char *group = "nogroup"; + + ARGBEGIN { + case 'd': + servedir = EARGF(usage()); + break; + case 'g': + group = EARGF(usage()); + break; + case 'h': + srv.host = EARGF(usage()); + break; + case 'i': + srv.docindex = EARGF(usage()); + if (strchr(srv.docindex, '/')) { + die("The document index must not contain '/'"); + } + break; + case 'l': + srv.listdirs = 1; + break; + case 'm': + if (spacetok(EARGF(usage()), tok, 3) || !tok[0] || !tok[1]) { + usage(); + } + if (!(srv.map = reallocarray(srv.map, ++srv.map_len, + sizeof(struct map)))) { + die("reallocarray:"); + } + srv.map[srv.map_len - 1].from = tok[0]; + srv.map[srv.map_len - 1].to = tok[1]; + srv.map[srv.map_len - 1].chost = tok[2]; + break; + case 's': + err = NULL; + nslots = strtonum(EARGF(usage()), 1, INT_MAX, &err); + if (err) { + die("strtonum '%s': %s", EARGF(usage()), err); + } + break; + case 't': + err = NULL; + nthreads = strtonum(EARGF(usage()), 1, INT_MAX, &err); + if (err) { + die("strtonum '%s': %s", EARGF(usage()), err); + } + break; + case 'p': + srv.port = EARGF(usage()); + break; + case 'U': + udsname = EARGF(usage()); + break; + case 'u': + user = EARGF(usage()); + break; + case 'v': + if (spacetok(EARGF(usage()), tok, 4) || !tok[0] || !tok[1] || + !tok[2]) { + usage(); + } + if (!(srv.vhost = reallocarray(srv.vhost, ++srv.vhost_len, + sizeof(*srv.vhost)))) { + die("reallocarray:"); + } + srv.vhost[srv.vhost_len - 1].chost = tok[0]; + srv.vhost[srv.vhost_len - 1].regex = tok[1]; + srv.vhost[srv.vhost_len - 1].dir = 
tok[2]; + srv.vhost[srv.vhost_len - 1].prefix = tok[3]; + break; + default: + usage(); + } ARGEND + + if (argc) { + usage(); + } + + /* can't have both host and UDS but must have one of port or UDS*/ + if ((srv.host && udsname) || !(srv.port || udsname)) { + usage(); + } + + if (udsname && (!access(udsname, F_OK) || errno != ENOENT)) { + die("UNIX-domain socket '%s': %s", udsname, errno ? + strerror(errno) : "File exists"); + } + + /* compile and check the supplied vhost regexes */ + for (i = 0; i < srv.vhost_len; i++) { + if (regcomp(&srv.vhost[i].re, srv.vhost[i].regex, + REG_EXTENDED | REG_ICASE | REG_NOSUB)) { + die("regcomp '%s': invalid regex", + srv.vhost[i].regex); + } + } + + /* validate user and group */ + errno = 0; + if (!user || !(pwd = getpwnam(user))) { + die("getpwnam '%s': %s", user ? user : "null", + errno ? strerror(errno) : "Entry not found"); + } + errno = 0; + if (!group || !(grp = getgrnam(group))) { + die("getgrnam '%s': %s", group ? group : "null", + errno ? strerror(errno) : "Entry not found"); + } + + /* open a new process group */ + setpgid(0, 0); + + handlesignals(sigcleanup); + + /* + * set the maximum number of open file descriptors as needed + * - 3 initial fd's + * - nthreads fd's for the listening socket + * - (nthreads * nslots) fd's for the connection-fd + * - (5 * nthreads) fd's for general purpose thread-use + */ + rlim.rlim_cur = rlim.rlim_max = 3 + nthreads + nthreads * nslots + + 5 * nthreads; + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { + if (errno == EPERM) { + die("You need to run as root or have " + "CAP_SYS_RESOURCE set, or are asking for more " + "file descriptors than the system can offer"); + } else { + die("setrlimit:"); + } + } + + /* + * create the (non-blocking) listening socket + * + * we could use SO_REUSEPORT and create a listening socket for + * each thread (for better load-balancing, given each thread + * would get his own kernel-queue), but this increases latency + * (as a thread might get stuck on a larger 
request, making all + * other request wait in line behind it). + * + * socket contention with a single listening socket is a + * non-issue and thread-load-balancing is better fixed in the + * kernel by changing epoll-sheduling from a FIFO- to a + * LIFO-model, especially as it doesn't affect performance + */ + insock = udsname ? sock_get_uds(udsname, pwd->pw_uid, grp->gr_gid) : + sock_get_ips(srv.host, srv.port); + if (sock_set_nonblocking(insock)) { + return 1; + } + + /* + * before dropping privileges, we fork, as we need to remove + * the UNIX-domain socket when we shut down, which we need + * privileges for + */ + switch (fork()) { + case -1: + warn("fork:"); + break; + case 0: + /* restore default handlers */ + handlesignals(SIG_DFL); + + /* reap children automatically */ + if (signal(SIGCHLD, SIG_IGN) == SIG_ERR) { + die("signal: Failed to set SIG_IGN on SIGCHLD"); + } + if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { + die("signal: Failed to set SIG_IGN on SIGPIPE"); + } + + /* + * try increasing the thread-limit by the number + * of threads we need (which is the only reliable + * workaround I know given the thread-limit is per user + * rather than per process), but ignore EPERM errors, + * because this most probably means the user has already + * set the value to the kernel's limit, and there's not + * much we can do in any other case. + * There's also no danger of overflow as the value + * returned by getrlimit() is way below the limits of the + * rlim_t datatype. 
+ */ + if (getrlimit(RLIMIT_NPROC, &rlim) < 0) { + die("getrlimit:"); + } + if (rlim.rlim_max == RLIM_INFINITY) { + if (rlim.rlim_cur != RLIM_INFINITY) { + /* try increasing current limit by nthreads */ + rlim.rlim_cur += nthreads; + } + } else { + /* try increasing current and hard limit by nthreads */ + rlim.rlim_cur = rlim.rlim_max += nthreads; + } + if (setrlimit(RLIMIT_NPROC, &rlim) < 0 && errno != EPERM) { + die("setrlimit()"); + } + + /* limit ourselves to reading the servedir and block further unveils */ + eunveil(servedir, "r"); + eunveil(NULL, NULL); + + /* chroot */ + if (chdir(servedir) < 0) { + die("chdir '%s':", servedir); + } + if (chroot(".") < 0) { + if (errno == EPERM) { + die("You need to run as root or have " + "CAP_SYS_CHROOT set"); + } else { + die("chroot:"); + } + } + + /* drop root */ + if (pwd->pw_uid == 0 || grp->gr_gid == 0) { + die("Won't run under root %s for hopefully obvious reasons", + (pwd->pw_uid == 0) ? (grp->gr_gid == 0) ? + "user and group" : "user" : "group"); + } + + if (setgroups(1, &(grp->gr_gid)) < 0) { + if (errno == EPERM) { + die("You need to run as root or have " + "CAP_SETGID set"); + } else { + die("setgroups:"); + } + } + if (setgid(grp->gr_gid) < 0) { + if (errno == EPERM) { + die("You need to run as root or have " + "CAP_SETGID set"); + } else { + die("setgid:"); + } + + } + if (setuid(pwd->pw_uid) < 0) { + if (errno == EPERM) { + die("You need to run as root or have " + "CAP_SETUID set"); + } else { + die("setuid:"); + } + } + + if (udsname) { + epledge("stdio rpath proc unix", NULL); + } else { + epledge("stdio rpath proc inet", NULL); + } + + /* accept incoming connections */ + server_init_thread_pool(insock, nthreads, nslots, &srv); + + exit(0); + default: + /* limit ourselves even further while we are waiting */ + if (udsname) { + eunveil(udsname, "c"); + eunveil(NULL, NULL); + epledge("stdio cpath", NULL); + } else { + eunveil("/", ""); + eunveil(NULL, NULL); + epledge("stdio", NULL); + } + + while 
(wait(&status) > 0) + ; + } + + cleanup(); + return status; +} diff --git a/quark.1 b/quark.1 @@ -0,0 +1,137 @@ +.Dd 2020-09-27 +.Dt QUARK 1 +.Os suckless.org +.Sh NAME +.Nm quark +.Nd simple static web server +.Sh SYNOPSIS +.Nm +.Fl p Ar port +.Op Fl h Ar host +.Op Fl u Ar user +.Op Fl g Ar group +.Op Fl s Ar num +.Op Fl t Ar num +.Op Fl d Ar dir +.Op Fl l +.Op Fl i Ar file +.Oo Fl v Ar vhost Oc ... +.Oo Fl m Ar map Oc ... +.Nm +.Fl U Ar file +.Op Fl p Ar port +.Op Fl u Ar user +.Op Fl g Ar group +.Op Fl s Ar num +.Op Fl t Ar num +.Op Fl d Ar dir +.Op Fl l +.Op Fl i Ar file +.Oo Fl v Ar vhost Oc ... +.Oo Fl m Ar map Oc ... +.Sh DESCRIPTION +.Nm +is a simple HTTP GET/HEAD-only web server for static content. +It supports virtual hosts (see +.Fl v ) , +explicit redirects (see +.Fl m ) , +directory listings (see +.Fl l ) , +conditional "If-Modified-Since"-requests (RFC 7232), range requests +(RFC 7233) and well-known URIs (RFC 8615), while refusing to serve +hidden files and directories. +.Sh OPTIONS +.Bl -tag -width Ds +.It Fl d Ar dir +Serve +.Ar dir +after chrooting into it. +The default is ".". +.It Fl g Ar group +Set group ID when dropping privileges, and in socket mode the group of the +socket file, to the ID of +.Ar group . +The default is "nogroup". +.It Fl h Ar host +Use +.Ar host +as the server hostname. +The default is the loopback interface (i.e. localhost). +.It Fl i Ar file +Set +.Ar file +as the directory index. +The default is "index.html". +.It Fl l +Enable directory listing. +.It Fl m Ar map +Add the URI prefix mapping rule specified by +.Ar map , +which has the form +.Qq Pa from to [chost] , +where each element is separated with spaces (0x20) that can be +escaped with '\\'. +.Pp +The prefix +.Pa from +of all matching URIs is replaced with +.Pa to , +optionally limited to the canonical virtual host +.Pa chost . +If no virtual hosts are given, +.Pa chost +is ignored. +.It Fl p Ar port +In host mode, listen on port +.Ar port +for incoming connections. 
+In socket mode, use +.Ar port +for constructing proper virtual host +redirects on non-standard ports. +.It Fl U Ar file +Create the UNIX-domain socket +.Ar file , +listen on it for incoming connections and remove it on exit. +.It Fl s Ar num +Set the number of connection slots per worker thread to +.Ar num . +The default is 64. +.It Fl t Ar num +Set the number of worker threads to +.Ar num . +The default is 4. +.It Fl u Ar user +Set user ID when dropping privileges, +and in socket mode the user of the socket file, +to the ID of +.Ar user . +The default is "nobody". +.It Fl v Ar vhost +Add the virtual host specified by +.Ar vhost , +which has the form +.Qq Pa chost regex dir [prefix] , +where each element is separated with spaces (0x20) that can be +escaped with '\\'. +.Pp +A request matching the virtual host regular expression +.Pa regex +(see +.Xr regex 3 ) +is redirected to the canonical host +.Pa chost , +if they differ, using the directory +.Pa dir +as the root directory, optionally prefixing the URI with +.Pa prefix . +If any virtual hosts are specified, all requests on non-matching +hosts are discarded. +.El +.Sh CUSTOMIZATION +.Nm +can be customized by creating a custom config.h from config.def.h and +(re)compiling the source code. This keeps it fast, secure and simple. +.Sh AUTHORS +.An Laslo Hunhold Aq Mt dev@frign.de diff --git a/queue.c b/queue.c @@ -0,0 +1,217 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <stddef.h> + +#ifdef __linux__ + #include <sys/epoll.h> +#else + #include <sys/types.h> + #include <sys/event.h> + #include <sys/time.h> +#endif + +#include "queue.h" +#include "util.h" + +int +queue_create(void) +{ + int qfd; + + #ifdef __linux__ + if ((qfd = epoll_create1(0)) < 0) { + warn("epoll_create1:"); + } + #else + if ((qfd = kqueue()) < 0) { + warn("kqueue:"); + } + #endif + + return qfd; +} + +int +queue_add_fd(int qfd, int fd, enum queue_event_type t, int shared, + const void *data) +{ + #ifdef __linux__ + struct epoll_event e; + + /* set event flag */ + if (shared) { + /* + * if the fd is shared, "exclusive" is the only + * way to avoid spurious wakeups and "blocking" + * accept()'s. + */ + e.events = EPOLLEXCLUSIVE; + } else { + /* + * if we have the fd for ourselves (i.e. only + * within the thread), we want to be + * edge-triggered, as our logic makes sure + * that the buffers are drained when we return + * to epoll_wait() + */ + e.events = EPOLLET; + } + + switch (t) { + case QUEUE_EVENT_IN: + e.events |= EPOLLIN; + break; + case QUEUE_EVENT_OUT: + e.events |= EPOLLOUT; + break; + } + + /* set data pointer */ + e.data.ptr = (void *)data; + + /* register fd in the interest list */ + if (epoll_ctl(qfd, EPOLL_CTL_ADD, fd, &e) < 0) { + warn("epoll_ctl:"); + return -1; + } + #else + struct kevent e; + int events; + + /* prepare event flag */ + events = (shared) ? 
0 : EV_CLEAR; + + switch (t) { + case QUEUE_EVENT_IN: + events |= EVFILT_READ; + break; + case QUEUE_EVENT_OUT: + events |= EVFILT_WRITE; + break; + } + + EV_SET(&e, fd, events, EV_ADD, 0, 0, (void *)data); + + if (kevent(qfd, &e, 1, NULL, 0, NULL) < 0) { + warn("kevent:"); + return -1; + } + #endif + + return 0; +} + +int +queue_mod_fd(int qfd, int fd, enum queue_event_type t, const void *data) +{ + #ifdef __linux__ + struct epoll_event e; + + /* set event flag (only for non-shared fd's) */ + e.events = EPOLLET; + + switch (t) { + case QUEUE_EVENT_IN: + e.events |= EPOLLIN; + break; + case QUEUE_EVENT_OUT: + e.events |= EPOLLOUT; + break; + } + + /* set data pointer */ + e.data.ptr = (void *)data; + + /* register fd in the interest list */ + if (epoll_ctl(qfd, EPOLL_CTL_MOD, fd, &e) < 0) { + warn("epoll_ctl:"); + return -1; + } + #else + struct kevent e; + int events; + + events = EV_CLEAR; + + switch (t) { + case QUEUE_EVENT_IN: + events |= EVFILT_READ; + break; + case QUEUE_EVENT_OUT: + events |= EVFILT_WRITE; + break; + } + + EV_SET(&e, fd, events, EV_ADD, 0, 0, (void *)data); + + if (kevent(qfd, &e, 1, NULL, 0, NULL) < 0) { + warn("kevent:"); + return -1; + } + #endif + + return 0; +} + +int +queue_rem_fd(int qfd, int fd) +{ + #ifdef __linux__ + struct epoll_event e; + + if (epoll_ctl(qfd, EPOLL_CTL_DEL, fd, &e) < 0) { + warn("epoll_ctl:"); + return -1; + } + #else + struct kevent e; + + EV_SET(&e, fd, 0, EV_DELETE, 0, 0, 0); + + if (kevent(qfd, &e, 1, NULL, 0, NULL) < 0) { + warn("kevent:"); + return -1; + } + #endif + + return 0; +} + +ssize_t +queue_wait(int qfd, queue_event *e, size_t elen) +{ + ssize_t nready; + + #ifdef __linux__ + if ((nready = epoll_wait(qfd, e, elen, -1)) < 0) { + warn("epoll_wait:"); + return -1; + } + #else + if ((nready = kevent(qfd, NULL, 0, e, elen, NULL)) < 0) { + warn("kevent:"); + return -1; + } + #endif + + return nready; +} + +void * +queue_event_get_data(const queue_event *e) +{ + #ifdef __linux__ + return e->data.ptr; + 
#else + return e->udata; + #endif +} + +int +queue_event_is_error(const queue_event *e) +{ + #ifdef __linux__ + return (e->events & ~(EPOLLIN | EPOLLOUT)) ? 1 : 0; + #else + return (e->flags & EV_EOF) ? 1 : 0; + #endif +} diff --git a/queue.h b/queue.h @@ -0,0 +1,33 @@ +#ifndef QUEUE_H +#define QUEUE_H + +#include <stddef.h> + +#ifdef __linux__ + #include <sys/epoll.h> + + typedef struct epoll_event queue_event; +#else + #include <sys/types.h> + #include <sys/event.h> + #include <sys/time.h> + + typedef struct kevent queue_event; +#endif + +enum queue_event_type { + QUEUE_EVENT_IN, + QUEUE_EVENT_OUT, +}; + +int queue_create(void); +int queue_add_fd(int, int, enum queue_event_type, int, const void *); +int queue_mod_fd(int, int, enum queue_event_type, const void *); +int queue_rem_fd(int, int); +ssize_t queue_wait(int, queue_event *, size_t); + +void *queue_event_get_data(const queue_event *); + +int queue_event_is_error(const queue_event *e); + +#endif /* QUEUE_H */ diff --git a/server.c b/server.c @@ -0,0 +1,177 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <errno.h> +#include <pthread.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "connection.h" +#include "queue.h" +#include "server.h" +#include "util.h" + +struct worker_data { + int insock; + size_t nslots; + const struct server *srv; +}; + +static void * +server_worker(void *data) +{ + queue_event *event = NULL; + struct connection *connection, *c, *newc; + struct worker_data *d = (struct worker_data *)data; + int qfd; + ssize_t nready; + size_t i; + + /* allocate connections */ + if (!(connection = calloc(d->nslots, sizeof(*connection)))) { + die("calloc:"); + } + + /* create event queue */ + if ((qfd = queue_create()) < 0) { + exit(1); + } + + /* add insock to the interest list (with data=NULL) */ + if (queue_add_fd(qfd, d->insock, QUEUE_EVENT_IN, 1, NULL) < 0) { + exit(1); + } + + /* allocate event array */ + if (!(event = reallocarray(event, d->nslots, sizeof(*event)))) { + die("reallocarray:"); + } + + for (;;) { + /* wait for new activity */ + if ((nready = queue_wait(qfd, event, d->nslots)) < 0) { + exit(1); + } + + /* handle events */ + for (i = 0; i < (size_t)nready; i++) { + c = queue_event_get_data(&event[i]); + + if (queue_event_is_error(&event[i])) { + if (c != NULL) { + queue_rem_fd(qfd, c->fd); + c->res.status = 0; + connection_log(c); + connection_reset(c); + } + + continue; + } + + if (c == NULL) { + /* add new connection to the interest list */ + if (!(newc = connection_accept(d->insock, + connection, + d->nslots))) { + /* + * the socket is either blocking + * or something failed. 
+ * In both cases, we just carry on + */ + continue; + } + + /* + * add event to the interest list + * (we want IN, because we start + * with receiving the header) + */ + if (queue_add_fd(qfd, newc->fd, + QUEUE_EVENT_IN, + 0, newc) < 0) { + /* not much we can do here */ + continue; + } + } else { + /* serve existing connection */ + connection_serve(c, d->srv); + + if (c->fd == 0) { + /* we are done */ + memset(c, 0, sizeof(struct connection)); + continue; + } + + /* + * rearm the event based on the state + * we are "stuck" at + */ + switch(c->state) { + case C_RECV_HEADER: + if (queue_mod_fd(qfd, c->fd, + QUEUE_EVENT_IN, + c) < 0) { + connection_reset(c); + break; + } + break; + case C_SEND_HEADER: + case C_SEND_BODY: + if (queue_mod_fd(qfd, c->fd, + QUEUE_EVENT_OUT, + c) < 0) { + connection_reset(c); + break; + } + break; + default: + break; + } + } + } + } + + return NULL; +} + +void +server_init_thread_pool(int insock, size_t nthreads, size_t nslots, + const struct server *srv) +{ + pthread_t *thread = NULL; + struct worker_data *d = NULL; + size_t i; + + /* allocate worker_data structs */ + if (!(d = reallocarray(d, nthreads, sizeof(*d)))) { + die("reallocarray:"); + } + for (i = 0; i < nthreads; i++) { + d[i].insock = insock; + d[i].nslots = nslots; + d[i].srv = srv; + } + + /* allocate and initialize thread pool */ + if (!(thread = reallocarray(thread, nthreads, sizeof(*thread)))) { + die("reallocarray:"); + } + for (i = 0; i < nthreads; i++) { + if (pthread_create(&thread[i], NULL, server_worker, &d[i]) != 0) { + if (errno == EAGAIN) { + die("You need to run as root or have " + "CAP_SYS_RESOURCE set, or are trying " + "to create more threads than the " + "system can offer"); + } else { + die("pthread_create:"); + } + } + } + + /* wait for threads */ + for (i = 0; i < nthreads; i++) { + if ((errno = pthread_join(thread[i], NULL))) { + warn("pthread_join:"); + } + } +} diff --git a/server.h b/server.h @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and 
license details. */ +#ifndef SERVER_H +#define SERVER_H + +#include <regex.h> +#include <stddef.h> + +struct vhost { + char *chost; + char *regex; + char *dir; + char *prefix; + regex_t re; +}; + +struct map { + char *chost; + char *from; + char *to; +}; + +struct server { + char *host; + char *port; + char *docindex; + int listdirs; + struct vhost *vhost; + size_t vhost_len; + struct map *map; + size_t map_len; +}; + +void server_init_thread_pool(int, size_t, size_t, const struct server *); + +#endif /* SERVER_H */ diff --git a/sock.c b/sock.c @@ -0,0 +1,209 @@ +/* See LICENSE file for copyright and license details. */ +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netinet/in.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/un.h> +#include <unistd.h> + +#include "sock.h" +#include "util.h" + +int +sock_get_ips(const char *host, const char* port) +{ + struct addrinfo hints = { + .ai_flags = AI_NUMERICSERV, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *ai, *p; + int ret, insock = 0; + + if ((ret = getaddrinfo(host, port, &hints, &ai))) { + die("getaddrinfo: %s", gai_strerror(ret)); + } + + for (p = ai; p; p = p->ai_next) { + if ((insock = socket(p->ai_family, p->ai_socktype, + p->ai_protocol)) < 0) { + continue; + } + if (setsockopt(insock, SOL_SOCKET, SO_REUSEADDR, + &(int){1}, sizeof(int)) < 0) { + die("setsockopt:"); + } + if (bind(insock, p->ai_addr, p->ai_addrlen) < 0) { + /* bind failed, close the insock and retry */ + if (close(insock) < 0) { + die("close:"); + } + continue; + } + break; + } + freeaddrinfo(ai); + if (!p) { + /* we exhaustet the addrinfo-list and found no connection */ + if (errno == EACCES) { + die("You need to run as root or have " + "CAP_NET_BIND_SERVICE set to bind to " + "privileged ports"); + } else { + die("bind:"); + } + } + + if 
(listen(insock, SOMAXCONN) < 0) { + die("listen:"); + } + + return insock; +} + +int +sock_get_uds(const char *udsname, uid_t uid, gid_t gid) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + size_t udsnamelen; + int insock, sockmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | + S_IROTH | S_IWOTH; + + if ((insock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + die("socket:"); + } + + if ((udsnamelen = strlen(udsname)) > sizeof(addr.sun_path) - 1) { + die("UNIX-domain socket name truncated"); + } + memcpy(addr.sun_path, udsname, udsnamelen + 1); + + if (bind(insock, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + die("bind '%s':", udsname); + } + + if (listen(insock, SOMAXCONN) < 0) { + sock_rem_uds(udsname); + die("listen:"); + } + + if (chmod(udsname, sockmode) < 0) { + sock_rem_uds(udsname); + die("chmod '%s':", udsname); + } + + if (chown(udsname, uid, gid) < 0) { + sock_rem_uds(udsname); + die("chown '%s':", udsname); + } + + return insock; +} + +void +sock_rem_uds(const char *udsname) +{ + if (unlink(udsname) < 0) { + die("unlink '%s':", udsname); + } +} + +int +sock_set_timeout(int fd, int sec) +{ + struct timeval tv; + + tv.tv_sec = sec; + tv.tv_usec = 0; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0 || + setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) < 0) { + warn("setsockopt:"); + return 1; + } + + return 0; +} + +int +sock_set_nonblocking(int fd) +{ + int flags; + + if ((flags = fcntl(fd, F_GETFL, 0)) < 0) { + warn("fcntl:"); + return 1; + } + + flags |= O_NONBLOCK; + + if (fcntl(fd, F_SETFL, flags) < 0) { + warn("fcntl:"); + return 1; + } + + return 0; +} + +int +sock_get_inaddr_str(const struct sockaddr_storage *in_sa, char *str, + size_t len) +{ + switch (in_sa->ss_family) { + case AF_INET: + if (!inet_ntop(AF_INET, + &(((struct sockaddr_in *)in_sa)->sin_addr), + str, len)) { + warn("inet_ntop:"); + return 1; + } + break; + case AF_INET6: + if (!inet_ntop(AF_INET6, + &(((struct sockaddr_in6 
*)in_sa)->sin6_addr), + str, len)) { + warn("inet_ntop:"); + return 1; + } + break; + case AF_UNIX: + snprintf(str, len, "uds"); + break; + default: + snprintf(str, len, "-"); + } + + return 0; +} + +int +sock_same_addr(const struct sockaddr_storage *sa1, const struct sockaddr_storage *sa2) +{ + /* return early if address-families don't match */ + if (sa1->ss_family != sa2->ss_family) { + return 0; + } + + switch (sa1->ss_family) { + case AF_INET6: + return memcmp(((struct sockaddr_in6 *)sa1)->sin6_addr.s6_addr, + ((struct sockaddr_in6 *)sa2)->sin6_addr.s6_addr, + sizeof(((struct sockaddr_in6 *)sa1)->sin6_addr.s6_addr)) == 0; + case AF_INET: + return ((struct sockaddr_in *)sa1)->sin_addr.s_addr == + ((struct sockaddr_in *)sa2)->sin_addr.s_addr; + default: /* AF_UNIX */ + return strcmp(((struct sockaddr_un *)sa1)->sun_path, + ((struct sockaddr_un *)sa2)->sun_path) == 0; + } +} diff --git a/sock.h b/sock.h @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#ifndef SOCK_H +#define SOCK_H + +#include <stddef.h> +#include <sys/socket.h> +#include <sys/types.h> + +int sock_get_ips(const char *, const char *); +int sock_get_uds(const char *, uid_t, gid_t); +void sock_rem_uds(const char *); +int sock_set_timeout(int, int); +int sock_set_nonblocking(int); +int sock_get_inaddr_str(const struct sockaddr_storage *, char *, size_t); +int sock_same_addr(const struct sockaddr_storage *, + const struct sockaddr_storage *); + +#endif /* SOCK_H */ diff --git a/util.c b/util.c @@ -0,0 +1,281 @@ +/* See LICENSE file for copyright and license details. 
*/
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#endif /* __OpenBSD__ */
+
+#include "util.h"
+
+/* program name used as message prefix by warn() and die() */
+char *argv0;
+
+/*
+ * Print a formatted message to stderr. The message is prefixed with
+ * "argv0: " unless argv0 is unset or fmt starts with "usage". If fmt
+ * ends with ':', a space and the perror() text for the current errno
+ * are appended, otherwise a newline.
+ */
+static void
+verr(const char *fmt, va_list ap)
+{
+	if (argv0 && strncmp(fmt, "usage", sizeof("usage") - 1)) {
+		fprintf(stderr, "%s: ", argv0);
+	}
+
+	vfprintf(stderr, fmt, ap);
+
+	if (fmt[0] && fmt[strlen(fmt) - 1] == ':') {
+		fputc(' ', stderr);
+		perror(NULL);
+	} else {
+		fputc('\n', stderr);
+	}
+}
+
+/* Print a message in the format of verr() and return. */
+void
+warn(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	verr(fmt, ap);
+	va_end(ap);
+}
+
+/* Print a message in the format of verr() and exit with status 1. */
+void
+die(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	verr(fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+/*
+ * Call pledge() on OpenBSD and die if it fails; does nothing on
+ * other systems.
+ */
+void
+epledge(const char *promises, const char *execpromises)
+{
+	/* the parameters are unused when not building for OpenBSD */
+	(void)promises;
+	(void)execpromises;
+
+#ifdef __OpenBSD__
+	if (pledge(promises, execpromises) == -1) {
+		die("pledge:");
+	}
+#endif /* __OpenBSD__ */
+}
+
+/*
+ * Call unveil() on OpenBSD and die if it fails; does nothing on
+ * other systems.
+ */
+void
+eunveil(const char *path, const char *permissions)
+{
+	/* the parameters are unused when not building for OpenBSD */
+	(void)path;
+	(void)permissions;
+
+#ifdef __OpenBSD__
+	if (unveil(path, permissions) == -1) {
+		die("unveil:");
+	}
+#endif /* __OpenBSD__ */
+}
+
+/*
+ * Format the time t in GMT as "%a, %d %b %Y %T GMT" into buf (len
+ * bytes).
+ *
+ * Returns 0 on success and 1 if the conversion fails or strftime()
+ * returns 0.
+ */
+int
+timestamp(char *buf, size_t len, time_t t)
+{
+	struct tm tm;
+
+	if (gmtime_r(&t, &tm) == NULL ||
+	    strftime(buf, len, "%a, %d %b %Y %T GMT", &tm) == 0) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * snprintf() with a boolean result: returns 0 if the complete output
+ * fit into str (size bytes) and 1 on error or truncation.
+ */
+int
+esnprintf(char *str, size_t size, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = vsnprintf(str, size, fmt, ap);
+	va_end(ap);
+
+	return (ret < 0 || (size_t)ret >= size);
+}
+
+/*
+ * Insert prefix in front of the string in str (buffer of size
+ * bytes).
+ *
+ * Returns 0 on success and 1, leaving str untouched, if the result
+ * including the NUL-byte would not fit.
+ */
+int
+prepend(char *str, size_t size, const char *prefix)
+{
+	size_t len = strlen(str), prefixlen = strlen(prefix);
+
+	if (len + prefixlen + 1 > size) {
+		return 1;
+	}
+
+	/* shift the string (with its NUL-byte) to make room */
+	memmove(str + prefixlen, str, len + 1);
+	memcpy(str, prefix, prefixlen);
+
+	return 0;
+}
+
+/*
+ * Split s at spaces into at most tlen tokens. Each token is stored
+ * as a freshly malloc()'ed string in t, with every "\ " sequence
+ * turned into a plain space; unused entries of t are NULL.
+ *
+ * Returns 0 on success and 1 on failure (s is NULL, s has leading or
+ * trailing spaces, s is empty or holds more than tlen tokens). After
+ * a failure all entries of t are NULL. Dies if malloc() fails.
+ */
+int
+spacetok(const char *s, char **t, size_t tlen)
+{
+	const char *tok;
+	size_t i, j, toki, spaces;
+
+	/* fill token-array with NULL-pointers */
+	for (i = 0; i < tlen; i++) {
+		t[i] = NULL;
+	}
+	toki = 0;
+
+	/* don't allow NULL string or leading spaces */
+	if (!s || *s == ' ') {
+		return 1;
+	}
+start:
+	/* skip spaces */
+	for (; *s == ' '; s++)
+		;
+
+	/* don't allow trailing spaces */
+	if (*s == '\0') {
+		goto err;
+	}
+
+	/* consume token (spaces counts the escaped spaces within it) */
+	for (tok = s, spaces = 0; ; s++) {
+		if (*s == '\\' && *(s + 1) == ' ') {
+			spaces++;
+			s++;
+			continue;
+		} else if (*s == ' ') {
+			/* end of token */
+			goto token;
+		} else if (*s == '\0') {
+			/* end of string */
+			goto token;
+		}
+	}
+token:
+	if (toki >= tlen) {
+		goto err;
+	}
+	/* every escape sequence shrinks by one byte in the copy */
+	if (!(t[toki] = malloc(s - tok - spaces + 1))) {
+		die("malloc:");
+	}
+	for (i = 0, j = 0; j < s - tok - spaces + 1; i++, j++) {
+		if (tok[i] == '\\' && tok[i + 1] == ' ') {
+			/* skip the backslash, copy the space */
+			i++;
+		}
+		t[toki][j] = tok[i];
+	}
+	t[toki][s - tok - spaces] = '\0';
+	toki++;
+
+	if (*s == ' ') {
+		s++;
+		goto start;
+	}
+
+	return 0;
+err:
+	/* release everything we handed out so far */
+	for (i = 0; i < tlen; i++) {
+		free(t[i]);
+		t[i] = NULL;
+	}
+
+	return 1;
+}
+
+
+
+/* indices into the ev table of strtonum() */
+#define	INVALID 	1
+#define	TOOSMALL	2
+#define	TOOLARGE	3
+
+/*
+ * Convert the base-10 string numstr to a number in the range
+ * [minval, maxval].
+ *
+ * On success the number is returned, *errstrp (if errstrp is not
+ * NULL) is set to NULL and errno keeps the value it had on entry. On
+ * failure 0 is returned, *errstrp is set to "invalid", "too small"
+ * or "too large" and errno to EINVAL or ERANGE.
+ */
+long long
+strtonum(const char *numstr, long long minval, long long maxval,
+         const char **errstrp)
+{
+	long long ll = 0;
+	int error = 0;
+	char *ep;
+	struct errval {
+		const char *errstr;
+		int err;
+	} ev[4] = {
+		{ NULL,	0 },
+		{ "invalid",	EINVAL },
+		{ "too small",	ERANGE },
+		{ "too large",	ERANGE },
+	};
+
+	/* slot 0 restores the caller's errno in the success case */
+	ev[0].err = errno;
+	errno = 0;
+	if (minval > maxval) {
+		error = INVALID;
+	} else {
+		ll = strtoll(numstr, &ep, 10);
+		if (numstr == ep || *ep != '\0')
+			error = INVALID;
+		else if ((ll == LLONG_MIN && errno == ERANGE) || ll < minval)
+			error = TOOSMALL;
+		else if ((ll == LLONG_MAX && errno == ERANGE) || ll > maxval)
+			error = TOOLARGE;
+	}
+	if (errstrp != NULL)
+		*errstrp = ev[error].errstr;
+	errno = ev[error].err;
+	if (error)
+		ll = 0;
+
+	return ll;
+}
+
+/*
+ * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX
+ * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW
+ */
+#define MUL_NO_OVERFLOW	((size_t)1 << (sizeof(size_t) * 4))
+
+/*
+ * realloc() for an array of nmemb elements of size bytes each that
+ * fails with ENOMEM and returns NULL if nmemb * size would overflow.
+ */
+void *
+reallocarray(void *optr, size_t nmemb, size_t size)
+{
+	/* the division is only needed if one factor is large */
+	if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
+	    nmemb > 0 && SIZE_MAX / nmemb < size) {
+		errno = ENOMEM;
+		return NULL;
+	}
+	return realloc(optr, size * nmemb);
+}
+
+/*
+ * Append formatted output at offset buf->len of buf->data.
+ *
+ * Returns 0 on success, after advancing buf->len by the number of
+ * bytes written. Returns 1 on error or truncation; in that case
+ * buf->len is unchanged and the rest of buf->data is zeroed.
+ */
+int
+buffer_appendf(struct buffer *buf, const char *suffixfmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, suffixfmt);
+	ret = vsnprintf(buf->data + buf->len,
+	                sizeof(buf->data) - buf->len, suffixfmt, ap);
+	va_end(ap);
+
+	if (ret < 0 || (size_t)ret >= (sizeof(buf->data) - buf->len)) {
+		/* truncation occurred, discard and error out */
+		memset(buf->data + buf->len, 0,
+		       sizeof(buf->data) - buf->len);
+		return 1;
+	}
+
+	/* increase buffer length by number of bytes written */
+	buf->len += ret;
+
+	return 0;
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,42 @@
+/* See LICENSE file for copyright and license details. */
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <regex.h>
+#include <stddef.h>
+#include <time.h>
+
+#include "config.h"
+
+/* general purpose buffer */
+struct buffer {
+	char data[BUFFER_SIZE];
+	size_t len;
+};
+
+/* note: these macros evaluate their arguments more than once */
+#undef MIN
+#define MIN(x,y)  ((x) < (y) ? (x) : (y))
+#undef MAX
+#define MAX(x,y)  ((x) > (y) ? (x) : (y))
+/* number of elements of an array (not usable on pointers) */
+#undef LEN
+#define LEN(x) (sizeof (x) / sizeof *(x))
+
+extern char *argv0;
+
+void warn(const char *, ...);
+void die(const char *, ...);
+
+void epledge(const char *, const char *);
+void eunveil(const char *, const char *);
+
+int timestamp(char *, size_t, time_t);
+int esnprintf(char *, size_t, const char *, ...);
+int prepend(char *, size_t, const char *);
+int spacetok(const char *, char **, size_t);
+
+void *reallocarray(void *, size_t, size_t);
+long long strtonum(const char *, long long, long long, const char **);
+
+int buffer_appendf(struct buffer *, const char *, ...);
+
+#endif /* UTIL_H */