commit f042087d967b02645376087c5eaa0b4259f93cb6
Author: Kris Yotam <krisyotam@protonmail.com>
Date: Mon, 16 Feb 2026 02:43:07 -0600
Initial commit: sparser - Simple Parser
Suckless C tool that extracts external URLs from text files.
Supports HTML, Markdown, MDX, plain text. Can recursively walk
directories and deduplicate output. Zero external dependencies.
Diffstat:
| A | .claude/CLAUDE.md | | | 146 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | .gitignore | | | 2 | ++ |
| A | LICENSE | | | 21 | +++++++++++++++++++++ |
| A | Makefile | | | 41 | +++++++++++++++++++++++++++++++++++++++++ |
| A | config.h | | | 21 | +++++++++++++++++++++ |
| A | extract.c | | | 212 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | extract.h | | | 22 | ++++++++++++++++++++++ |
| A | sparser.c | | | 350 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | util.c | | | 101 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | util.h | | | 22 | ++++++++++++++++++++++ |
10 files changed, 938 insertions(+), 0 deletions(-)
diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
@@ -0,0 +1,146 @@
+# sparser — CLAUDE.md
+
+## Project
+
+sparser (Simple Parser) is a suckless tool that extracts external URLs
+from text-based files. It handles HTML, Markdown (MD/MDX), plain text,
+and other text files. It can process a single file, read from stdin, or
+recursively walk a directory tree. Outputs one URL per line to stdout.
+
+Designed to pair with suploader for a pipeline:
+ sparser -R /content | suploader -
+
+## Coding Standards — Suckless C Style
+
+All code in this project MUST follow the suckless.org coding style:
+
+### Language
+- C99 (ISO/IEC 9899:1999), no extensions
+- POSIX.1-2008 (`_POSIX_C_SOURCE 200809L`)
+
+### Indentation & Whitespace
+- Tabs for indentation (1 tab = 1 level)
+- Spaces for alignment only, never for indentation
+- No tabs except at the beginning of a line
+- Maximum line length: 79 characters
+
+### Comments
+- Use `/* */` only, never `//`
+- Comment fallthrough cases in switch statements
+
+### Variables
+- All declarations at the top of the block
+- Pointer `*` adjacent to variable name: `char *p`, not `char* p`
+- No C99 `bool`; use `int` (0/1)
+- Global/static variables not used outside TU must be `static`
+
+### Functions
+- Return type on its own line
+- Function name at column 0 on next line (enables `grep ^funcname`)
+- Opening `{` on its own line for functions
+- Functions not used outside their file: `static`
+
+```c
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: sparser [-v] [-R] path\n");
+ exit(1);
+}
+```
+
+### Braces
+- Opening `{` on same line for control flow (if, for, while, switch)
+- Closing `}` on its own line unless continuing (else, do-while)
+- Use braces even for single statements when sibling branches use them
+
+### Naming
+- lowercase_with_underscores for functions and variables
+- UPPERCASE for macros and constants
+- CamelCase for typedef'd struct types
+- No `_t` suffix (reserved by POSIX)
+- Prefix module functions with module name
+
+### Control Flow
+- Space after `if`, `for`, `while`, `switch`
+- No space after `(` or before `)`
+- Use `goto` for cleanup/unwind, not nested ifs
+- Return/exit early on failure
+- Compare returns against 0, not -1: `if (func() < 0)`, not `if (func() == -1)`
+
+### Error Handling
+- All allocation checked; goto cleanup on failure
+- `die()` for fatal errors (prints message, exits)
+- `warn()` for recoverable errors (prints, continues)
+
+### File Organization Order
+1. License header
+2. System includes (alphabetical)
+3. Local includes
+4. Macros
+5. Type definitions
+6. Function declarations
+7. Global variables
+8. Function definitions (same order as declarations)
+
+### Headers
+- System headers first, alphabetical
+- Local headers after blank line
+- No cyclic dependencies
+- Include only what is needed
+
+## Architecture
+
+### Module Layout
+
+| Module | Prefix | File | Responsibility |
+|--------|--------|------|----------------|
+| Main | — | sparser.c | Entry point, directory walking, file dispatch |
+| Extract | `extract_` | extract.c | URL extraction from text content |
+| Utilities | `die`, `warn`, `x*` | util.c | Memory wrappers, string ops, error handling |
+| Config | — | config.h | Compile-time constants |
+
+### Architecture Rules
+- **Separate compilation.** Every .c file compiles independently.
+- **No dynamic loading.** All features compiled in.
+- **No external dependencies.** Pure C99 + POSIX.
+- **Line-oriented output.** One URL per line to stdout.
+- **Unix pipeline friendly.** Works with pipes, xargs, etc.
+
+## Build
+
+```sh
+make # build sparser binary
+make clean # remove build artifacts
+make install # install to /usr/local/bin
+```
+
+Dependencies: none (pure C99 + POSIX)
+
+## Usage
+
+```sh
+# Extract URLs from a single file
+sparser page.html
+
+# Recursive directory scan
+sparser -R /content
+
+# Read from stdin
+cat file.md | sparser -
+
+# Verbose (show file names being processed)
+sparser -v -R /content
+
+# Deduplicate output
+sparser -u -R /content
+
+# Pipeline with suploader
+sparser -u -R /content | suploader -
+```
+
+## Git Conventions
+
+- No `Co-Authored-By: Claude` lines
+- Commit messages: imperative, <72 chars, no period
+- One logical change per commit
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+sparser
+*.o
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT/X Consortium License
+
+(c) 2026 Kris Yotam <krisyotam@protonmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,41 @@
+# sparser - Simple Parser
+# See LICENSE file for copyright and license details.
+
+VERSION = 0.1.0
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = $(PREFIX)/share/man
+
+# flags
+CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_POSIX_C_SOURCE=200809L -DVERSION=\"$(VERSION)\"
+CFLAGS = -std=c99 -pedantic -Wall -Wextra -Os $(CPPFLAGS)
+LDFLAGS =
+
+# compiler
+CC = cc
+
+# sources
+SRC = sparser.c extract.c util.c
+OBJ = $(SRC:.c=.o)
+
+all: sparser
+
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+
+sparser: $(OBJ)
+ $(CC) -o $@ $(OBJ) $(LDFLAGS)
+
+clean:
+ rm -f sparser $(OBJ)
+
+install: all
+ mkdir -p $(DESTDIR)$(PREFIX)/bin
+ cp -f sparser $(DESTDIR)$(PREFIX)/bin
+ chmod 755 $(DESTDIR)$(PREFIX)/bin/sparser
+
+uninstall:
+ rm -f $(DESTDIR)$(PREFIX)/bin/sparser
+
+.PHONY: all clean install uninstall
diff --git a/config.h b/config.h
@@ -0,0 +1,21 @@
+/* See LICENSE file for copyright and license details.
+ * sparser - Simple Parser
+ * configuration header
+ */
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* Program metadata */
+#define PROG_NAME "sparser"
+#define PROG_VERSION "0.1.0"
+
+/* File processing limits */
+#define MAX_FILE_SIZE (100 * 1024 * 1024) /* 100 MB max file */
+#define MAX_LINE_LEN 8192
+#define MAX_URL_LEN 4096
+
+/* Supported text file extensions (checked during -R recursion) */
+/* Binary files and executables are always skipped */
+
+#endif /* CONFIG_H */
diff --git a/extract.c b/extract.c
@@ -0,0 +1,212 @@
+/* See LICENSE file for copyright and license details.
+ *
+ * URL extraction from text content.
+ *
+ * Strategy: scan for "http://" and "https://" anchors,
+ * then greedily extend the match character by character
+ * until hitting a character that cannot be part of a URL.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "extract.h"
+#include "util.h"
+
+/*
+ * Return nonzero if c may legally appear inside a URL.
+ *
+ * Accepted set per RFC 3986: alphanumerics, the unreserved
+ * punctuation, gen-delims, sub-delims and '%' (pct-encoded).
+ * Anything else (spaces, quotes, '<', '>', ...) ends a match.
+ */
+static int
+is_url_char(unsigned char c)
+{
+	/* every non-alphanumeric byte a URL may contain */
+	static const char allowed[] = "-._~:/?#[]@!$&'()*+,;=%";
+
+	if (isalnum(c))
+		return 1;
+	return c != '\0' && strchr(allowed, c) != NULL;
+}
+
+/*
+ * Strip trailing punctuation that is commonly not part of URLs
+ * when they appear in prose text. E.g.:
+ * "Visit https://example.com." -> strip trailing "."
+ * "(see https://example.com)" -> strip trailing ")"
+ * "https://example.com," -> strip trailing ","
+ * A trailing ')' or ']' is kept when it closes a bracket opened
+ * inside the URL itself (Wikipedia-style "..._(film)").
+ */
+
+/* Count of `open` minus `close` brackets in url[0..len). */
+static int
+pair_balance(const char *url, size_t len, char open, char close)
+{
+	size_t i;
+	int bal;
+
+	bal = 0;
+	for (i = 0; i < len; i++) {
+		if (url[i] == open)
+			bal++;
+		else if (url[i] == close)
+			bal--;
+	}
+	return bal;
+}
+
+static size_t
+strip_trailing(const char *url, size_t len)
+{
+	while (len > 0) {
+		unsigned char c = url[len - 1];
+
+		/* Punctuation that always ends prose, plus '>' which
+		 * closes an angle-bracketed URL. */
+		if (c == '.' || c == ',' || c == ';' ||
+		    c == ':' || c == '!' || c == '\'' || c == '>') {
+			len--;
+			continue;
+		}
+
+		/* Strip a closing bracket only when unbalanced. */
+		if (c == ')' && pair_balance(url, len, '(', ')') < 0) {
+			len--;
+			continue;
+		}
+		if (c == ']' && pair_balance(url, len, '[', ']') < 0) {
+			len--;
+			continue;
+		}
+
+		break;
+	}
+
+	return len;
+}
+
+/*
+ * Extract a single URL starting at the given position.
+ * Returns the length of the URL, or 0 if invalid.
+ * Scans data[pos..total_len); never reads past total_len.
+ */
+static size_t
+extract_one(const char *data, size_t pos, size_t total_len)
+{
+	size_t len, proto;
+
+	/* Must start with http:// or https:// */
+	if (total_len - pos >= 8 &&
+	    strncmp(data + pos, "https://", 8) == 0) {
+		proto = 8;
+	} else if (total_len - pos >= 7 &&
+	           strncmp(data + pos, "http://", 7) == 0) {
+		proto = 7;
+	} else {
+		return 0;
+	}
+
+	/* Greedily extend while characters are valid URL chars */
+	len = proto;
+	while (pos + len < total_len &&
+	       is_url_char((unsigned char)data[pos + len])) {
+		len++;
+		if (len >= MAX_URL_LEN)
+			break;
+	}
+
+	/* Strip trailing punctuation */
+	len = strip_trailing(data + pos, len);
+
+	/* Require content after the scheme. Checking AFTER the strip
+	 * rejects inputs like "http://..." that previously produced a
+	 * bare "http://" match; proto also replaces the fragile
+	 * data[start + 4] == 's' scheme probe. */
+	if (len <= proto)
+		return 0;
+
+	return len;
+}
+
+/*
+ * Scan data[0..len) for http(s) URLs and invoke cb on each match.
+ * All bounds checks are against len, so the buffer need not be
+ * NUL-terminated (the old pre-check compared 8 bytes of "https://"
+ * after guaranteeing only 7 in range, which could read one byte
+ * past the caller's buffer).
+ */
+void
+extract_urls(const char *data, size_t len,
+             UrlCallback cb, void *ctx)
+{
+	size_t pos, url_len;
+	char *url;
+
+	pos = 0;
+	while (pos < len) {
+		/* Cheap anchor: every match starts with 'h'. */
+		if (data[pos] != 'h') {
+			pos++;
+			continue;
+		}
+
+		/* extract_one() matches the scheme within bounds. */
+		url_len = extract_one(data, pos, len);
+		if (url_len == 0) {
+			pos++;
+			continue;
+		}
+
+		/* Copy URL and deliver via callback */
+		url = xmalloc(url_len + 1);
+		memcpy(url, data + pos, url_len);
+		url[url_len] = '\0';
+
+		cb(url, ctx);
+		free(url);
+
+		pos += url_len;
+	}
+}
+
+/*
+ * Heuristic binary detection: a NUL byte anywhere in the first
+ * 8 KiB marks the buffer as binary.
+ */
+int
+is_binary(const char *data, size_t len)
+{
+	size_t probe;
+
+	probe = len < 8192 ? len : 8192;
+	return memchr(data, '\0', probe) != NULL;
+}
diff --git a/extract.h b/extract.h
@@ -0,0 +1,22 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef EXTRACT_H
+#define EXTRACT_H
+
+#include <stddef.h>
+
+/* Callback invoked for each extracted URL.
+ * url: the extracted URL string
+ * ctx: user context pointer */
+typedef void (*UrlCallback)(const char *url, void *ctx);
+
+/* Extract all http/https URLs from a buffer.
+ * Calls cb for each URL found.
+ * Handles: plain text, HTML, Markdown, MDX */
+void extract_urls(const char *data, size_t len,
+ UrlCallback cb, void *ctx);
+
+/* Check if a file appears to be binary (contains null bytes) */
+int is_binary(const char *data, size_t len);
+
+#endif /* EXTRACT_H */
diff --git a/sparser.c b/sparser.c
@@ -0,0 +1,350 @@
+/* See LICENSE file for copyright and license details.
+ *
+ * sparser - Simple Parser
+ *
+ * Extracts external URLs from text files.
+ * Supports HTML, Markdown, MDX, plain text.
+ * Can recursively walk directories.
+ */
+
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "extract.h"
+#include "util.h"
+
+/* Hash table for URL deduplication */
+#define DEDUP_SIZE 65521
+
+typedef struct DeNode {
+ char *url;
+ struct DeNode *next;
+} DeNode;
+
+/* Global options */
+static int verbose = 0;
+static int recurse = 0;
+static int dedup = 0;
+static DeNode *dedup_table[DEDUP_SIZE];
+
+/* Print the usage synopsis to stderr and exit(1).
+ * Reached via -h and any unrecognized option (see main's getopt
+ * loop), and when no path argument is supplied. */
+static void
+usage(void)
+{
+ fprintf(stderr,
+ "usage: sparser [-vuR] [path | -]\n"
+ "\n"
+ " -v verbose (print filenames to stderr)\n"
+ " -u deduplicate URLs\n"
+ " -R recursive directory scan\n"
+ "\n"
+ " path file or directory to scan\n"
+ " - read from stdin\n");
+ exit(1);
+}
+
+/* 32-bit FNV-1a hash of a NUL-terminated string.
+ * The explicit mask keeps the value within 32 bits even where
+ * unsigned long is 64-bit (LP64); without it the multiply escapes
+ * 32 bits and the result is no longer FNV-1a, so hash values
+ * differed between 32- and 64-bit builds. Only used mod
+ * DEDUP_SIZE, but now matches the published FNV-1a vectors. */
+static unsigned long
+fnv1a(const char *s)
+{
+	unsigned long h = 2166136261UL;
+
+	for (; *s; s++) {
+		h ^= (unsigned char)*s;
+		h *= 16777619UL;
+		h &= 0xffffffffUL; /* truncate to 32 bits */
+	}
+	return h;
+}
+
+/* Return 1 if url is already recorded in the dedup table. */
+static int
+dedup_seen(const char *url)
+{
+	DeNode *node;
+
+	node = dedup_table[fnv1a(url) % DEDUP_SIZE];
+	while (node) {
+		if (strcmp(node->url, url) == 0)
+			return 1;
+		node = node->next;
+	}
+	return 0;
+}
+
+/* Record url in the dedup table; caller checks dedup_seen first,
+ * so no duplicate scan is done here. */
+static void
+dedup_add(const char *url)
+{
+	unsigned long slot;
+	DeNode *node;
+
+	slot = fnv1a(url) % DEDUP_SIZE;
+	node = xmalloc(sizeof(DeNode));
+	node->url = xstrdup(url);
+	node->next = dedup_table[slot];
+	dedup_table[slot] = node;
+}
+
+/* Release every node in the dedup table and reset the buckets.
+ * The original left freed pointers behind, so a second call (or
+ * any later lookup) would touch freed memory; NULLing each bucket
+ * makes the table validly empty afterwards. */
+static void
+dedup_free(void)
+{
+	size_t i;
+	DeNode *n, *next;
+
+	for (i = 0; i < DEDUP_SIZE; i++) {
+		for (n = dedup_table[i]; n; n = next) {
+			next = n->next;
+			free(n->url);
+			free(n);
+		}
+		dedup_table[i] = NULL;
+	}
+}
+
+/* Callback for each extracted URL: print one URL per line to
+ * stdout, honoring -u deduplication. ctx is unused. */
+static void
+url_found(const char *url, void *ctx)
+{
+	(void)ctx;
+
+	if (!dedup) {
+		puts(url);
+		return;
+	}
+	if (dedup_seen(url))
+		return;
+	dedup_add(url);
+	puts(url);
+}
+
+/* Return 1 if name looks like a text file worth scanning: either
+ * it carries one of the known text extensions, or it has no '.'
+ * at all (README, LICENSE, Makefile, ...). */
+static int
+is_text_ext(const char *name)
+{
+	/* Common text extensions we want to process */
+	static const char *exts[] = {
+		".html", ".htm", ".xhtml",
+		".md", ".mdx", ".markdown",
+		".txt", ".text", ".rst",
+		".xml", ".rss", ".atom",
+		".json", ".yaml", ".yml",
+		".css", ".js", ".jsx", ".ts", ".tsx",
+		".org", ".adoc", ".tex", ".bib",
+		".csv", ".tsv",
+		".cfg", ".conf", ".ini",
+		".sh", ".bash", ".zsh", ".fish",
+		".py", ".rb", ".pl", ".c", ".h",
+		".go", ".rs", ".java", ".hs",
+		NULL
+	};
+	const char **e;
+
+	for (e = exts; *e; e++) {
+		if (str_ends_with(name, *e))
+			return 1;
+	}
+
+	/* Extensionless files are assumed text. */
+	return strchr(name, '.') == NULL;
+}
+
+/* Read an entire file (or stdin when path is "-") into a freshly
+ * allocated, NUL-terminated buffer. Stores the byte count in
+ * *out_len and returns the buffer (caller frees), or NULL on
+ * error or when the file exceeds MAX_FILE_SIZE.
+ *
+ * Fix: the stdin path allocated exactly cap bytes, yet after the
+ * MAX_FILE_SIZE break len could equal cap, so data[len] = '\0'
+ * wrote one byte past the allocation (heap overflow). The buffer
+ * now always reserves a slot for the terminator. */
+static char *
+read_file(const char *path, size_t *out_len)
+{
+	FILE *fp;
+	char *data;
+	long fsize;
+
+	if (strcmp(path, "-") == 0) {
+		/* Read stdin into a growing buffer. */
+		size_t cap, len, n;
+
+		cap = 4096;
+		len = 0;
+		data = xmalloc(cap + 1); /* +1: terminator slot */
+
+		while ((n = fread(data + len, 1, cap - len,
+		                  stdin)) > 0) {
+			len += n;
+			if (len >= cap) {
+				if (cap * 2 > MAX_FILE_SIZE)
+					break; /* size cap reached */
+				cap *= 2;
+				data = xrealloc(data, cap + 1);
+			}
+		}
+
+		data[len] = '\0';
+		*out_len = len;
+		return data;
+	}
+
+	fp = fopen(path, "rb");
+	if (!fp)
+		return NULL;
+
+	/* Size via seek/tell; callers pass regular files only. */
+	if (fseek(fp, 0, SEEK_END) != 0) {
+		fclose(fp);
+		return NULL;
+	}
+
+	fsize = ftell(fp);
+	if (fsize < 0 || fsize > MAX_FILE_SIZE) {
+		fclose(fp);
+		return NULL;
+	}
+
+	rewind(fp);
+
+	data = xmalloc(fsize + 1);
+	if (fread(data, 1, fsize, fp) != (size_t)fsize) {
+		free(data);
+		fclose(fp);
+		return NULL;
+	}
+
+	data[fsize] = '\0';
+	fclose(fp);
+
+	*out_len = fsize;
+	return data;
+}
+
+/* Extract URLs from one file, skipping unreadable, empty and
+ * binary inputs. path may be "-" for stdin. Errors are non-fatal
+ * (reported only under -v). */
+static void
+process_file(const char *path)
+{
+	size_t len;
+	char *data;
+
+	if (verbose)
+		fprintf(stderr, "%s\n", path);
+
+	data = read_file(path, &len);
+	if (data == NULL) {
+		if (verbose)
+			warn("cannot read: %s", path);
+		return;
+	}
+
+	if (len > 0) {
+		if (is_binary(data, len)) {
+			if (verbose)
+				fprintf(stderr, " skip binary: %s\n",
+				        path);
+		} else {
+			extract_urls(data, len, url_found, NULL);
+		}
+	}
+
+	free(data);
+}
+
+/* Recursively walk a directory */
+/* Depth-first walk of dirpath: every regular file whose name
+ * passes is_text_ext() is handed to process_file(). Hidden
+ * entries and common build/vendor directories are pruned.
+ * Errors are non-fatal: an unreadable directory is warn()ed
+ * about and skipped. */
+static void
+walk_dir(const char *dirpath)
+{
+ DIR *d;
+ struct dirent *ent;
+ struct stat st;
+ char path[4096];
+
+ d = opendir(dirpath);
+ if (!d) {
+ warn("cannot open directory: %s", dirpath);
+ return;
+ }
+
+ while ((ent = readdir(d)) != NULL) {
+ /* Skip hidden files and . / .. */
+ if (ent->d_name[0] == '.')
+ continue;
+
+ /* Skip common non-content directories */
+ if (strcmp(ent->d_name, "node_modules") == 0 ||
+ strcmp(ent->d_name, ".git") == 0 ||
+ strcmp(ent->d_name, "__pycache__") == 0 ||
+ strcmp(ent->d_name, "vendor") == 0 ||
+ strcmp(ent->d_name, ".next") == 0 ||
+ strcmp(ent->d_name, "dist") == 0 ||
+ strcmp(ent->d_name, "build") == 0)
+ continue;
+
+ /* NOTE(review): snprintf silently truncates paths longer
+ * than 4096 bytes; the stat() below then acts on the
+ * truncated name — confirm acceptable for target trees. */
+ snprintf(path, sizeof(path), "%s/%s",
+ dirpath, ent->d_name);
+
+ /* NOTE(review): stat() follows symlinks, so a directory
+ * symlink cycle recurses without bound and linked trees
+ * are scanned twice; lstat() would skip links entirely —
+ * confirm which behavior is intended. */
+ if (stat(path, &st) != 0)
+ continue;
+
+ if (S_ISDIR(st.st_mode)) {
+ walk_dir(path);
+ } else if (S_ISREG(st.st_mode)) {
+ if (is_text_ext(ent->d_name))
+ process_file(path);
+ }
+ }
+
+ closedir(d);
+}
+
+/* Entry point: parse options, then process each path argument.
+ * Generalized from a single path to any number of paths (the old
+ * code silently ignored arguments after the first); a single-path
+ * invocation behaves exactly as before. "-" reads stdin. */
+int
+main(int argc, char *argv[])
+{
+	const char *path;
+	struct stat st;
+	int opt, i;
+
+	while ((opt = getopt(argc, argv, "vuRh")) != -1) {
+		switch (opt) {
+		case 'v':
+			verbose = 1;
+			break;
+		case 'u':
+			dedup = 1;
+			break;
+		case 'R':
+			recurse = 1;
+			break;
+		case 'h': /* fallthrough */
+		default:
+			usage();
+		}
+	}
+
+	if (optind >= argc)
+		usage();
+
+	for (i = optind; i < argc; i++) {
+		path = argv[i];
+
+		/* Reading from stdin */
+		if (strcmp(path, "-") == 0) {
+			process_file("-");
+			continue;
+		}
+
+		/* trailing ':' in fmt makes die() append strerror */
+		if (stat(path, &st) != 0)
+			die("cannot stat: %s:", path);
+
+		if (S_ISDIR(st.st_mode)) {
+			if (!recurse)
+				die("use -R to scan directories");
+			walk_dir(path);
+		} else if (S_ISREG(st.st_mode)) {
+			process_file(path);
+		} else {
+			die("not a regular file or directory: %s",
+			    path);
+		}
+	}
+
+	if (dedup)
+		dedup_free();
+	return 0;
+}
diff --git a/util.c b/util.c
@@ -0,0 +1,101 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+/* Print a formatted fatal error message to stderr and exit(1).
+ * Suckless idiom: if fmt ends with ':', a space plus
+ * strerror(errno) (via perror) is appended; otherwise a
+ * newline terminates the message. */
+void
+die(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ /* trailing ':' requests errno reporting */
+ if (fmt[0] && fmt[strlen(fmt) - 1] == ':') {
+ fputc(' ', stderr);
+ perror(NULL);
+ } else {
+ fputc('\n', stderr);
+ }
+ exit(1);
+}
+
+/* Print a non-fatal "warning: "-prefixed message to stderr and
+ * return; used for recoverable errors (unreadable files, etc.).
+ * NOTE(review): the name shadows BSD/glibc warn() from <err.h>;
+ * harmless while <err.h> is never included — confirm. */
+void
+warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "warning: ");
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+}
+
+/* malloc wrapper: never returns NULL, die()s on failure. */
+void *
+xmalloc(size_t size)
+{
+	void *p;
+
+	if (!(p = malloc(size)))
+		die("malloc:");
+	return p;
+}
+
+/* realloc wrapper: never returns NULL, die()s on failure.
+ * ptr may be NULL, as with realloc. */
+void *
+xrealloc(void *ptr, size_t size)
+{
+	void *p;
+
+	if (!(p = realloc(ptr, size)))
+		die("realloc:");
+	return p;
+}
+
+/* strdup wrapper: never returns NULL, die()s on failure. */
+char *
+xstrdup(const char *s)
+{
+	char *p;
+
+	if (!(p = strdup(s)))
+		die("strdup:");
+	return p;
+}
+
+/* Trim leading and trailing whitespace in place. Returns a
+ * pointer into str (not necessarily str itself), so callers that
+ * free() must keep the original pointer. */
+char *
+str_trim(char *str)
+{
+	size_t n;
+
+	while (isspace((unsigned char)*str))
+		str++;
+	n = strlen(str);
+	while (n > 0 && isspace((unsigned char)str[n - 1]))
+		n--;
+	str[n] = '\0';
+	return str;
+}
+
+/* Return 1 if str begins with prefix (empty prefix matches). */
+int
+str_starts_with(const char *str, const char *prefix)
+{
+	while (*prefix) {
+		if (*str++ != *prefix++)
+			return 0;
+	}
+	return 1;
+}
+
+/* Return 1 if str ends with suffix (empty suffix matches). */
+int
+str_ends_with(const char *str, const char *suffix)
+{
+	size_t slen, suflen;
+
+	slen = strlen(str);
+	suflen = strlen(suffix);
+	return suflen <= slen &&
+	       memcmp(str + slen - suflen, suffix, suflen) == 0;
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,22 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stddef.h>
+
+/* Memory allocation with error handling */
+void *xmalloc(size_t size);
+void *xrealloc(void *ptr, size_t size);
+char *xstrdup(const char *s);
+
+/* String utilities */
+char *str_trim(char *str);
+int str_starts_with(const char *str, const char *prefix);
+int str_ends_with(const char *str, const char *suffix);
+
+/* Error handling */
+void die(const char *fmt, ...);
+void warn(const char *fmt, ...);
+
+#endif /* UTIL_H */