sbot

Simple web archiver — self-contained GWTAR archives
git clone https://git.krisyotam.com/krisyotam/sbot.git
Log | Files | Refs | README | LICENSE

util.c (7123B)


      1 /* See LICENSE file for copyright and license details. */
      2 
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
      9 
     10 #include "util.h"
     11 
     12 static const char base64_table[] =
     13     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     14 
     15 void
     16 die(const char *fmt, ...)
     17 {
     18     va_list ap;
     19     va_start(ap, fmt);
     20     vfprintf(stderr, fmt, ap);
     21     va_end(ap);
     22     if (fmt[0] && fmt[strlen(fmt)-1] == ':') {
     23         fputc(' ', stderr);
     24         perror(NULL);
     25     } else {
     26         fputc('\n', stderr);
     27     }
     28     exit(1);
     29 }
     30 
     31 void
     32 warn(const char *fmt, ...)
     33 {
     34     va_list ap;
     35     va_start(ap, fmt);
     36     fprintf(stderr, "warning: ");
     37     vfprintf(stderr, fmt, ap);
     38     va_end(ap);
     39     fputc('\n', stderr);
     40 }
     41 
     42 void *
     43 xmalloc(size_t size)
     44 {
     45     void *p = malloc(size);
     46     if (!p)
     47         die("malloc:");
     48     return p;
     49 }
     50 
     51 void *
     52 xrealloc(void *ptr, size_t size)
     53 {
     54     void *p = realloc(ptr, size);
     55     if (!p)
     56         die("realloc:");
     57     return p;
     58 }
     59 
     60 char *
     61 xstrdup(const char *s)
     62 {
     63     char *p = strdup(s);
     64     if (!p)
     65         die("strdup:");
     66     return p;
     67 }
     68 
     69 char *
     70 base64_encode(const unsigned char *data, size_t input_len, size_t *output_len)
     71 {
     72     size_t olen = 4 * ((input_len + 2) / 3);
     73     char *encoded = xmalloc(olen + 1);
     74 
     75     size_t i, j;
     76     for (i = 0, j = 0; i < input_len;) {
     77         unsigned int a = i < input_len ? data[i++] : 0;
     78         unsigned int b = i < input_len ? data[i++] : 0;
     79         unsigned int c = i < input_len ? data[i++] : 0;
     80         unsigned int triple = (a << 16) | (b << 8) | c;
     81 
     82         encoded[j++] = base64_table[(triple >> 18) & 0x3F];
     83         encoded[j++] = base64_table[(triple >> 12) & 0x3F];
     84         encoded[j++] = base64_table[(triple >> 6) & 0x3F];
     85         encoded[j++] = base64_table[triple & 0x3F];
     86     }
     87 
     88     /* Add padding */
     89     size_t mod = input_len % 3;
     90     if (mod) {
     91         encoded[olen - 1] = '=';
     92         if (mod == 1)
     93             encoded[olen - 2] = '=';
     94     }
     95 
     96     encoded[olen] = '\0';
     97     if (output_len)
     98         *output_len = olen;
     99     return encoded;
    100 }
    101 
    102 int
    103 str_starts_with(const char *str, const char *prefix)
    104 {
    105     return strncmp(str, prefix, strlen(prefix)) == 0;
    106 }
    107 
    108 int
    109 str_ends_with(const char *str, const char *suffix)
    110 {
    111     size_t slen = strlen(str);
    112     size_t suflen = strlen(suffix);
    113     if (suflen > slen)
    114         return 0;
    115     return strcmp(str + slen - suflen, suffix) == 0;
    116 }
    117 
    118 char *
    119 str_tolower(char *str)
    120 {
    121     for (char *p = str; *p; p++)
    122         *p = tolower((unsigned char)*p);
    123     return str;
    124 }
    125 
    126 char *
    127 str_trim(char *str)
    128 {
    129     char *end;
    130     while (isspace((unsigned char)*str))
    131         str++;
    132     if (*str == '\0')
    133         return str;
    134     end = str + strlen(str) - 1;
    135     while (end > str && isspace((unsigned char)*end))
    136         end--;
    137     end[1] = '\0';
    138     return str;
    139 }
    140 
    141 char *
    142 url_get_domain(const char *url)
    143 {
    144     const char *start, *end;
    145     char *domain;
    146 
    147     /* Skip protocol */
    148     if (str_starts_with(url, "https://"))
    149         start = url + 8;
    150     else if (str_starts_with(url, "http://"))
    151         start = url + 7;
    152     else
    153         start = url;
    154 
    155     /* Find end of domain */
    156     end = start;
    157     while (*end && *end != '/' && *end != ':' && *end != '?')
    158         end++;
    159 
    160     size_t len = end - start;
    161     domain = xmalloc(len + 1);
    162     memcpy(domain, start, len);
    163     domain[len] = '\0';
    164 
    165     return domain;
    166 }
    167 
    168 int
    169 url_same_domain(const char *url1, const char *url2)
    170 {
    171     char *d1 = url_get_domain(url1);
    172     char *d2 = url_get_domain(url2);
    173     int same = strcasecmp(d1, d2) == 0;
    174     free(d1);
    175     free(d2);
    176     return same;
    177 }
    178 
    179 char *
    180 url_resolve(const char *base, const char *relative)
    181 {
    182     char *result;
    183 
    184     /* Already absolute */
    185     if (str_starts_with(relative, "http://") ||
    186         str_starts_with(relative, "https://") ||
    187         str_starts_with(relative, "data:")) {
    188         return xstrdup(relative);
    189     }
    190 
    191     /* Protocol-relative */
    192     if (str_starts_with(relative, "//")) {
    193         size_t len = 6 + strlen(relative);
    194         result = xmalloc(len + 1);
    195         snprintf(result, len + 1, "https:%s", relative);
    196         return result;
    197     }
    198 
    199     char *domain = url_get_domain(base);
    200     const char *proto = str_starts_with(base, "https://") ? "https://" : "http://";
    201 
    202     /* Root-relative */
    203     if (relative[0] == '/') {
    204         size_t len = strlen(proto) + strlen(domain) + strlen(relative);
    205         result = xmalloc(len + 1);
    206         snprintf(result, len + 1, "%s%s%s", proto, domain, relative);
    207         free(domain);
    208         return result;
    209     }
    210 
    211     /* Find base path */
    212     const char *path_start;
    213     if (str_starts_with(base, "https://"))
    214         path_start = base + 8;
    215     else if (str_starts_with(base, "http://"))
    216         path_start = base + 7;
    217     else
    218         path_start = base;
    219 
    220     /* Skip domain */
    221     while (*path_start && *path_start != '/')
    222         path_start++;
    223 
    224     /* Find last slash in path */
    225     const char *last_slash = strrchr(path_start, '/');
    226     if (!last_slash)
    227         last_slash = path_start;
    228 
    229     size_t base_len = last_slash - path_start + 1;
    230     size_t len = strlen(proto) + strlen(domain) + base_len + strlen(relative);
    231     result = xmalloc(len + 1);
    232     snprintf(result, len + 1, "%s%s%.*s%s", proto, domain, (int)base_len, path_start, relative);
    233 
    234     free(domain);
    235     return result;
    236 }
    237 
    238 char *
    239 get_mime_type(const char *url)
    240 {
    241     /* Strip query string */
    242     char *copy = xstrdup(url);
    243     char *query = strchr(copy, '?');
    244     if (query)
    245         *query = '\0';
    246 
    247     str_tolower(copy);
    248 
    249     const char *mime = "application/octet-stream";
    250 
    251     if (str_ends_with(copy, ".jpg") || str_ends_with(copy, ".jpeg"))
    252         mime = "image/jpeg";
    253     else if (str_ends_with(copy, ".png"))
    254         mime = "image/png";
    255     else if (str_ends_with(copy, ".gif"))
    256         mime = "image/gif";
    257     else if (str_ends_with(copy, ".webp"))
    258         mime = "image/webp";
    259     else if (str_ends_with(copy, ".svg"))
    260         mime = "image/svg+xml";
    261     else if (str_ends_with(copy, ".ico"))
    262         mime = "image/x-icon";
    263     else if (str_ends_with(copy, ".css"))
    264         mime = "text/css";
    265     else if (str_ends_with(copy, ".js"))
    266         mime = "application/javascript";
    267     else if (str_ends_with(copy, ".woff"))
    268         mime = "font/woff";
    269     else if (str_ends_with(copy, ".woff2"))
    270         mime = "font/woff2";
    271     else if (str_ends_with(copy, ".ttf"))
    272         mime = "font/ttf";
    273     else if (str_ends_with(copy, ".otf"))
    274         mime = "font/otf";
    275     else if (str_ends_with(copy, ".eot"))
    276         mime = "application/vnd.ms-fontobject";
    277 
    278     free(copy);
    279     return xstrdup(mime);
    280 }
    281 
    282 char *
    283 sanitize_filename(const char *url)
    284 {
    285     char *domain = url_get_domain(url);
    286     size_t len = strlen(domain) + 32;
    287     char *filename = xmalloc(len);
    288 
    289     /* Replace dots with underscores */
    290     for (char *p = domain; *p; p++)
    291         if (*p == '.')
    292             *p = '_';
    293 
    294     snprintf(filename, len, "%s", domain);
    295     free(domain);
    296     return filename;
    297 }
    298 
    299 char *
    300 get_iso_date(void)
    301 {
    302     time_t t = time(NULL);
    303     struct tm *tm = gmtime(&t);
    304     char *buf = xmalloc(32);
    305     strftime(buf, 32, "%Y-%m-%dT%H:%M:%SZ", tm);
    306     return buf;
    307 }