suploader

Simple uploader — submit URLs to web archive services
git clone https://git.krisyotam.com/krisyotam/suploader.git
Log | Files | Refs | LICENSE

services.c (5975B)


      1 /* See LICENSE file for copyright and license details. */
      2 
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 #include <unistd.h>
      7 #include <curl/curl.h>
      8 
      9 #include "config.h"
     10 #include "services.h"
     11 #include "util.h"
     12 
/*
 * Single libcurl easy handle shared by every request in this file.
 * Created in svc_init(), reset before each transfer, and released
 * (then set back to NULL) in svc_cleanup().
 */
static CURL *curl_handle = NULL;
     14 
     15 /* Discard response body */
     16 static size_t
     17 discard_cb(void *contents, size_t size, size_t nmemb, void *userp)
     18 {
     19 	(void)contents;
     20 	(void)userp;
     21 	return size * nmemb;
     22 }
     23 
     24 static int
     25 is_transient(long code)
     26 {
     27 	return code == 429 || code == 500 || code == 502 ||
     28 	       code == 503 || code == 504;
     29 }
     30 
     31 /* Perform a GET request, return HTTP status or -1 on error */
     32 static long
     33 http_get(const char *url)
     34 {
     35 	CURLcode res;
     36 	long status;
     37 	int attempt;
     38 
     39 	for (attempt = 0; attempt < MAX_RETRIES; attempt++) {
     40 		if (attempt > 0) {
     41 			unsigned int delay;
     42 
     43 			delay = RETRY_BASE * (1 << (attempt - 1));
     44 			sleep(delay);
     45 		}
     46 
     47 		curl_easy_reset(curl_handle);
     48 		curl_easy_setopt(curl_handle, CURLOPT_URL, url);
     49 		curl_easy_setopt(curl_handle,
     50 		    CURLOPT_WRITEFUNCTION, discard_cb);
     51 		curl_easy_setopt(curl_handle,
     52 		    CURLOPT_USERAGENT, USER_AGENT);
     53 		curl_easy_setopt(curl_handle,
     54 		    CURLOPT_FOLLOWLOCATION, 1L);
     55 		curl_easy_setopt(curl_handle,
     56 		    CURLOPT_MAXREDIRS, MAX_REDIRECTS);
     57 		curl_easy_setopt(curl_handle,
     58 		    CURLOPT_CONNECTTIMEOUT, CONNECT_TIMEOUT);
     59 		curl_easy_setopt(curl_handle,
     60 		    CURLOPT_TIMEOUT, REQUEST_TIMEOUT);
     61 		curl_easy_setopt(curl_handle,
     62 		    CURLOPT_SSL_VERIFYPEER, 1L);
     63 		curl_easy_setopt(curl_handle,
     64 		    CURLOPT_SSL_VERIFYHOST, 2L);
     65 
     66 		res = curl_easy_perform(curl_handle);
     67 		if (res != CURLE_OK)
     68 			continue;
     69 
     70 		curl_easy_getinfo(curl_handle,
     71 		    CURLINFO_RESPONSE_CODE, &status);
     72 
     73 		if (is_transient(status))
     74 			continue;
     75 
     76 		return status;
     77 	}
     78 	return -1;
     79 }
     80 
     81 /* Perform a POST request, return HTTP status or -1 on error */
     82 static long
     83 http_post(const char *url, const char *postdata)
     84 {
     85 	CURLcode res;
     86 	long status;
     87 	int attempt;
     88 
     89 	for (attempt = 0; attempt < MAX_RETRIES; attempt++) {
     90 		if (attempt > 0) {
     91 			unsigned int delay;
     92 
     93 			delay = RETRY_BASE * (1 << (attempt - 1));
     94 			sleep(delay);
     95 		}
     96 
     97 		curl_easy_reset(curl_handle);
     98 		curl_easy_setopt(curl_handle, CURLOPT_URL, url);
     99 		curl_easy_setopt(curl_handle,
    100 		    CURLOPT_POSTFIELDS, postdata);
    101 		curl_easy_setopt(curl_handle,
    102 		    CURLOPT_WRITEFUNCTION, discard_cb);
    103 		curl_easy_setopt(curl_handle,
    104 		    CURLOPT_USERAGENT, USER_AGENT);
    105 		curl_easy_setopt(curl_handle,
    106 		    CURLOPT_FOLLOWLOCATION, 1L);
    107 		curl_easy_setopt(curl_handle,
    108 		    CURLOPT_MAXREDIRS, MAX_REDIRECTS);
    109 		curl_easy_setopt(curl_handle,
    110 		    CURLOPT_CONNECTTIMEOUT, CONNECT_TIMEOUT);
    111 		curl_easy_setopt(curl_handle,
    112 		    CURLOPT_TIMEOUT, REQUEST_TIMEOUT);
    113 		curl_easy_setopt(curl_handle,
    114 		    CURLOPT_SSL_VERIFYPEER, 1L);
    115 		curl_easy_setopt(curl_handle,
    116 		    CURLOPT_SSL_VERIFYHOST, 2L);
    117 
    118 		res = curl_easy_perform(curl_handle);
    119 		if (res != CURLE_OK)
    120 			continue;
    121 
    122 		curl_easy_getinfo(curl_handle,
    123 		    CURLINFO_RESPONSE_CODE, &status);
    124 
    125 		if (is_transient(status))
    126 			continue;
    127 
    128 		return status;
    129 	}
    130 	return -1;
    131 }
    132 
    133 void
    134 svc_init(void)
    135 {
    136 	curl_global_init(CURL_GLOBAL_ALL);
    137 	curl_handle = curl_easy_init();
    138 	if (!curl_handle)
    139 		die("curl_easy_init failed");
    140 }
    141 
    142 void
    143 svc_cleanup(void)
    144 {
    145 	if (curl_handle) {
    146 		curl_easy_cleanup(curl_handle);
    147 		curl_handle = NULL;
    148 	}
    149 	curl_global_cleanup();
    150 }
    151 
    152 int
    153 svc_submit_ia(const char *url, int verbose)
    154 {
    155 	char *save_url;
    156 	size_t len;
    157 	long status;
    158 
    159 	len = strlen("https://web.archive.org/save/") +
    160 	      strlen(url) + 1;
    161 	save_url = xmalloc(len);
    162 	snprintf(save_url, len,
    163 	    "https://web.archive.org/save/%s", url);
    164 
    165 	if (verbose)
    166 		fprintf(stderr, "  IA: %s\n", save_url);
    167 
    168 	status = http_get(save_url);
    169 	free(save_url);
    170 
    171 	if (status >= 200 && status < 400) {
    172 		if (verbose)
    173 			fprintf(stderr, "  IA: OK (%ld)\n", status);
    174 		return 0;
    175 	}
    176 
    177 	warn("IA: HTTP %ld for %s", status, url);
    178 	return -1;
    179 }
    180 
    181 int
    182 svc_submit_archiveph(const char *url, int verbose)
    183 {
    184 	char *postdata, *escaped;
    185 	size_t len;
    186 	long status;
    187 
    188 	escaped = curl_easy_escape(curl_handle, url, 0);
    189 	if (!escaped) {
    190 		warn("archive.ph: URL escape failed for %s", url);
    191 		return -1;
    192 	}
    193 
    194 	len = strlen("url=") + strlen(escaped) +
    195 	      strlen("&submit=") + 1;
    196 	postdata = xmalloc(len);
    197 	snprintf(postdata, len, "url=%s&submit=", escaped);
    198 	curl_free(escaped);
    199 
    200 	if (verbose)
    201 		fprintf(stderr, "  archive.ph: submitting %s\n",
    202 		    url);
    203 
    204 	status = http_post("https://archive.ph/submit/", postdata);
    205 	free(postdata);
    206 
    207 	if (status >= 200 && status < 400) {
    208 		if (verbose)
    209 			fprintf(stderr,
    210 			    "  archive.ph: OK (%ld)\n", status);
    211 		return 0;
    212 	}
    213 
    214 	/* archive.ph returns 3xx redirect on success */
    215 	if (status >= 300 && status < 400) {
    216 		if (verbose)
    217 			fprintf(stderr,
    218 			    "  archive.ph: OK (redirect)\n");
    219 		return 0;
    220 	}
    221 
    222 	warn("archive.ph: HTTP %ld for %s", status, url);
    223 	return -1;
    224 }
    225 
    226 int
    227 svc_submit_wikiwix(const char *url, int verbose)
    228 {
    229 	char *cache_url;
    230 	size_t len;
    231 	long status;
    232 
    233 	len = strlen("https://archive.wikiwix.com/cache/?url=") +
    234 	      strlen(url) + 1;
    235 	cache_url = xmalloc(len);
    236 	snprintf(cache_url, len,
    237 	    "https://archive.wikiwix.com/cache/?url=%s", url);
    238 
    239 	if (verbose)
    240 		fprintf(stderr, "  Wikiwix: %s\n", cache_url);
    241 
    242 	status = http_get(cache_url);
    243 	free(cache_url);
    244 
    245 	if (status >= 200 && status < 400) {
    246 		if (verbose)
    247 			fprintf(stderr,
    248 			    "  Wikiwix: OK (%ld)\n", status);
    249 		return 0;
    250 	}
    251 
    252 	warn("Wikiwix: HTTP %ld for %s", status, url);
    253 	return -1;
    254 }
    255 
    256 int
    257 svc_submit(const char *url, int services, int verbose)
    258 {
    259 	int ok;
    260 
    261 	ok = 0;
    262 
    263 	/* Skip .onion URLs - archive services can't reach them */
    264 	if (strstr(url, ".onion/") || strstr(url, ".onion:")) {
    265 		if (verbose)
    266 			fprintf(stderr,
    267 			    "  skip: .onion URL\n");
    268 		return 0;
    269 	}
    270 
    271 	if (services & SVC_IA) {
    272 		if (svc_submit_ia(url, verbose) == 0)
    273 			ok++;
    274 	}
    275 	if (services & SVC_WIKIWIX) {
    276 		if (svc_submit_wikiwix(url, verbose) == 0)
    277 			ok++;
    278 	}
    279 	if (services & SVC_ARCHIVEPH) {
    280 		if (svc_submit_archiveph(url, verbose) == 0)
    281 			ok++;
    282 	}
    283 
    284 	return ok;
    285 }