services.c (5975B)
1 /* See LICENSE file for copyright and license details. */ 2 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <unistd.h> 7 #include <curl/curl.h> 8 9 #include "config.h" 10 #include "services.h" 11 #include "util.h" 12 13 static CURL *curl_handle = NULL; 14 15 /* Discard response body */ 16 static size_t 17 discard_cb(void *contents, size_t size, size_t nmemb, void *userp) 18 { 19 (void)contents; 20 (void)userp; 21 return size * nmemb; 22 } 23 24 static int 25 is_transient(long code) 26 { 27 return code == 429 || code == 500 || code == 502 || 28 code == 503 || code == 504; 29 } 30 31 /* Perform a GET request, return HTTP status or -1 on error */ 32 static long 33 http_get(const char *url) 34 { 35 CURLcode res; 36 long status; 37 int attempt; 38 39 for (attempt = 0; attempt < MAX_RETRIES; attempt++) { 40 if (attempt > 0) { 41 unsigned int delay; 42 43 delay = RETRY_BASE * (1 << (attempt - 1)); 44 sleep(delay); 45 } 46 47 curl_easy_reset(curl_handle); 48 curl_easy_setopt(curl_handle, CURLOPT_URL, url); 49 curl_easy_setopt(curl_handle, 50 CURLOPT_WRITEFUNCTION, discard_cb); 51 curl_easy_setopt(curl_handle, 52 CURLOPT_USERAGENT, USER_AGENT); 53 curl_easy_setopt(curl_handle, 54 CURLOPT_FOLLOWLOCATION, 1L); 55 curl_easy_setopt(curl_handle, 56 CURLOPT_MAXREDIRS, MAX_REDIRECTS); 57 curl_easy_setopt(curl_handle, 58 CURLOPT_CONNECTTIMEOUT, CONNECT_TIMEOUT); 59 curl_easy_setopt(curl_handle, 60 CURLOPT_TIMEOUT, REQUEST_TIMEOUT); 61 curl_easy_setopt(curl_handle, 62 CURLOPT_SSL_VERIFYPEER, 1L); 63 curl_easy_setopt(curl_handle, 64 CURLOPT_SSL_VERIFYHOST, 2L); 65 66 res = curl_easy_perform(curl_handle); 67 if (res != CURLE_OK) 68 continue; 69 70 curl_easy_getinfo(curl_handle, 71 CURLINFO_RESPONSE_CODE, &status); 72 73 if (is_transient(status)) 74 continue; 75 76 return status; 77 } 78 return -1; 79 } 80 81 /* Perform a POST request, return HTTP status or -1 on error */ 82 static long 83 http_post(const char *url, const char *postdata) 84 { 85 CURLcode res; 86 long status; 87 int attempt; 88 89 for (attempt = 0; attempt < MAX_RETRIES; attempt++) { 90 if (attempt > 0) { 91 unsigned int delay; 92 93 delay = RETRY_BASE * (1 << (attempt - 1)); 94 sleep(delay); 95 } 96 97 curl_easy_reset(curl_handle); 98 curl_easy_setopt(curl_handle, CURLOPT_URL, url); 99 curl_easy_setopt(curl_handle, 100 CURLOPT_POSTFIELDS, postdata); 101 curl_easy_setopt(curl_handle, 102 CURLOPT_WRITEFUNCTION, discard_cb); 103 curl_easy_setopt(curl_handle, 104 CURLOPT_USERAGENT, USER_AGENT); 105 curl_easy_setopt(curl_handle, 106 CURLOPT_FOLLOWLOCATION, 1L); 107 curl_easy_setopt(curl_handle, 108 CURLOPT_MAXREDIRS, MAX_REDIRECTS); 109 curl_easy_setopt(curl_handle, 110 CURLOPT_CONNECTTIMEOUT, CONNECT_TIMEOUT); 111 curl_easy_setopt(curl_handle, 112 CURLOPT_TIMEOUT, REQUEST_TIMEOUT); 113 curl_easy_setopt(curl_handle, 114 CURLOPT_SSL_VERIFYPEER, 1L); 115 curl_easy_setopt(curl_handle, 116 CURLOPT_SSL_VERIFYHOST, 2L); 117 118 res = curl_easy_perform(curl_handle); 119 if (res != CURLE_OK) 120 continue; 121 122 curl_easy_getinfo(curl_handle, 123 CURLINFO_RESPONSE_CODE, &status); 124 125 if (is_transient(status)) 126 continue; 127 128 return status; 129 } 130 return -1; 131 } 132 133 void 134 svc_init(void) 135 { 136 curl_global_init(CURL_GLOBAL_ALL); 137 curl_handle = curl_easy_init(); 138 if (!curl_handle) 139 die("curl_easy_init failed"); 140 } 141 142 void 143 svc_cleanup(void) 144 { 145 if (curl_handle) { 146 curl_easy_cleanup(curl_handle); 147 curl_handle = NULL; 148 } 149 curl_global_cleanup(); 150 } 151 152 int 153 svc_submit_ia(const char *url, int verbose) 154 { 155 char *save_url; 156 size_t len; 157 long status; 158 159 len = strlen("https://web.archive.org/save/") + 160 strlen(url) + 1; 161 save_url = xmalloc(len); 162 snprintf(save_url, len, 163 "https://web.archive.org/save/%s", url); 164 165 if (verbose) 166 fprintf(stderr, " IA: %s\n", save_url); 167 168 status = http_get(save_url); 169 free(save_url); 170 171 if (status >= 200 && status < 400) { 172 if (verbose) 173 fprintf(stderr, " IA: OK (%ld)\n", status); 174 return 0; 175 } 176 177 warn("IA: HTTP %ld for %s", status, url); 178 return -1; 179 } 180 181 int 182 svc_submit_archiveph(const char *url, int verbose) 183 { 184 char *postdata, *escaped; 185 size_t len; 186 long status; 187 188 escaped = curl_easy_escape(curl_handle, url, 0); 189 if (!escaped) { 190 warn("archive.ph: URL escape failed for %s", url); 191 return -1; 192 } 193 194 len = strlen("url=") + strlen(escaped) + 195 strlen("&submit=") + 1; 196 postdata = xmalloc(len); 197 snprintf(postdata, len, "url=%s&submit=", escaped); 198 curl_free(escaped); 199 200 if (verbose) 201 fprintf(stderr, " archive.ph: submitting %s\n", 202 url); 203 204 status = http_post("https://archive.ph/submit/", postdata); 205 free(postdata); 206 207 if (status >= 200 && status < 400) { 208 if (verbose) 209 fprintf(stderr, 210 " archive.ph: OK (%ld)\n", status); 211 return 0; 212 } 213 214 /* archive.ph returns 3xx redirect on success */ 215 if (status >= 300 && status < 400) { 216 if (verbose) 217 fprintf(stderr, 218 " archive.ph: OK (redirect)\n"); 219 return 0; 220 } 221 222 warn("archive.ph: HTTP %ld for %s", status, url); 223 return -1; 224 } 225 226 int 227 svc_submit_wikiwix(const char *url, int verbose) 228 { 229 char *cache_url; 230 size_t len; 231 long status; 232 233 len = strlen("https://archive.wikiwix.com/cache/?url=") + 234 strlen(url) + 1; 235 cache_url = xmalloc(len); 236 snprintf(cache_url, len, 237 "https://archive.wikiwix.com/cache/?url=%s", url); 238 239 if (verbose) 240 fprintf(stderr, " Wikiwix: %s\n", cache_url); 241 242 status = http_get(cache_url); 243 free(cache_url); 244 245 if (status >= 200 && status < 400) { 246 if (verbose) 247 fprintf(stderr, 248 " Wikiwix: OK (%ld)\n", status); 249 return 0; 250 } 251 252 warn("Wikiwix: HTTP %ld for %s", status, url); 253 return -1; 254 } 255 256 int 257 svc_submit(const char *url, int services, int verbose) 258 { 259 int ok; 260 261 ok = 0; 262 263 /* Skip .onion URLs - archive services can't reach them */ 264 if (strstr(url, ".onion/") || strstr(url, ".onion:")) { 265 if (verbose) 266 fprintf(stderr, 267 " skip: .onion URL\n"); 268 return 0; 269 } 270 271 if (services & SVC_IA) { 272 if (svc_submit_ia(url, verbose) == 0) 273 ok++; 274 } 275 if (services & SVC_WIKIWIX) { 276 if (svc_submit_wikiwix(url, verbose) == 0) 277 ok++; 278 } 279 if (services & SVC_ARCHIVEPH) { 280 if (svc_submit_archiveph(url, verbose) == 0) 281 ok++; 282 } 283 284 return ok; 285 }