fetch.c (4034B)
1 /* See LICENSE file for copyright and license details. */ 2 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <unistd.h> 7 #include <curl/curl.h> 8 9 #include "config.h" 10 #include "fetch.h" 11 #include "util.h" 12 13 static CURL *curl_handle = NULL; 14 15 static size_t 16 write_cb(void *contents, size_t size, size_t nmemb, void *userp) 17 { 18 size_t realsize = size * nmemb; 19 Response *resp = (Response *)userp; 20 char *ptr; 21 22 ptr = xrealloc(resp->data, resp->size + realsize + 1); 23 resp->data = ptr; 24 memcpy(&(resp->data[resp->size]), contents, realsize); 25 resp->size += realsize; 26 resp->data[resp->size] = '\0'; 27 return realsize; 28 } 29 30 /* 31 * Check if an HTTP status code is transient (worth retrying). 32 * 429 = rate limited, 5xx = server errors 33 */ 34 static int 35 is_transient(long code) 36 { 37 return code == 429 || code == 500 || code == 502 || 38 code == 503 || code == 504; 39 } 40 41 void 42 fetch_init(void) 43 { 44 curl_global_init(CURL_GLOBAL_ALL); 45 curl_handle = curl_easy_init(); 46 if (!curl_handle) 47 die("curl_easy_init failed"); 48 } 49 50 void 51 fetch_cleanup(void) 52 { 53 if (curl_handle) { 54 curl_easy_cleanup(curl_handle); 55 curl_handle = NULL; 56 } 57 curl_global_cleanup(); 58 } 59 60 Response * 61 fetch_url(const char *url) 62 { 63 Response *resp; 64 CURLcode res; 65 char *ct, *effective_url; 66 int attempt; 67 68 for (attempt = 0; attempt < FETCH_MAX_RETRIES; attempt++) { 69 if (attempt > 0) { 70 unsigned int delay; 71 72 delay = FETCH_RETRY_BASE * (1 << (attempt - 1)); 73 warn("retry %d/%d for %s (waiting %us)", 74 attempt, FETCH_MAX_RETRIES - 1, url, delay); 75 sleep(delay); 76 } 77 78 resp = xmalloc(sizeof(Response)); 79 resp->data = xmalloc(1); 80 resp->data[0] = '\0'; 81 resp->size = 0; 82 resp->content_type = NULL; 83 resp->status_code = 0; 84 resp->final_url = NULL; 85 86 curl_easy_reset(curl_handle); 87 curl_easy_setopt(curl_handle, CURLOPT_URL, url); 88 curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, 89 write_cb); 90 curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, 91 (void *)resp); 92 curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, 93 USER_AGENT); 94 curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L); 95 curl_easy_setopt(curl_handle, CURLOPT_MAXREDIRS, 96 MAX_REDIRECTS); 97 curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, 98 CONNECT_TIMEOUT); 99 curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 100 REQUEST_TIMEOUT); 101 curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 1L); 102 curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 2L); 103 curl_easy_setopt(curl_handle, CURLOPT_ACCEPT_ENCODING, 104 ""); 105 106 res = curl_easy_perform(curl_handle); 107 108 if (res != CURLE_OK) { 109 /* Network-level failure */ 110 if (res == CURLE_OPERATION_TIMEDOUT || 111 res == CURLE_COULDNT_CONNECT || 112 res == CURLE_GOT_NOTHING) { 113 warn("fetch: %s: %s", 114 url, curl_easy_strerror(res)); 115 response_free(resp); 116 resp = NULL; 117 continue; 118 } 119 /* Non-transient curl error */ 120 warn("fetch: %s: %s", 121 url, curl_easy_strerror(res)); 122 response_free(resp); 123 return NULL; 124 } 125 126 curl_easy_getinfo(curl_handle, 127 CURLINFO_RESPONSE_CODE, 128 &resp->status_code); 129 130 ct = NULL; 131 if (curl_easy_getinfo(curl_handle, 132 CURLINFO_CONTENT_TYPE, 133 &ct) == CURLE_OK && ct) 134 resp->content_type = xstrdup(ct); 135 136 effective_url = NULL; 137 if (curl_easy_getinfo(curl_handle, 138 CURLINFO_EFFECTIVE_URL, 139 &effective_url) == CURLE_OK && 140 effective_url) 141 resp->final_url = xstrdup(effective_url); 142 143 /* Retry on transient HTTP errors */ 144 if (is_transient(resp->status_code)) { 145 response_free(resp); 146 resp = NULL; 147 continue; 148 } 149 150 return resp; 151 } 152 153 /* All retries exhausted */ 154 warn("fetch: gave up on %s after %d attempts", 155 url, FETCH_MAX_RETRIES); 156 return resp; 157 } 158 159 void 160 response_free(Response *resp) 161 { 162 if (!resp) 163 return; 164 free(resp->data); 165 free(resp->content_type); 166 free(resp->final_url); 167 free(resp); 168 }