OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
curl_utils.cc
Go to the documentation of this file.
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of gateway_module, A C++ module that can be loaded in to
4 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5 
6 // Copyright (c) 2013 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
#include <unistd.h>

#include <algorithm> // std::for_each
#include <cerrno>    // errno, EINTR

#include <GNURegex.h>

#include "util.h"
#include "BESDebug.h"
#include "GatewayUtils.h"

#include "curl_utils.h"
35 
36 namespace libcurl {
37 
// Debugging switch. When non-zero, init() puts the new handle into libcurl's
// verbose mode and installs curl_debug() as the debug callback. Leave at 0
// for normal operation.
int curl_trace = 0;
40 
41 
42 
// Inclusive range of HTTP 4xx (client error) status codes that have an entry
// in http_client_errors.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Messages for HTTP client errors, indexed by (status - CLIENT_ERR_MIN).
// The multi-line "Not Found" text is written as adjacent string literals so
// the message does not depend on how the source lines happen to be indented.
const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
{
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention;\n"
    " Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone:.",
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."
};
68 
// Inclusive range of HTTP 5xx (server error) status codes that have an entry
// in http_server_errors.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Messages for HTTP server errors, indexed by (status - SERVER_ERR_MIN).
const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
{
    "Internal Server Error.",
    "Not Implemented.",
    "Bad Gateway.",
    "Service Unavailable.",
    "Gateway Time-out.",
    "HTTP Version Not Supported."
};
80 
81 
84 string http_status_to_string(int status)
85 {
86  if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
87  return string(http_client_errors[status - CLIENT_ERR_MIN]);
88  else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
89  return string(http_server_errors[status - SERVER_ERR_MIN]);
90  else
91  return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
92 }
93 
94 static string getCurlAuthTypeName(const int authType){
95 
96  string authTypeString;
97  int match;
98 
99  match = authType & CURLAUTH_BASIC;
100  if(match){
101  authTypeString += "CURLAUTH_BASIC";
102  }
103 
104  match = authType & CURLAUTH_DIGEST;
105  if(match){
106  if(!authTypeString.empty())
107  authTypeString += " ";
108  authTypeString += "CURLAUTH_DIGEST";
109  }
110 
111  match = authType & CURLAUTH_DIGEST_IE;
112  if(match){
113  if(!authTypeString.empty())
114  authTypeString += " ";
115  authTypeString += "CURLAUTH_DIGEST_IE";
116  }
117 
118  match = authType & CURLAUTH_GSSNEGOTIATE;
119  if(match){
120  if(!authTypeString.empty())
121  authTypeString += " ";
122  authTypeString += "CURLAUTH_GSSNEGOTIATE";
123  }
124 
125  match = authType & CURLAUTH_NTLM;
126  if(match){
127  if(!authTypeString.empty())
128  authTypeString += " ";
129  authTypeString += "CURLAUTH_NTLM";
130  }
131 
132 #if 0
133  match = authType & CURLAUTH_ANY;
134  if(match){
135  if(!authTypeString.empty())
136  authTypeString += " ";
137  authTypeString += "CURLAUTH_ANY";
138  }
139 
140 
141  match = authType & CURLAUTH_ANY;
142  if(match){
143  if(!authTypeString.empty())
144  authTypeString += " ";
145  authTypeString += "CURLAUTH_ANYSAFE";
146  }
147 
148 
149  match = authType & CURLAUTH_ANY;
150  if(match){
151  if(!authTypeString.empty())
152  authTypeString += " ";
153  authTypeString += "CURLAUTH_ONLY";
154  }
155 #endif
156 
157  return authTypeString;
158 }
159 
160 
165 static size_t writeToOpenfileDescriptor( char *data, size_t /* size */, size_t nmemb, void *userdata){
166 
167  int *fd = (int *) userdata;
168 
169  BESDEBUG("curl", "curl_utils::writeToOpenfileDescriptor() - Bytes received " << libdap::long_to_string(nmemb) << endl);
170  int wrote = write(*fd, data, nmemb);
171  BESDEBUG("curl", "curl_utils::writeToOpenfileDescriptor() - Bytes written " << libdap::long_to_string(wrote) << endl);
172 
173  return wrote;
174 }
175 
176 
200 static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
201 {
202  BESDEBUG("curl", "curl_utils::save_raw_http_headers() - Inside the header parser." << endl);
203  vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
204 
205  // Grab the header, minus the trailing newline. Or \r\n pair.
206  string complete_line;
207  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
208  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
209  else
210  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
211 
212  // Store all non-empty headers that are not HTTP status codes
213  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
214  BESDEBUG("curl", "curl_utils::save_raw_http_headers() - Header line: " << complete_line << endl);
215  hdrs->push_back(complete_line);
216  }
217 
218  return size * nmemb;
219 }
220 
221 
222 
223 
224 
226 static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
227 {
228  string message(msg, size);
229 
230  switch (info) {
231  case CURLINFO_TEXT:
232  BESDEBUG("curl", "curl_utils::curl_debug() - Text: " << message << endl ); break;
233  case CURLINFO_HEADER_IN:
234  BESDEBUG("curl", "curl_utils::curl_debug() - Header in: " << message << endl ); break;
235  case CURLINFO_HEADER_OUT:
236  BESDEBUG("curl", "curl_utils::curl_debug() - Header out: " << endl << message << endl ); break;
237  case CURLINFO_DATA_IN:
238  BESDEBUG("curl", "curl_utils::curl_debug() - Data in: " << message << endl ); break;
239  case CURLINFO_DATA_OUT:
240  BESDEBUG("curl", "curl_utils::curl_debug() - Data out: " << message << endl ); break;
241  case CURLINFO_END:
242  BESDEBUG("curl", "curl_utils::curl_debug() - End: " << message << endl ); break;
243 #ifdef CURLINFO_SSL_DATA_IN
244  case CURLINFO_SSL_DATA_IN:
245  BESDEBUG("curl", "curl_utils::curl_debug() - SSL Data in: " << message << endl ); break;
246 #endif
247 #ifdef CURLINFO_SSL_DATA_OUT
248  case CURLINFO_SSL_DATA_OUT:
249  BESDEBUG("curl", "curl_utils::curl_debug() - SSL Data out: " << message << endl ); break;
250 #endif
251  default:
252  BESDEBUG("curl", "curl_utils::curl_debug() - Curl info: " << message << endl ); break;
253  }
254  return 0;
255 }
256 
257 
258 
259 
260 
261 
262 
265 class BuildHeaders : public std::unary_function<const string &, void>
266 {
267  struct curl_slist *d_cl;
268 
269 public:
270  BuildHeaders() : d_cl(0)
271  {}
272 
273  void operator()(const string &header)
274  {
275  BESDEBUG("curl", "BuildHeaders::operator() - Adding '" << header.c_str() << "' to the header list." << endl);
276  d_cl = curl_slist_append(d_cl, header.c_str());
277  }
278 
279  struct curl_slist *get_headers()
280  {
281  return d_cl;
282  }
283 };
284 
285 
286 
287 
288 
289 
290 
304 bool configureProxy(CURL *curl, const string &url) {
305  BESDEBUG( "curl", "curl_utils::configureProxy() - BEGIN." << endl);
306 
307  bool using_proxy = false;
308 
309  // I pulled this because I could never find where it was applied
310  // to the curl state in HTTPConnect
311  //string proxyProtocol = GatewayUtils::ProxyProtocol;
312 
313  string proxyHost = GatewayUtils::ProxyHost;
314  int proxyPort = GatewayUtils::ProxyPort;
315  string proxyPassword = GatewayUtils::ProxyPassword;
316  string proxyUser = GatewayUtils::ProxyUser;
317  string proxyUserPW = GatewayUtils::ProxyUserPW;
318  int proxyAuthType = GatewayUtils::ProxyAuthType;
319 
320  if (!proxyHost.empty()) {
321  using_proxy = true;
322  if(proxyPort==0)
323  proxyPort = 8080;
324 
325  // Apparently we don't need this...
326  //if(proxyProtocol.empty())
327  // proxyProtocol = "http";
328 
329  }
330  if (using_proxy) {
331  BESDEBUG( "curl", "curl_utils::configureProxy() - Found proxy configuration." << endl);
332 
333  // Don't set up the proxy server for URLs that match the 'NoProxy'
334  // regex set in the gateway.conf file.
335 
336  // Don't create the regex if the string is empty
337  if (!GatewayUtils::NoProxyRegex.empty()) {
338  BESDEBUG( "curl", "curl_utils::configureProxy() - Found NoProxyRegex." << endl);
339  libdap::Regex r(GatewayUtils::NoProxyRegex.c_str());
340  if (r.match(url.c_str(), url.length()) != -1) {
341  BESDEBUG( "curl", "curl_utils::configureProxy() - Found NoProxy match. Regex: " << GatewayUtils::NoProxyRegex << "; Url: " << url << endl);
342  using_proxy = false;
343  }
344  }
345 
346  if (using_proxy) {
347 
348  BESDEBUG("curl", "curl_utils::configureProxy() - Setting up a proxy server." << endl);
349  BESDEBUG("curl", "curl_utils::configureProxy() - Proxy host: " << proxyHost << endl);
350  BESDEBUG("curl", "curl_utils::configureProxy() - Proxy port: " << proxyPort << endl);
351 
352  curl_easy_setopt(curl, CURLOPT_PROXY, proxyHost.data());
353  curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxyPort);
354 
355 // #ifdef CURLOPT_PROXYAUTH
356 
357  // oddly "#ifdef CURLOPT_PROXYAUTH" doesn't work - even though CURLOPT_PROXYAUTH is defined and valued at 111 it
358  // fails the test. Eclipse hover over the CURLOPT_PROXYAUTH symbol shows: "CINIT(PROXYAUTH, LONG, 111)",
359  // for what that's worth
360 
361  // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH As of 4/21/08 only NTLM, Digest and Basic work.
362 
363 #if 0
364  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYAUTH = " << CURLOPT_PROXYAUTH << endl);
365  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_BASIC = " << CURLAUTH_BASIC << endl);
366  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_DIGEST = " << CURLAUTH_DIGEST << endl);
367  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_DIGEST_IE = " << CURLAUTH_DIGEST_IE << endl);
368  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_GSSNEGOTIATE = " << CURLAUTH_GSSNEGOTIATE << endl);
369  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_NTLM = " << CURLAUTH_NTLM << endl);
370  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ANY = " << CURLAUTH_ANY << endl);
371  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ANYSAFE = " << CURLAUTH_ANYSAFE << endl);
372  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ONLY = " << CURLAUTH_ONLY << endl);
373  BESDEBUG("curl", "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << proxyAuthType << endl);
374 #endif
375 
376  BESDEBUG("curl", "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
377  curl_easy_setopt(curl, CURLOPT_PROXYAUTH, proxyAuthType);
378 // #endif
379 
380 
381 
382  if (!proxyUser.empty()){
383  curl_easy_setopt(curl, CURLOPT_PROXYUSERNAME, proxyUser.data());
384  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYUSER : " << proxyUser << endl);
385 
386  if (!proxyPassword.empty()){
387  curl_easy_setopt(curl, CURLOPT_PROXYPASSWORD, proxyPassword.data());
388  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
389  }
390  }
391  else if (!proxyUserPW.empty()){
392  BESDEBUG("curl",
393  "curl_utils::configureProxy() - CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
394  curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
395  }
396 
397  }
398  }
399  BESDEBUG( "curl", "curl_utils::configureProxy() - END." << endl);
400 
401  return using_proxy;
402 }
403 
404 
405 
406 
407 
408 
409 
410 
411 
412 
413 
414 
415 
416 
426 CURL *init(char *error_buffer)
427 {
428 
429  CURL *curl = curl_easy_init();
430  if (!curl)
431  throw libdap::InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
432 
433  // Load in the default headers to send with a request. The empty Pragma
434  // headers overrides libcurl's default Pragma: no-cache header (which
435  // will disable caching by Squid, etc.).
436 
437  // the empty Pragma never appears in the outgoing headers when this isn't present
438  // d_request_headers->push_back(string("Pragma: no-cache"));
439 
440  // d_request_headers->push_back(string("Cache-Control: no-cache"));
441 
442  // Allow compressed responses. Sending an empty string enables all supported compression types.
443 #ifndef CURLOPT_ACCEPT_ENCODING
444  curl_easy_setopt(curl, CURLOPT_ENCODING, "");
445 #else
446  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
447 #endif
448 
449  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer);
450  // We have to set FailOnError to false for any of the non-Basic
451  // authentication schemes to work. 07/28/03 jhrg
452  curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
453 
454  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
455  // choosing the the 'safest' one supported by the server.
456  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
457  curl_easy_setopt(curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
458 
459  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
460  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
461  curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
462  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
463  // param of save_raw_http_headers to a vector<string> object.
464 
465  // Follow 302 (redirect) responses
466  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
467  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5);
468 
469 
470  // Set the user agent to curls version response because, well, that's what command line curl does :)
471  curl_easy_setopt(curl, CURLOPT_USERAGENT, curl_version());
472 
473 
474 #if 0
475  // If the user turns off SSL validation...
476  if (!d_rcr->get_validate_ssl() == 0) {
477  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
478  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
479  }
480 
481  // Look to see if cookies are turned on in the .dodsrc file. If so,
482  // activate here. We honor 'session cookies' (cookies without an
483  // expiration date) here so that session-base SSO systems will work as
484  // expected.
485  if (!d_cookie_jar.empty()) {
486  BESDEBUG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
487  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
488  curl_easy_setopt(curl, CURLOPT_COOKIESESSION, 1);
489  }
490 #endif
491 
492 
493  if (curl_trace) {
494  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl version: " << curl_version() << endl);
495  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
496  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl in verbose mode."<< endl);
497  curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, curl_debug);
498  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl debugging function installed."<< endl);
499  }
500 
501 
502  BESDEBUG("curl", "curl_utils::www_lib_init() - curl: " << curl << endl);
503 
504  return curl;
505 
506 
507 }
508 
509 
510 
511 
527 long read_url(CURL *curl,
528  const string &url,
529  int fd,
530  vector<string> *resp_hdrs,
531  const vector<string> *request_headers,
532  char error_buffer[])
533 {
534 
535  BESDEBUG("curl", "curl_utils::read_url() - BEGIN" << endl);
536 
537 
538  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
539 
540  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToOpenfileDescriptor);
541 
542 
543 #ifdef CURLOPT_WRITEDATA
544  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &fd);
545 #else
546  curl_easy_setopt(curl, CURLOPT_FILE, &fd);
547 #endif
548 
549 
550 
551  //DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
552 
553  BuildHeaders req_hdrs;
554  //req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
555  // req_hdrs);
556  if (request_headers)
557  req_hdrs = for_each(request_headers->begin(), request_headers->end(), req_hdrs);
558  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
559 
560 
561  // Pass save_raw_http_headers() a pointer to the vector<string> where the
562  // response headers may be stored. Callers can use the resp_hdrs
563  // value/result parameter to get the raw response header information .
564  curl_easy_setopt(curl, CURLOPT_WRITEHEADER, resp_hdrs);
565 
566  // This call is the one that makes curl go get the thing.
567  CURLcode res = curl_easy_perform(curl);
568 
569  // Free the header list and null the value in d_curl.
570  curl_slist_free_all(req_hdrs.get_headers());
571  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, 0);
572 
573 
574  if (res != 0){
575  BESDEBUG("curl", "curl_utils::read_url() - OUCH! CURL returned an error! curl msg: " << curl_easy_strerror(res) << endl);
576  BESDEBUG("curl", "curl_utils::read_url() - OUCH! CURL returned an error! error_buffer: " << error_buffer << endl);
577  throw libdap::Error(error_buffer);
578  }
579 
580  long status;
581  res = curl_easy_getinfo(curl, CURLINFO_HTTP_CODE, &status);
582  BESDEBUG("curl", "curl_utils::read_url() - HTTP Status " << status << endl);
583  if (res != CURLE_OK)
584  throw libdap::Error(error_buffer);
585  BESDEBUG("curl", "curl_utils::read_url() - END" << endl);
586 
587  return status;
588 }
589 
590 
591 
592 } /* namespace libcurl */
bool configureProxy(CURL *curl, const string &url)
Configure the proxy options for the passed curl object.
Definition: curl_utils.cc:304
#define SERVER_ERR_MIN
Definition: curl_utils.cc:69
static string ProxyUser
Definition: GatewayUtils.h:55
#define CLIENT_ERR_MIN
Definition: curl_utils.cc:43
static string NoProxyRegex
Definition: GatewayUtils.h:61
const char * http_server_errors[SERVER_ERR_MAX-SERVER_ERR_MIN+1]
Definition: curl_utils.cc:71
#define SERVER_ERR_MAX
Definition: curl_utils.cc:70
static string ProxyHost
Definition: GatewayUtils.h:53
const char * http_client_errors[CLIENT_ERR_MAX-CLIENT_ERR_MIN+1]
Definition: curl_utils.cc:45
string http_status_to_string(int status)
This function translates an HTTP status code into an error message.
Definition: curl_utils.cc:84
#define CLIENT_ERR_MAX
Definition: curl_utils.cc:44
long read_url(CURL *curl, const string &url, int fd, vector< string > *resp_hdrs, const vector< string > *request_headers, char error_buffer[])
Use libcurl to dereference a URL.
Definition: curl_utils.cc:527
CURL * init(char *error_buffer)
Gets a new instance of CURL* and performs basic configuration of that instance.
Definition: curl_utils.cc:426
static int ProxyPort
Definition: GatewayUtils.h:57
static int ProxyAuthType
Definition: GatewayUtils.h:58
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
int curl_trace
Definition: curl_utils.cc:39
static string ProxyUserPW
Definition: GatewayUtils.h:54
static string ProxyPassword
Definition: GatewayUtils.h:56