OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESUtil.cc
Go to the documentation of this file.
1 // BESUtil.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 
38 #if HAVE_UNISTD_H
39 #include <unistd.h>
40 #endif
41 
42 #include <cstdio>
43 #include <cerrno>
44 #include <cstring>
45 #include <cstdlib>
46 #include <sstream>
47 #include <iostream>
48 
49 using std::istringstream;
50 using std::cout;
51 using std::endl;
52 
53 #include "BESUtil.h"
54 #include "BESDebug.h"
55 #include "BESForbiddenError.h"
56 #include "BESNotFoundError.h"
57 #include "BESInternalError.h"
58 
59 #define CRLF "\r\n"
60 
61 #define debug_key "BesUtil"
62 
67 void BESUtil::set_mime_text(ostream &strm) {
68  strm << "HTTP/1.0 200 OK" << CRLF;
69  strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
70 
71  const time_t t = time(0);
72  strm << "Date: " << rfc822_date(t).c_str() << CRLF;
73  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
74 
75  strm << "Content-Type: text/plain" << CRLF;
76  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
77  strm << "Content-Description: unknown" << CRLF;
78  strm << CRLF;
79 }
80 
85 void BESUtil::set_mime_html(ostream &strm) {
86  strm << "HTTP/1.0 200 OK" << CRLF;
87  strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
88 
89  const time_t t = time(0);
90  strm << "Date: " << rfc822_date(t).c_str() << CRLF;
91  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
92 
93  strm << "Content-type: text/html" << CRLF;
94  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
95  strm << "Content-Description: unknown" << CRLF;
96  strm << CRLF;
97 }
98 
99 // Return a MIME rfc-822 date. The grammar for this is:
100 // date-time = [ day "," ] date time ; dd mm yy
101 // ; hh:mm:ss zzz
102 //
103 // day = "Mon" / "Tue" / "Wed" / "Thu"
104 // / "Fri" / "Sat" / "Sun"
105 //
106 // date = 1*2DIGIT month 2DIGIT ; day month year
107 // ; e.g. 20 Jun 82
108 // NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
109 //
110 // month = "Jan" / "Feb" / "Mar" / "Apr"
111 // / "May" / "Jun" / "Jul" / "Aug"
112 // / "Sep" / "Oct" / "Nov" / "Dec"
113 //
114 // time = hour zone ; ANSI and Military
115 //
116 // hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]
117 // ; 00:00:00 - 23:59:59
118 //
119 // zone = "UT" / "GMT" ; Universal Time
120 // ; North American : UT
121 // / "EST" / "EDT" ; Eastern: - 5/ - 4
122 // / "CST" / "CDT" ; Central: - 6/ - 5
123 // / "MST" / "MDT" ; Mountain: - 7/ - 6
124 // / "PST" / "PDT" ; Pacific: - 8/ - 7
125 // / 1ALPHA ; Military: Z = UT;
126 // ; A:-1; (J not used)
127 // ; M:-12; N:+1; Y:+12
128 // / ( ("+" / "-") 4DIGIT ) ; Local differential
129 // ; hours+min. (HHMM)
130 
131 static const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
132 static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
133  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
134 
144 string BESUtil::rfc822_date(const time_t t) {
145  struct tm *stm = gmtime(&t);
146  char d[256];
147 
148  snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
149  stm->tm_mday, months[stm->tm_mon], 1900 + stm->tm_year,
150  stm->tm_hour, stm->tm_min, stm->tm_sec);
151  d[255] = '\0';
152  return string(d);
153 }
154 
155 string BESUtil::unhexstring(string s) {
156  int val;
157  istringstream ss(s);
158  ss >> std::hex >> val;
159  char tmp_str[2];
160  tmp_str[0] = static_cast<char> (val);
161  tmp_str[1] = '\0';
162  return string(tmp_str);
163 }
164 
165 // I modified this to mirror the version in libdap. The change allows several
166 // escape sequences to by listed in 'except'. jhrg 2/18/09
167 string BESUtil::www2id(const string &in, const string &escape,
168  const string &except) {
169  string::size_type i = 0;
170  string res = in;
171  while ((i = res.find_first_of(escape, i)) != string::npos) {
172  if (except.find(res.substr(i, 3)) != string::npos) {
173  i += 3;
174  continue;
175  }
176  res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
177  }
178 
179  return res;
180 }
181 
182 string BESUtil::lowercase(const string &s) {
183  string return_string = s;
184  for (int j = 0; j < static_cast<int> (return_string.length()); j++) {
185  return_string[j] = (char) tolower(return_string[j]);
186  }
187 
188  return return_string;
189 }
190 
191 string BESUtil::unescape(const string &s) {
192  bool done = false;
193  string::size_type index = 0;
194  /* string::size_type new_index = 0 ; */
195  string new_str;
196  while (!done) {
197  string::size_type bs = s.find('\\', index);
198  if (bs == string::npos) {
199  new_str += s.substr(index, s.length() - index);
200  done = true;
201  } else {
202  new_str += s.substr(index, bs - index);
203  new_str += s[bs + 1];
204  index = bs + 2;
205  }
206  }
207 
208  return new_str;
209 }
210 
232 void BESUtil::check_path(const string &path, const string &root,
233  bool follow_sym_links) {
234  // if nothing is passed in path, then the path checks out since root is
235  // assumed to be valid.
236  if (path == "")
237  return;
238 
239 
240  // Rather than have two basically identical code paths for the two cases (follow and !follow symlinks)
241  // We evaluate the follow_sym_links switch and use a function pointer to get the correct "stat"
242  // function for the eval operation.
243  int (*ye_old_stat_function)(const char *pathname, struct stat *buf);
244  if(follow_sym_links){
245  BESDEBUG(debug_key,"eval_w10n_resourceId() - Using 'stat' function (follow_sym_links = true)" << endl);
246  ye_old_stat_function = &stat;
247  }
248  else {
249  BESDEBUG(debug_key,"eval_w10n_resourceId() - Using 'lstat' function (follow_sym_links = false)" << endl);
250  ye_old_stat_function = &lstat;
251  }
252 
253 
254 
255  // make sure there are no ../ in the directory, backing up in any way is
256  // not allowed.
257  string::size_type dotdot = path.find("..");
258  if (dotdot != string::npos) {
259  string s = (string) "You are not allowed to access the node " + path;
260  throw BESForbiddenError(s, __FILE__, __LINE__);
261  }
262 
263  // What I want to do is to take each part of path and check to see if it
264  // is a symbolic link and it is accessible. If everything is ok, add the
265  // next part of the path.
266  bool done = false;
267 
268  // what is remaining to check
269  string rem = path;
270  if (rem[0] == '/')
271  rem = rem.substr(1, rem.length() - 1);
272  if (rem[rem.length() - 1] == '/')
273  rem = rem.substr(0, rem.length() - 1);
274 
275  // full path of the thing to check
276  string fullpath = root;
277  if (fullpath[fullpath.length() - 1] == '/') {
278  fullpath = fullpath.substr(0, fullpath.length() - 1);
279  }
280 
281  // path checked so far
282  string checked;
283  while (!done) {
284  size_t slash = rem.find('/');
285  if (slash == string::npos) {
286  fullpath = fullpath + "/" + rem;
287  checked = checked + "/" + rem;
288  done = true;
289  } else {
290  fullpath = fullpath + "/" + rem.substr(0, slash);
291  checked = checked + "/" + rem.substr(0, slash);
292  rem = rem.substr(slash + 1, rem.length() - slash);
293  }
294 
295  struct stat buf;
296  int statret = ye_old_stat_function(fullpath.c_str(), &buf);
297  if (statret == -1) {
298  int errsv = errno;
299  // stat failed, so not accessible. Get the error string,
300  // store in error, and throw exception
301  char *s_err = strerror(errsv);
302  string error = "Unable to access node " + checked + ": ";
303  if (s_err) {
304  error = error + s_err;
305  } else {
306  error = error + "unknown access error";
307  }
308 
309  BESDEBUG(debug_key,"check_path() - error: "<< error << " errno: " << errno << endl);
310 
311  // ENOENT means that the node wasn't found.
312  // On some systems a file that doesn't exist returns ENOTDIR because: w.f.t?
313  // Otherwise, access is being denied for some other reason
314  if (errsv == ENOENT || errsv == ENOTDIR) {
315  // On some systems a file that doesn't exist returns ENOTDIR because: w.f.t?
316  throw BESNotFoundError(error, __FILE__, __LINE__);
317  } else {
318  throw BESForbiddenError(error, __FILE__, __LINE__);
319  }
320  } else {
321  //The call to (stat | lstat) was successful, now check to see if it's a symlink.
322  // Note that if follow_symlinks is true then this will never evaluate as true
323  // because we'll be using 'stat' and not 'lstat' and stat will follow the link
324  // and return information about the file/dir pointed to by the symlink
325  if (S_ISLNK( buf.st_mode )) {
326  string error = "You do not have permission to access "
327  + checked;
328  throw BESForbiddenError(error, __FILE__, __LINE__);
329  }
330  }
331  }
332 
333 
334 #if 0
335  while (!done) {
336  size_t slash = rem.find('/');
337  if (slash == string::npos) {
338  fullpath = fullpath + "/" + rem;
339  checked = checked + "/" + rem;
340  done = true;
341  } else {
342  fullpath = fullpath + "/" + rem.substr(0, slash);
343  checked = checked + "/" + rem.substr(0, slash);
344  rem = rem.substr(slash + 1, rem.length() - slash);
345  }
346 
347  if (!follow_sym_links) {
348  struct stat buf;
349  int statret = lstat(fullpath.c_str(), &buf);
350  if (statret == -1) {
351  int errsv = errno;
352  // stat failed, so not accessible. Get the error string,
353  // store in error, and throw exception
354  char *s_err = strerror(errsv);
355  string error = "Unable to access node " + checked + ": ";
356  if (s_err) {
357  error = error + s_err;
358  } else {
359  error = error + "unknown access error";
360  }
361  // ENOENT means that the node wasn't found. Otherwise, access
362  // is denied for some reason
363  if (errsv == ENOENT) {
364  throw BESNotFoundError(error, __FILE__, __LINE__);
365  } else {
366  throw BESForbiddenError(error, __FILE__, __LINE__);
367  }
368  } else {
369  // lstat was successful, now check if sym link
370  if (S_ISLNK( buf.st_mode )) {
371  string error = "You do not have permission to access "
372  + checked;
373  throw BESForbiddenError(error, __FILE__, __LINE__);
374  }
375  }
376  } else {
377  // just do a stat and see if we can access the thing. If we
378  // can't, get the error information and throw an exception
379  struct stat buf;
380  int statret = stat(fullpath.c_str(), &buf);
381  if (statret == -1) {
382  int errsv = errno;
383  // stat failed, so not accessible. Get the error string,
384  // store in error, and throw exception
385  char *s_err = strerror(errsv);
386  string error = "Unable to access node " + checked + ": ";
387  if (s_err) {
388  error = error + s_err;
389  } else {
390  error = error + "unknown access error";
391  }
392  // ENOENT means that the node wasn't found. Otherwise, access
393  // is denied for some reason
394  if (errsv == ENOENT) {
395  throw BESNotFoundError(error, __FILE__, __LINE__);
396  } else {
397  throw BESForbiddenError(error, __FILE__, __LINE__);
398  }
399  }
400  }
401  }
402 
403 #endif
404 }
405 
406 char *
407 BESUtil::fastpidconverter(char *buf, int base) {
408  return fastpidconverter(getpid(), buf, base);
409 }
410 
411 char *
412 BESUtil::fastpidconverter(long val, /* value to be converted */
413 char *buf, /* output string */
414 int base) /* conversion base */
415 {
416  ldiv_t r; /* result of val / base */
417 
418  if (base > 36 || base < 2) /* no conversion if wrong base */
419  {
420  *buf = '\0';
421  return buf;
422  }
423  if (val < 0)
424  *buf++ = '-';
425  r = ldiv(labs(val), base);
426 
427  /* output digits of val/base first */
428 
429  if (r.quot > 0)
430  buf = fastpidconverter(r.quot, buf, base);
431  /* output last digit */
432 
433  *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int) r.rem];
434  *buf = '\0';
435  return buf;
436 }
437 
439  if (!key.empty()) {
440  string::size_type first = key.find_first_not_of(" \t\n\r");
441  string::size_type last = key.find_last_not_of(" \t\n\r");
442  if (first == string::npos)
443  key = "";
444  else {
445  string::size_type num = last - first + 1;
446  string new_key = key.substr(first, num);
447  key = new_key;
448  }
449  }
450 }
451 
452 string BESUtil::entity(char c) {
453  switch (c) {
454  case '>':
455  return "&gt;";
456  case '<':
457  return "&lt;";
458  case '&':
459  return "&amp;";
460  case '\'':
461  return "&apos;";
462  case '\"':
463  return "&quot;";
464  default:
465  return string(1, c); // is this proper default, just the char?
466  }
467 }
468 
475 string BESUtil::id2xml(string in, const string &not_allowed) {
476  string::size_type i = 0;
477 
478  while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
479  in.replace(i, 1, entity(in[i]));
480  i++;
481  }
482 
483  return in;
484 }
485 
491 string BESUtil::xml2id(string in) {
492  string::size_type i = 0;
493 
494  while ((i = in.find("&gt;", i)) != string::npos)
495  in.replace(i, 4, ">");
496 
497  i = 0;
498  while ((i = in.find("&lt;", i)) != string::npos)
499  in.replace(i, 4, "<");
500 
501  i = 0;
502  while ((i = in.find("&amp;", i)) != string::npos)
503  in.replace(i, 5, "&");
504 
505  i = 0;
506  while ((i = in.find("&apos;", i)) != string::npos)
507  in.replace(i, 6, "'");
508 
509  i = 0;
510  while ((i = in.find("&quot;", i)) != string::npos)
511  in.replace(i, 6, "\"");
512 
513  return in;
514 }
515 
529 void BESUtil::explode(char delim, const string &str, list<string> &values) {
530  std::string::size_type start = 0;
531  std::string::size_type qstart = 0;
532  std::string::size_type adelim = 0;
533  std::string::size_type aquote = 0;
534  bool done = false;
535  while (!done) {
536  string aval;
537  if (str[start] == '"') {
538  bool endquote = false;
539  qstart = start + 1;
540  while (!endquote) {
541  aquote = str.find('"', qstart);
542  if (aquote == string::npos) {
543  string currval = str.substr(start, str.length() - start);
544  string err = "BESUtil::explode - No end quote after value "
545  + currval;
546  throw BESInternalError(err, __FILE__, __LINE__);
547  }
548  // could be an escaped escape character and an escaped
549  // quote, or an escaped escape character and a quote
550  if (str[aquote - 1] == '\\') {
551  if (str[aquote - 2] == '\\') {
552  endquote = true;
553  qstart = aquote + 1;
554  } else {
555  qstart = aquote + 1;
556  }
557  } else {
558  endquote = true;
559  qstart = aquote + 1;
560  }
561  }
562  if (str[qstart] != delim && qstart != str.length()) {
563  string currval = str.substr(start, qstart - start);
564  string err = "BESUtil::explode - No delim after end quote "
565  + currval;
566  throw BESInternalError(err, __FILE__, __LINE__);
567  }
568  if (qstart == str.length()) {
569  adelim = string::npos;
570  } else {
571  adelim = qstart;
572  }
573  } else {
574  adelim = str.find(delim, start);
575  }
576  if (adelim == string::npos) {
577  aval = str.substr(start, str.length() - start);
578  done = true;
579  } else {
580  aval = str.substr(start, adelim - start);
581  }
582 
583  values.push_back(aval);
584  start = adelim + 1;
585  if (start == str.length()) {
586  values.push_back("");
587  done = true;
588  }
589  }
590 }
591 
602 string BESUtil::implode(const list<string> &values, char delim) {
603  string result;
604  list<string>::const_iterator i = values.begin();
605  list<string>::const_iterator e = values.end();
606  bool first = true;
607  string::size_type d; // = string::npos ;
608  for (; i != e; i++) {
609  if (!first)
610  result += delim;
611  d = (*i).find(delim);
612  if (d != string::npos && (*i)[0] != '"') {
613  string err =
614  (string) "BESUtil::implode - delimiter exists in value "
615  + (*i);
616  throw BESInternalError(err, __FILE__, __LINE__);
617  }
618  //d = string::npos ;
619  result += (*i);
620  first = false;
621  }
622  return result;
623 }
624 
644 void BESUtil::url_explode(const string &url_str, BESUtil::url &url_parts) {
645  string rest;
646 
647  string::size_type colon = url_str.find(":");
648  if (colon == string::npos) {
649  string err = "BESUtil::url_explode: missing colon for protocol";
650  throw BESInternalError(err, __FILE__, __LINE__);
651  }
652 
653  url_parts.protocol = url_str.substr(0, colon);
654 
655  if (url_str.substr(colon, 3) != "://") {
656  string err = "BESUtil::url_explode: no :// in the URL";
657  throw BESInternalError(err, __FILE__, __LINE__);
658  }
659 
660  colon += 3;
661  rest = url_str.substr(colon);
662 
663  string::size_type slash = rest.find("/");
664  if (slash == string::npos)
665  slash = rest.length();
666 
667  string::size_type at = rest.find("@");
668  if ((at != string::npos) && (at < slash)) {
669  // everything before the @ is username:password
670  string up = rest.substr(0, at);
671  colon = up.find(":");
672  if (colon != string::npos) {
673  url_parts.uname = up.substr(0, colon);
674  url_parts.psswd = up.substr(colon + 1);
675  } else {
676  url_parts.uname = up;
677  }
678  // everything after the @ is domain/path
679  rest = rest.substr(at + 1);
680  }
681  slash = rest.find("/");
682  if (slash == string::npos)
683  slash = rest.length();
684  colon = rest.find(":");
685  if ((colon != string::npos) && (colon < slash)) {
686  // everything before the colon is the domain
687  url_parts.domain = rest.substr(0, colon);
688  // everything after the folon is port/path
689  rest = rest.substr(colon + 1);
690  slash = rest.find("/");
691  if (slash != string::npos) {
692  url_parts.port = rest.substr(0, slash);
693  url_parts.path = rest.substr(slash + 1);
694  } else {
695  url_parts.port = rest;
696  url_parts.path = "";
697  }
698  } else {
699  slash = rest.find("/");
700  if (slash != string::npos) {
701  url_parts.domain = rest.substr(0, slash);
702  url_parts.path = rest.substr(slash + 1);
703  } else {
704  url_parts.domain = rest;
705  }
706  }
707 }
708 
709 string BESUtil::url_create(BESUtil::url &url_parts) {
710  string url = url_parts.protocol + "://";
711  if (!url_parts.uname.empty()) {
712  url += url_parts.uname;
713  if (!url_parts.psswd.empty())
714  url += ":" + url_parts.psswd;
715  url += "@";
716  }
717  url += url_parts.domain;
718  if (!url_parts.port.empty())
719  url += ":" + url_parts.port;
720  if (!url_parts.path.empty())
721  url += "/" + url_parts.path;
722 
723  return url;
724 }
725 
error thrown if the resource requested cannot be found
if(!(yy_init))
Definition: lex.gse.cc:752
static string id2xml(string in, const string &not_allowed="><&'\"")
convert characters not allowed in xml to escaped characters
Definition: BESUtil.cc:475
exception thrown if an internal error is encountered
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:182
static string www2id(const string &in, const string &escape="%", const string &except="")
These functions are used to unescape hex characters from strings.
Definition: BESUtil.cc:167
static void removeLeadingAndTrailingBlanks(string &key)
remove leading and trailing blanks from a string
Definition: BESUtil.cc:438
static string implode(const list< string > &values, char delim)
implode a list of values into a single string delimited by delim
Definition: BESUtil.cc:602
string port
Definition: BESUtil.h:112
static string xml2id(string in)
unescape xml escaped characters
Definition: BESUtil.cc:491
static void set_mime_html(ostream &strm)
Generate an HTTP 1.0 response header for a html document.
Definition: BESUtil.cc:85
#define debug_key
Definition: BESUtil.cc:61
static void explode(char delim, const string &str, list< string > &values)
explode a string into an array given a delimiter
Definition: BESUtil.cc:529
#define CRLF
Definition: BESUtil.cc:59
static void set_mime_text(ostream &strm)
Generate an HTTP 1.0 response header for a text document.
Definition: BESUtil.cc:67
static string unhexstring(string s)
Definition: BESUtil.cc:155
static void url_explode(const string &url_str, BESUtil::url &url_parts)
Given a url, break the url into its different parts.
Definition: BESUtil.cc:644
string path
Definition: BESUtil.h:113
error thrown if the BES is not allowed to access the resource requested
static string url_create(BESUtil::url &url_parts)
Definition: BESUtil.cc:709
string protocol
Definition: BESUtil.h:108
static char * fastpidconverter(char *buf, int base)
convert pid and place in provided buffer
Definition: BESUtil.cc:407
string uname
Definition: BESUtil.h:110
string psswd
Definition: BESUtil.h:111
static string unescape(const string &s)
Unescape characters with backslash before them.
Definition: BESUtil.cc:191
#define PACKAGE_STRING
Definition: config.h:247
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
static void check_path(const string &path, const string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:232
string domain
Definition: BESUtil.h:109