OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
DirectoryUtil.cc
Go to the documentation of this file.
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 #include "DirectoryUtil.h"
32 
33 #include <cstring>
34 #include <cerrno>
35 #include <sstream>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 
40 // libdap
41 #include "GNURegex.h"
42 
43 // bes
44 #include "BESDebug.h"
45 #include "BESForbiddenError.h"
46 #include "BESInternalError.h"
47 #include "BESKeys.h"
48 #include "BESNotFoundError.h"
49 #include "BESUtil.h"
50 #include "TheBESKeys.h"
51 
52 using std::string;
53 using std::vector;
54 
55 namespace agg_util
56 {
// Scoped owner of a POSIX directory stream: opendir() in the constructor,
// closedir() in the destructor, so the stream is released however the
// enclosing scope is left (including by an exception).
//
// The object is deliberately non-copyable: a copy would hold the same DIR*
// and both destructors would call closedir() on it.
struct DirWrapper
{
    // Tries to open fullDirPath. On failure fail() returns true and errno
    // still holds the value set by opendir(), because opendir() is the last
    // call made here (the path copy is done in the initializer list first).
    explicit DirWrapper(const std::string& fullDirPath)
        : _pDir(0)
        , _fullPath(fullDirPath)
    {
        _pDir = opendir(fullDirPath.c_str());
    }

    // Closes the stream if one was opened.
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    // True when opendir() did not produce a stream.
    bool fail() const
    {
        return !_pDir;
    }

    // The underlying stream (null when fail() is true). Ownership stays here.
    DIR* get() const
    {
        return _pDir;
    }

    DIR* _pDir;             // closed by the destructor when non-null
    std::string _fullPath;  // the path that was handed to opendir()

private:
    // Declared but never defined: copying is disallowed.
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
98 
100  FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime)
101  : _path(path)
102  , _basename(basename)
103  , _fullPath("") // start empty, cached later
104  , _isDir(isDir)
105  , _modTime(modTime)
106  {
109  }
110 
112  {
113  }
114 
115  const std::string&
117  {
118  return _path;
119  }
120 
121  const std::string&
123  {
124  return _basename;
125  }
126 
127  bool
129  {
130  return _isDir;
131  }
132 
133  time_t
135  {
136  return _modTime;
137  }
138 
139  std::string
141  {
142  // we'll just use UTC for the output...
143  struct tm* pTM = gmtime(&_modTime);
144  char buf[128];
145  // this should be "Year-Month-Day Hour:Minute:Second"
146  strftime(buf, 128, "%F %T", pTM);
147  return string(buf);
148  }
149 
150  const std::string&
152  {
153  if (_fullPath.empty())
154  {
155  _fullPath = _path + "/" + _basename;
156  }
157  return _fullPath;
158  }
159 
160  std::string
162  {
163  return "{FileInfo fullPath=" + getFullPath() +
164  " isDir=" + ((isDir())?("true"):("false")) +
165  " modTime=\"" + getModTimeAsString() + "\""
166  " }";
167  }
168 
170 
171  const string DirectoryUtil::_sDebugChannel = "agg_util";
172 
174  : _rootDir("/")
175  , _suffix("") // we start with no filter
176  , _pRegExp(0)
177  , _filteringModTimes(false)
178  , _newestModTime(0L)
179  {
180  // this can throw, but the class is completely constructed by this point.
181  setRootDir("/");
182  }
183 
185  {
186  clearRegExp();
187  }
188 
190  const std::string&
192  {
193  return _rootDir;
194  }
195 
201  void
202  DirectoryUtil::setRootDir(const std::string& origRootDir,
203  bool allowRelativePaths/*=false*/,
204  bool /*allowSymLinks=false*/)
205  {
206  if (!allowRelativePaths && hasRelativePath(origRootDir))
207  {
208  throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__, __LINE__);
209  }
210 
211  // Get the root without trailing slash, we'll add it.
212  _rootDir = origRootDir;
213  removeTrailingSlashes(_rootDir);
214  // If empty here, that means the actual filesystem root.
215 
216  // Use the BESUtil to test the path
217  // Since it assumes root is valid and strips preceding "/",
218  // we use "/" as the root path and the root path as the path
219  // to validate the root. This will throw if invalid.
220  BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
221 
222  // We should be good if we get here.
223  }
224 
225  void
226  DirectoryUtil::setFilterSuffix(const std::string& suffix)
227  {
228  _suffix = suffix;
229  }
230 
231  void
232  DirectoryUtil::setFilterRegExp(const std::string& regexp)
233  {
234  clearRegExp(); // avoid leaks
235  if (!regexp.empty())
236  {
237  _pRegExp = new libdap::Regex(regexp.c_str());
238  }
239  }
240 
241  void
243  {
244  delete _pRegExp; _pRegExp = 0;
245  }
246 
247  void
249  {
250  _newestModTime = newestModTime;
251  _filteringModTimes = true;
252  }
253 
254  void
255  DirectoryUtil::getListingForPath(const std::string& path,
256  std::vector<FileInfo>* pRegularFiles,
257  std::vector<FileInfo>* pDirectories)
258  {
259  string pathToUse(path);
260  removePrecedingSlashes(pathToUse);
261  pathToUse = getRootDir() + "/" + pathToUse;
262  BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
263 
264  // RAII, will closedir no matter how we leave function, including a throw
265  DirWrapper pDir(pathToUse);
266  if (pDir.fail())
267  {
268  throwErrorForOpendirFail(pathToUse);
269  }
270 
271  // Go through each entry and see if it's a directory or regular file and
272  // add it to the list.
273  struct dirent* pDirEnt = 0;
274  while ( (pDirEnt = readdir(pDir.get())) != 0)
275  {
276  string entryName = pDirEnt->d_name;
277  // Exclude ".", ".." and any dotfile dirs like ".svn".
278  if (!entryName.empty() && entryName[0] == '.')
279  {
280  continue;
281  }
282 
283  // Figure out if it's a regular file or directory
284  string pathToEntry = pathToUse + "/" + entryName;
285  struct stat statBuf;
286  int statResult = stat(pathToEntry.c_str(), &statBuf);
287  if (statResult != 0)
288  {
289  // If we can't stat the file for some reason, then ignore it
290  continue;
291  }
292 
293  // Use the passed in path for the entry since we
294  // want to make the locations be relative to the root
295  // for loading later.
296  if (pDirectories && S_ISDIR(statBuf.st_mode))
297  {
298  pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime ));
299  }
300  else if (pRegularFiles && S_ISREG(statBuf.st_mode))
301  {
302  FileInfo theFile(path, entryName, false, statBuf.st_mtime);
303  // match against the relative passed in path, not root full path
304  if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime ))
305  {
306  pRegularFiles->push_back(theFile);
307  }
308  }
309  }
310  }
311 
312  void
314  std::vector<FileInfo>* pRegularFiles,
315  std::vector<FileInfo>* pDirectories)
316  {
317  // Remove trailing slash to make it canonical
318  string canonicalPath = path;
319  removeTrailingSlashes(canonicalPath);
320 
321  // We use our own local vector of directories in order to recurse,
322  // then add them to the end of pDirectories if it exists.
323 
324  // First, get the current path's listing
325  vector<FileInfo> dirs;
326  dirs.reserve(16); // might as well start with a "few" to avoid grows.
327 
328  // Keep adding them to the user specified regular file list if desired,
329  // but keep track of dirs ourself.
330  getListingForPath(canonicalPath, pRegularFiles, &dirs);
331 
332  // If the caller wanted directories, append them all to the return
333  if (pDirectories)
334  {
335  pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
336  }
337 
338  // Finally, recurse on each directory in dirs
339  for (vector<FileInfo>::const_iterator it = dirs.begin();
340  it != dirs.end();
341  ++it)
342  {
343  string subPath = canonicalPath + "/" + it->basename();
344  BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" <<
345  subPath << "\"..." << endl);
346  // Pass down the caller's accumulated vector's to be filled in.
347  getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
348  }
349 
350  }
351 
352  void
354  std::vector<FileInfo>& rRegularFiles)
355  {
356  // call the other one, not accumulated the directories, only recursing into them.
357  getListingForPathRecursive(path, &rRegularFiles, 0);
358  }
359 
// Translates the errno left by a failed opendir() on fullPath into a BES
// exception. This function always throws:
//   EACCES                                 -> BESForbiddenError
//   ELOOP, ENAMETOOLONG, ENOENT, ENOTDIR   -> BESNotFoundError
//   EMFILE, ENFILE, anything else          -> BESInternalError
void
DirectoryUtil::throwErrorForOpendirFail(const string& fullPath)
{
    // Taken once up front so the value reported in the default branch is the
    // one opendir() set, whatever the string building below does to errno.
    const int err = errno;

    switch (err) {
    case EACCES:
        throw BESForbiddenError("Permission denied for some directory in path=\"" + fullPath + "\"",
                                __FILE__, __LINE__);

    case ELOOP:
        // No closer match among the BES error types than "not found".
        throw BESNotFoundError("A symlink loop was detected in path=\"" + fullPath + "\"",
                               __FILE__, __LINE__);

    case ENAMETOOLONG:
        throw BESNotFoundError("A name in the path was too long. path=\"" + fullPath + "\"",
                               __FILE__, __LINE__);

    case ENOENT:
        throw BESNotFoundError("Some part of the path was not found. path=\"" + fullPath + "\"",
                               __FILE__, __LINE__);

    case ENOTDIR:
        throw BESNotFoundError("Some part of the path was not a directory. path=\"" + fullPath + "\"",
                               __FILE__, __LINE__);

    case EMFILE: // descriptor limit of this process
    case ENFILE: // system-wide open-file limit
        throw BESInternalError("Internal Error: Too many files are currently open!",
                               __FILE__, __LINE__);

    default:
        // Include the system's description so the failure can be diagnosed.
        throw BESInternalError("An unknown errno was found after opendir() was called on path=\"" +
                               fullPath + "\": " + strerror(err),
                               __FILE__, __LINE__);
    }
}
414 
415  bool
416  DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
417  {
418  bool matches = true;
419  // Do the suffix first since it's fast
420  if (!_suffix.empty() && !matchesSuffix(path, _suffix))
421  {
422  matches = false;
423  }
424 
425  // Suffix matches and we have a regexp, check that
426  if (matches && _pRegExp)
427  {
428  // match the full string, -1 on fail, num chars matching otherwise
429  int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
430  matches = (numCharsMatching > 0); // TODO do we want to match the size()?
431  }
432 
433  if (matches && _filteringModTimes)
434  {
435  matches = (modTime < _newestModTime);
436  }
437 
438  return matches;
439  }
440 
441  bool
442  DirectoryUtil::hasRelativePath(const std::string& path)
443  {
444  return (path.find("..") != string::npos);
445  }
446 
447  void
449  {
450  if (!path.empty())
451  {
452  string::size_type pos = path.find_last_not_of("/");
453  if (pos != string::npos)
454  {
455  path = path.substr(0, pos+1);
456  }
457  }
458  }
459 
460  void
462  {
463  if (!path.empty())
464  {
465  string::size_type pos = path.find_first_not_of("/");
466  path = path.substr(pos, string::npos);
467  }
468  }
469 
470  void
471  DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
472  {
473  std::ostringstream oss;
474  printFileInfoList(oss, listing);
475  BESDEBUG(_sDebugChannel, oss.str() << endl);
476  }
477 
478  void
479  DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
480  {
481  for (vector<FileInfo>::const_iterator it = listing.begin();
482  it != listing.end();
483  ++it)
484  {
485  os << it->toString() << endl;
486  }
487  }
488 
489  std::string
491  {
492  bool found;
493  string rootDir;
494  TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory",
495  rootDir, found);
496  if (!found)
497  {
498  TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory",
499  rootDir, found);
500  }
501  if (!found)
502  {
503  rootDir = "/";
504  }
505  return rootDir;
506  }
507 
508  bool
509  DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
510  {
511  // see if the last suffix.size() characters match.
512  bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
513  return matches;
514  }
515 }
error thrown if the resource requested cannot be found
Class to hold info on files as we get them.
Definition: DirectoryUtil.h:46
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
strips any trailing "/" on path.
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
Get recursive listing of all regular files in the directory subtree.
const std::string & basename() const
const std::string & getFullPath() const
Get the path and basename as path + "/" + basename We cache this after first call to allow for a cons...
void setFilterRegExp(const std::string &regexp)
Set a (GNU style) regular expression to be used to match against the full filename (relative path und...
exception thrown if internal error encountered
const std::string & path() const
does not include trailing "/"
string basename(const string &path)
Definition: dodsutil.h:133
bool isDir() const
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Get a listing of all the regular files and directories in the given path, which is assumed relative t...
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...
#define L
Definition: avltree.h:36
static bool matchesSuffix(const std::string &filename, const std::string &suffix)
std::string toString() const
std::string getModTimeAsString() const
Get a human readable string for the modTime()
void setFilterSuffix(const std::string &suffix)
Set the filter to be used for the next getListingForPath() call.
static std::string getBESRootDir()
Gets the BES root directory by checking the bes.conf settings for BES.
static void removeTrailingSlashes(std::string &path)
mutate to remove all trailing "/"
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:453
error thrown if the BES is not allowed to access the resource requested
void setFilterModTimeOlderThan(time_t newestModTime)
Set a filter on the modification time of the files to be returned in a listing.
void clearRegExp()
Remove any filter using a regular expression.
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
const std::string & getRootDir() const
get the current root dir
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
Makes sure the directory exists and is readable or throws an exception. ...
time_t modTime() const
static void check_path(const string &path, const string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:232
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:48
static void removePrecedingSlashes(std::string &path)
mutate to remove any preceding (in the front) "/"
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Get the listing for the path recursing into every directory found until it bottoms out...
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
Print the list of files to the stream.
static bool hasRelativePath(const std::string &path)
Is there a "../" in path?