40 #include <sys/types.h>
55 #include <unicode/smpdtfmt.h>
56 #include <unicode/timezone.h>
67 struct ScanElement::DateFormatters
69 DateFormatters() : _pDateFormat(0), _pISO8601(0), _markPos(0), _sdfLen(0) {}
79 SimpleDateFormat* _pDateFormat;
82 SimpleDateFormat* _pISO8601;
102 , _dateFormatMark(
"")
106 , _pDateFormatters(0)
111 : RCObjectInterface()
113 , _location(proto._location)
114 , _suffix(proto._suffix)
115 , _regExp(proto._regExp)
116 , _subdirs(proto._subdirs)
117 , _olderThan(proto._olderThan)
118 , _dateFormatMark(proto._dateFormatMark)
119 , _enhance(proto._enhance)
120 , _ncoords(proto._ncoords)
121 , _pParent(proto._pParent)
122 , _pDateFormatters(0)
124 if (!_dateFormatMark.empty())
126 initSimpleDateFormats(_dateFormatMark);
176 throwOnUnhandledAttributes();
179 if (!_dateFormatMark.empty())
181 initSimpleDateFormats(_dateFormatMark);
188 if (!
_parser->isScopeAggregation())
192 "was not the direct child of an <aggregation> element as required!");
209 " find the the child aggregation of the current dataset, which is "
210 "supposed to be our parent!");
218 "location=\"" + _location +
"\" " +
237 return (_subdirs ==
"true");
243 if (_olderThan.empty())
253 "Couldn't parse the olderThan attribute! Expect a string of the form: "
254 "\"%d %units\" where %d is a number and %units is a time unit string such as "
255 " \"hours\" or \"s\".");
270 BESDEBUG(
"ncml",
"Scan will be relative to the BES root data path = " <<
273 setupFilters(scanner);
275 vector<FileInfo> files;
292 oss <<
"In processing " <<
toString() <<
" we got a BESNotFoundError with msg=";
294 oss <<
" Perhaps a path is incorrect?" << endl;
302 BESDEBUG(
"ncml",
"Scan " <<
toString() <<
" returned matching regular files: " << endl);
305 BESDEBUG(
"ncml",
"WARNING: No matching files found!" << endl);
309 DirectoryUtil::printFileInfoList(files);
314 if (!_ncoords.empty())
317 "Scan has ncoords attribute specified: ncoords="
319 <<
" Will be inherited by all matching datasets!"
329 vector<NetcdfElement*> scannedDatasets;
330 scannedDatasets.reserve(files.size());
332 for (vector<FileInfo>::const_iterator it = files.begin();
344 if (!_ncoords.empty())
352 if (!_dateFormatMark.empty())
354 string timeCoord = extractTimeFromFilename(it->basename());
355 BESDEBUG(
"ncml",
"Got an ISO 8601 time from dateFormatMark: " <<
365 scannedDatasets.push_back(static_cast<NetcdfElement*>(dataset.
refAndGet()));
370 if (_dateFormatMark.empty())
372 BESDEBUG(
"ncml",
"Sorting scanned datasets by location()..." << endl);
373 std::sort(scannedDatasets.begin(),
374 scannedDatasets.end(),
379 BESDEBUG(
"ncml",
"Sorting scanned datasets by coordValue() since we got a dateFormatMark"
380 " and the coordValue are ISO 8601 dates..." << endl);
381 std::sort(scannedDatasets.begin(),
382 scannedDatasets.end(),
388 if (!_dateFormatMark.empty())
399 BESDEBUG(
"ncml",
"Adding the sorted scanned datasets to the current aggregation list..." << endl);
400 datasets.reserve(datasets.size() + scannedDatasets.size());
401 datasets.insert(datasets.end(), scannedDatasets.begin(), scannedDatasets.end());
408 if (!_suffix.empty())
410 BESDEBUG(
"ncml",
"Scan will filter against suffix=\"" << _suffix <<
"\"" << endl);
414 if (!_regExp.empty())
416 BESDEBUG(
"ncml",
"Scan will filter against the regExp=\"" << _regExp <<
"\"" << endl);
424 catch (libdap::Error& err)
427 "There was a problem compiling the regExp=\"" + _regExp +
429 + err.get_error_message());
433 if (!_olderThan.empty())
436 struct timeval tvNow;
437 gettimeofday(&tvNow, 0);
438 long cutoffTime = tvNow.tv_sec - secs;
440 BESDEBUG(
"ncml",
"Setting scan filter modification time using duration: "
441 << secs <<
" from the olderThan attribute=\"" << _olderThan <<
"\""
442 " The cutoff modification time based on now is: " <<
443 getTimeAsString(cutoffTime) << endl);
448 static const string ISO_8601_FORMAT =
"yyyy-MM-dd'T'HH:mm:ss'Z'";
454 static bool convertUnicodeStringToStdString(std::string& toString,
const UnicodeString& fromUniString)
462 buffer.resize(fromUniString.length() + 1);
463 UErrorCode errorCode = U_ZERO_ERROR;
464 int32_t patternLen = fromUniString.extract(&buffer[0], buffer.size(), 0, errorCode);
465 if (patternLen >= static_cast<int32_t>(buffer.size()) || U_FAILURE(errorCode))
471 toString = std::string(&buffer[0]);
477 ScanElement::initSimpleDateFormats(
const std::string& dateFormatMark)
481 _pDateFormatters =
new DateFormatters;
484 _pDateFormatters->_markPos = dateFormatMark.find_last_of(
"#");
485 if (_pDateFormatters->_markPos == string::npos)
488 "The scan@dateFormatMark attribute did not contain"
489 " a marking # character before the date format!"
490 " dateFormatMark=\"" + dateFormatMark +
"\"");
494 string dateFormat = dateFormatMark.substr(_pDateFormatters->_markPos+1, string::npos);
495 BESDEBUG(
"ncml",
"Using a date format of: " << dateFormat << endl);
496 UnicodeString usDateFormat(dateFormat.c_str());
499 _pDateFormatters->_sdfLen = dateFormat.size();
502 UErrorCode success = U_ZERO_ERROR;
503 _pDateFormatters->_pDateFormat =
new SimpleDateFormat(usDateFormat, success);
504 if (U_FAILURE(success))
507 "Scan element failed to parse the SimpleDateFormat pattern: "
510 VALID_PTR(_pDateFormatters->_pDateFormat);
512 _pDateFormatters->_pDateFormat->setTimeZone(*(TimeZone::getGMT()));
516 _pDateFormatters->_pISO8601 =
new SimpleDateFormat(success);
517 if (U_FAILURE(success))
520 "Scan element failed to create the ISO 8601 SimpleDateFormat"
521 " using the pattern " + ISO_8601_FORMAT);
525 _pDateFormatters->_pISO8601->setTimeZone(*(TimeZone::getGMT()));
526 _pDateFormatters->_pISO8601->applyPattern(ISO_8601_FORMAT.c_str());
530 ScanElement::extractTimeFromFilename(
const std::string& filename)
const
533 VALID_PTR(_pDateFormatters->_pDateFormat);
538 string sdfPortion = filename.substr(
539 _pDateFormatters->_markPos,
540 _pDateFormatters->_sdfLen);
542 UnicodeString usPattern;
543 _pDateFormatters->_pDateFormat->toPattern(usPattern);
545 bool conversionSuccess = convertUnicodeStringToStdString(sdfPattern, usPattern);
547 "ScanElement::extractTimeFromFilename: couldn't convert the UnicodeString date pattern to a std::string!");
550 BESDEBUG(
"ncml",
"Scan is now matching the date portion of the filename " <<
552 " to the SimpleDateFormat="
553 "\"" << sdfPattern <<
"\"" <<
556 UErrorCode status = U_ZERO_ERROR;
557 UDate theDate = _pDateFormatters->_pDateFormat->parse(sdfPortion.c_str(), status);
558 if (U_FAILURE(status))
561 "SimpleDateFormat could not parse the pattern="
562 "\"" + sdfPattern +
"\""
563 " on the filename portion=" +
564 "\"" + sdfPortion +
"\""
565 " of the filename=" +
566 "\"" + filename +
"\""
567 " Either the pattern was invalid or the filename did not match.");
570 UnicodeString usISODate;
571 _pDateFormatters->_pISO8601->format(theDate, usISODate);
573 conversionSuccess = convertUnicodeStringToStdString(result, usISODate);
575 "ScanElement::extractTimeFromFilename: failed to convert the UnicodeString ISO date to a std::string!");
581 ScanElement::deleteDateFormats() throw()
587 ScanElement::getValidAttributes()
589 vector<string> attrs;
590 attrs.push_back(
"location");
591 attrs.push_back(
"suffix");
592 attrs.push_back(
"regExp");
593 attrs.push_back(
"subdirs");
594 attrs.push_back(
"olderThan");
595 attrs.push_back(
"dateFormatMark");
599 attrs.push_back(
"enhance");
602 attrs.push_back(
"ncoords");
608 ScanElement::throwOnUnhandledAttributes()
610 if (!_enhance.empty())
617 ScanElement::getTimeAsString(time_t theTime)
619 struct tm* pTM = gmtime(&theTime);
622 strftime(buf, 128,
"%F %T", pTM);
error thrown if the resource requested cannot be found
Class to hold info on files as we get them.
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
Get recursive listing of all regular files in the directory subtree.
virtual bool validateAttributes(const XMLAttributeMap &attrs, const vector< string > &validAttrs, vector< string > *pInvalidAttrs=0, bool printInvalid=true, bool throwOnError=true)
Check that the given attributes are all in the valid set, otherwise fill in *pInvalidAttrs with the p...
virtual void setAttributes(const XMLAttributeMap &attrs)
Set the attributes of this from the map.
void setFilterRegExp(const std::string &regexp)
Set a (GNU style) regular expression to be used to match against the full filename (relative path und...
long getOlderThanAsSeconds() const
Get the olderThan attribute in seconds.
static bool isCoordValueLexicographicallyLessThan(const NetcdfElement *pLHS, const NetcdfElement *pRHS)
Compare the coordvalue fields of the two arguments and return true if lhs.coordValue() < rhs...
void getDatasetList(vector< NetcdfElement * > &datasets) const
Actually perform the filesystem scan based on the specified attributes (suffix, subdirs, etc).
virtual void dump(ostream &strm) const
Displays debug information about this object.
An abstract superclass for NCMLArray that handles the non-parameterized functionality and allows u...
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Get a listing of all the regular files and directories in the given path, which is assumed relative t...
static const string _sTypeName
virtual void handleContent(const string &content)
Handle the characters content for the element.
void addScanElement(ScanElement *pScanner)
Add a child ScanElement to the Aggregation to be used to add to the list of child datasets...
const string getValueForLocalNameOrDefault(const string &localname, const string &defVal="") const
If there is an attribute with localname, return its value, else return default.
const string & ncoords() const
#define NCML_ASSERT_MSG(cond, msg)
Concrete class for NcML element.
Implementation of the <scan> element used to scan directories to create the set of files for an aggre...
T * refAndGet() const
If not null, ref() the object and then return it.
static bool isLocationLexicographicallyLessThan(const NetcdfElement *pLHS, const NetcdfElement *pRHS)
Compare the location fields of the two arguments and return true if lhs.location() < rhs...
void setAggregationVariableCoordinateAxisType(const std::string &cat)
If a child scan contains a dateFormatMark, then we want to add a "_CoordinateAxisType" of "Time" By s...
static const vector< string > _sValidAttrs
AggregationElement * getParent() const
Get the aggregation of which I am a child.
AggregationElement * getChildAggregation() const
Return the raw pointer (or NULL) to our contained aggregation.
static std::string printAttributeIfNotEmpty(const std::string &attrName, const std::string &attrValue)
Helper for subclasses implementing toString().
Helper classes for using dirent.h, dir.h, stat.h, etc.
void setFilterSuffix(const std::string &suffix)
Set the filter to be used for the next getListingForPath() call.
#define THROW_NCML_PARSE_ERROR(parseLine, msg)
static std::string getBESRootDir()
Gets the BES root directory by checking the bes.conf settings for BES.
A reference to an RCObject which automatically ref() and deref() on creation and destruction.
void addAttribute(const XMLAttribute &attribute)
TODO how do we tell if this exists? Does it replace? Do we care?
virtual void handleBegin()
Handle a begin on this element.
virtual ScanElement * clone() const
Make and return a copy of this.
Base class for NcML element concrete classes.
virtual void handleEnd()
Handle the closing of this element.
int line() const
Return the current parse line number.
virtual void handleContent(const std::string &content)
Handle the characters content for the element.
RCPtr< NCMLElement > makeElement(const std::string &eltTypeName, const XMLAttributeMap &attrs, NCMLParser &parser)
Create an element of the proper type with the given AttrMap for its defined attributes.
void setFilterModTimeOlderThan(time_t newestModTime)
Set a filter on the modification time of the files to be returned in a listing.
void setParent(AggregationElement *pParent)
Set the parent of this element.
bool shouldScanSubdirs() const
is the subdirs attribute true?
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
const std::string & getRootDir() const
get the current root dir
static bool parseIntoSeconds(long &seconds, const std::string &duration)
Parse the string in duration and calculate the (approximate) number of seconds it represents...
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
Makes sure the directory exists and is readable or throws an exception. ...
virtual string toString() const
Return a string describing the element.
virtual const string & getTypeName() const
Return the type of the element, which should be: the same as ConcreteClassName::getTypeName() ...