OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
AggregationElement.cc
Go to the documentation of this file.
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 
32 
33 #include <AttrTable.h> // libdap
34 #include "AggMemberDatasetUsingLocationRef.h" // agg_util
35 #include "AggMemberDatasetSharedDDSWrapper.h" // agg_util
36 #include "AggregationElement.h"
37 #include "AggregationUtil.h" // agg_util
38 #include <Array.h> // libdap
39 #include "ArrayAggregateOnOuterDimension.h" // agg_util
40 #include "ArrayJoinExistingAggregation.h" // agg_util
41 #include <AttrTable.h> // libdap
42 #include "DDSAccessInterface.h" // agg_util
43 #include "Dimension.h" // agg_util
44 #include "DimensionElement.h"
45 #include <Grid.h> // libdap
46 #include "GridAggregateOnOuterDimension.h" // agg_util
47 #include "GridJoinExistingAggregation.h" // agg_util
48 #include "MyBaseTypeFactory.h"
49 #include "NCMLBaseArray.h"
50 #include "NCMLDebug.h"
51 #include "NCMLParser.h"
52 #include "NetcdfElement.h"
53 #include "ScanElement.h"
54 #include <sstream>
55 
58 using agg_util::AMDList;
63 using std::auto_ptr;
64 
65 namespace ncml_module
66 {
67  const string AggregationElement::_sTypeName = "aggregation";
68 
69  const vector<string> AggregationElement::_sValidAttrs = getValidAttributes();
70 
72  : NCMLElement(0)
73  , _type("")
74  , _dimName("")
75  , _recheckEvery("")
76  , _parent(0)
77  , _datasets()
78  , _scanners()
79  , _aggVars()
80  , _gotVariableAggElement(false)
81  , _wasAggregatedMapAddedForJoinExistingGrid(false)
82  , _coordinateAxisType("")
83  {
84  }
85 
87  : RCObjectInterface()
88  , NCMLElement(proto)
89  , _type(proto._type)
90  , _dimName(proto._dimName)
91  , _recheckEvery(proto._recheckEvery)
92  , _parent(proto._parent) // my parent is the same too... is this safe without a true weak reference?
93  , _datasets() // deep copy below
94  , _scanners() // deep copy below
95  , _aggVars(proto._aggVars)
96  , _gotVariableAggElement(false)
97  , _wasAggregatedMapAddedForJoinExistingGrid(false)
98  , _coordinateAxisType("")
99  {
100  // Deep copy all the datasets and add them to me...
101  // This is potentially expensive in memory for large datasets, so let's tell someone.
102  if (!proto._datasets.empty())
103  {
104  BESDEBUG("ncml",
105  "WARNING: AggregationElement copy ctor is deep copying all contained datasets! This might be memory and time intensive!");
106  }
107 
108  // Clone the actual members
109  _datasets.reserve(proto._datasets.size());
110  for (vector<NetcdfElement*>::const_iterator it = proto._datasets.begin();
111  it != proto._datasets.end();
112  ++it)
113  {
114  const NetcdfElement* elt = (*it);
115  addChildDataset(elt->clone());
116  }
117  NCML_ASSERT(_datasets.size() == proto._datasets.size());
118 
119  _scanners.reserve(proto._scanners.size());
120  for (vector<ScanElement*>::const_iterator it = proto._scanners.begin();
121  it != proto._scanners.end();
122  ++it)
123  {
124  const ScanElement* elt = (*it);
125  addScanElement(elt->clone());
126  }
127  NCML_ASSERT(_scanners.size() == proto._scanners.size());
128  }
129 
131  {
132  BESDEBUG("ncml:memory", "~AggregationElement called...");
133  _type = "";
134  _dimName= "";
135  _recheckEvery = "";
136  _parent = 0;
137  _wasAggregatedMapAddedForJoinExistingGrid = false;
138 
139  // Release strong references to the contained netcdfelements....
140  while (!_datasets.empty())
141  {
142  NetcdfElement* elt = _datasets.back();
143  _datasets.pop_back();
144  elt->unref(); // Will be deleted if the last strong reference
145  }
146 
147  // And the scan elements
148  while (!_scanners.empty())
149  {
150  ScanElement* elt = _scanners.back();
151  _scanners.pop_back();
152  elt->unref(); // Will be deleted if the last strong reference
153  }
154  }
155 
156  const string&
158  {
159  return _sTypeName;
160  }
161 
164  {
165  return new AggregationElement(*this);
166  }
167 
168  void
170  {
171  _type = attrs.getValueForLocalNameOrDefault("type", "");
172  _dimName = attrs.getValueForLocalNameOrDefault("dimName", "");
173  _recheckEvery = attrs.getValueForLocalNameOrDefault("recheckEvery", "");
174 
175  // default is to print errors and throw which we want.
177  }
178 
179  void
181  {
183 
184  // Check that the immediate parent element is netcdf since we cannot put an aggregation anywhere else.
185  if (!_parser->isScopeNetcdf())
186  {
188  "Got an <aggregation> = " + toString() +
189  " at incorrect parse location. They can only be direct children of <netcdf>. Scope=" +
190  _parser->getScopeString());
191  }
192 
193  NetcdfElement* dataset = _parser->getCurrentDataset();
194  NCML_ASSERT_MSG(dataset, "We expected a non-noll current dataset while processing AggregationElement::handleBegin() for " + toString());
195  // If the enclosing dataset already has an aggregation, this is a parse error.
196  if (dataset->getChildAggregation())
197  {
199  "Got <aggregation> = " + toString() + " but the enclosing dataset = " + dataset->toString() +
200  " already had an aggregation set! There can be only one!");
201  }
202  // Set me as the aggregation for the current dataset.
203  // This will set my parent and also ref() me.
204  dataset->setChildAggregation(this);
205  }
206 
// Handles character content found inside an <aggregation> element.
// Content for which NCMLUtil::isAllWhitespace() returns true is accepted silently;
// anything else enters the error branch below.
// @param content the character data seen by the parser for this element.
207  void
208  AggregationElement::handleContent(const string& content)
209  {
210  // Aggregations do not specify content!
211  if (!NCMLUtil::isAllWhitespace(content))
212  {
  // NOTE(review): the embedded listing numbering skips 213 and the expression below
  // closes a call that is never opened here -- the line opening the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
214  "Got non-whitespace for content and didn't expect it. Element=" + toString() + " content=\"" +
215  content + "\"");
216  }
217  }
218 
219  void
221  {
222  // Handle the actual processing!!
223  BESDEBUG("ncml", "Got AggregationElement::handleEnd(): Processing the aggregation!!" << endl);
224 
225  if ( isUnionAggregation() )
226  {
227  BESDEBUG("ncml2", " AggregationElement::handleEnd; isUnionAggregation" << endl);
228  processUnion();
229  }
230  else if ( isJoinNewAggregation() )
231  {
232  BESDEBUG("ncml2", " AggregationElement::handleEnd; isJoinNewAggregation" << endl);
233  processJoinNew();
234  }
235  else if ( isJoinExistingAggregation() )
236  {
237  BESDEBUG("ncml2", " AggregationElement::handleEnd; isJoinExistingAggregation" << endl);
238  processJoinExisting();
239  }
240  else if (_type == "forecastModelRunCollection" ||
241  _type == "forecastModelSingleRunCollection")
242  {
244  "Sorry, we do not implement the forecastModelRunCollection aggregations in this version of the NCML Module!");
245  }
246  else
247  {
249  "Unknown aggregation type=" + _type + " at scope=" + _parser->getScopeString());
250  }
251  }
252 
253  string
255  {
256  return "<" + _sTypeName +
257  " type=\"" + _type + "\"" +
258  printAttributeIfNotEmpty("dimName", _dimName) +
259  printAttributeIfNotEmpty("recheckEvery", _recheckEvery) +
260  ">";
261  }
262 
263  bool
265  {
266  return (_type == "joinNew");
267  }
268 
269  bool
271  {
272  return (_type == "union");
273  }
274 
275  bool
277  {
278  return (_type == "joinExisting");
279  }
280 
281  void
283  {
284  VALID_PTR(pDataset);
285  BESDEBUG("ncml", "AggregationElement: adding child dataset: " << pDataset->toString() << endl);
286 
287  // Add as a strong reference.
288  pDataset->ref();
289  _datasets.push_back(pDataset);
290 
291  // also set a weak reference to us as the parent
292  pDataset->setParentAggregation(this);
293  }
294 
295  void
297  {
298  if (isAggregationVariable(name))
299  {
301  "Tried to add an aggregation variable twice: name=" + name +
302  " at scope=" + _parser->getScopeString());
303  }
304  else
305  {
306  _aggVars.push_back(name);
307  BESDEBUG("ncml", "Added aggregation variable name=" + name << endl);
308  }
309  }
310 
311  bool
313  {
314  bool ret = false;
315  AggVarIter endIt = endAggVarIter();
317  for (; it != endIt; ++it)
318  {
319  if (name == *it)
320  {
321  ret = true;
322  break;
323  }
324  }
325  return ret;
326  }
327 
328  string
330  {
331  string ret("{ ");
332  AggVarIter endIt = endAggVarIter();
334  for (; it != endIt; ++it)
335  {
336  ret += *it;
337  ret += " ";
338  }
339  ret += "}";
340  return ret;
341  }
342 
345  {
346  return _aggVars.begin();
347  }
348 
351  {
352  return _aggVars.end();
353  }
354 
355  bool
357  {
358  return _gotVariableAggElement;
359  }
360 
361  void
363  {
364  _gotVariableAggElement = true;
365  }
366 
367  void
369  {
370  VALID_PTR(pScanner);
371  _scanners.push_back(pScanner);
372  pScanner->ref(); // strong ref
373  pScanner->setParent(this); // weak ref.
374  }
375 
376  void
378  {
379  BESDEBUG("ncml", "AggregationElement::processParentDatasetComplete() called..." << endl);
380 
381  if (_type == "joinNew")
382  {
383  processParentDatasetCompleteForJoinNew();
384  }
385  else if (_type == "joinExisting")
386  {
387  processParentDatasetCompleteForJoinExisting();
388  }
389  }
390 
393 
396  {
398  _parent = parent;
399  return ret;
400  }
401 
402  void
403  AggregationElement::processUnion()
404  {
405  BESDEBUG("ncml", "Processing a union aggregation..." << endl);
406 
407  // Merge all the dimensions... For now, it is a parse error if a dimension
408  // with the same name exists but has a different size.
409  // Since DAP2 doesn't have dimensions, we can't do this in agg_util, but
410  // have to do it here.
411  mergeDimensions();
412 
413  // Merge the attributes and variables in all the DDS's into our parent DDS....
414  vector<const DDS*> datasetsInOrder;
415  // NOTE WELL: this will LOAD ALL DDX's, but there's no choice for union.
416  // This doesn't load data, just the metadata!
417  collectDatasetsInOrder(datasetsInOrder);
418  DDS* pUnion = 0;
419  if (getParentDataset())
420  {
421  pUnion = getParentDataset()->getDDS();
422  }
423  AggregationUtil::performUnionAggregation(pUnion, datasetsInOrder);
424  }
425 
426  void
427  AggregationElement::processJoinNew()
428  {
429  // This will run any child <scan> elements to prepare them.
430  processAnyScanElements();
431 
432  BESDEBUG("ncml", "AggregationElement: beginning joinNew on the following aggVars=" +
433  printAggregationVariables() << endl);
434 
435  // Union the dimensions of the child sets so they're available
436  BESDEBUG("ncml", "Merging dimensions from children into aggregated dataset..." << endl);
437  mergeDimensions();
438 
439  // For now we will explicitly create the new dimension for lookups.
440  unsigned int newDimSize = _datasets.size(); // ASSUMES we find an aggVar in EVERY dataset!
441  getParentDataset()->addDimension(new DimensionElement(agg_util::Dimension(_dimName, newDimSize)));
442 
443  // We need at least one dataset, so warn.
444  if (_datasets.empty())
445  {
446  THROW_NCML_PARSE_ERROR(line(), "In joinNew aggregation we cannot have zero datasets specified!");
447  }
448 
449  // This is where the output variables go
450  DDS* pAggDDS = getParentDataset()->getDDS();
451  // The first dataset acts as the template for the remainder
452  DDS* pTemplateDDS = _datasets[0]->getDDS();
453  NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinNew(): NULL template dataset!");
454 
455  // First, union the template's global attribute table into the output's table.
456  AggregationUtil::unionAttrsInto( &(pAggDDS->get_attr_table()),
457  pTemplateDDS->get_attr_table() );
458 
459  // Then perform the aggregation for each variable...
460  // TODO REFACTOR OPTIMIZE We loop on variables, not the datasets.
461  // It might be more efficient to do all vars for each dataset
462  vector<string>::const_iterator endIt = _aggVars.end();
463  for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it)
464  {
465  const string& varName = *it;
466  BESDEBUG("ncml", "Aggregating with joinNew on variable=" << varName << "..." << endl);
467  processJoinNewOnAggVar(pAggDDS, varName, *pTemplateDDS);
468  }
469 
470  // Union any non-aggregated variables from the template dataset into the aggregated dataset
471  // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
472  // in the order they are declared in the NCML file, we need to track the current position
473  // where the last one was inserted. We can do that with a field in the AggregationUtil
474  // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
475  AggregationUtil::resetCVInsertionPosition();
476 
477  // Union any non-aggregated variables from the template dataset into the aggregated dataset
478  AggregationUtil::unionAllVariablesInto(pAggDDS, *pTemplateDDS, /*add_at_top = */true);
479  }
480 
481 #if 0
482  // This function was used previously, but not now.
483  // Leaving it in case we need it, but commented out
484  // to deal with -werror compilation.
485 
486  /* File local helper for next function */
487  static bool
488  doAllScannersSpecifyNCoords(const vector<ScanElement*>& scanners)
489  {
490  bool success = true;
491  for (vector<ScanElement*>::const_iterator it = scanners.begin();
492  it != scanners.end();
493  ++it)
494  {
495  VALID_PTR(*it);
496  if ((*it)->ncoords().empty())
497  {
498  success = false;
499  break;
500  }
501  }
502  return success;
503  }
504 #endif // 0
505 
// Performs a joinExisting aggregation. In order, the code below:
//  - runs any scan elements so _datasets is populated,
//  - requires at least one dataset,
//  - builds the granule list and fills its dimension cache for _dimName,
//  - adds the aggregated join dimension to the parent dataset,
//  - merges the children's dimensions (called with true and _dimName; per the
//    comment below this skips the join dimension),
//  - unions the template (first dataset) global attributes into the output DDS,
//  - decides the aggregation variables and aggregates each one,
//  - unions in the remaining non-aggregated template variables.
506  void
507  AggregationElement::processJoinExisting()
508  {
509  BESDEBUG("ncml:2", "Called AggregationElement::processJoinExisting()...");
510 
511  // Merge any scans into _datasets
512  processAnyScanElements();
513 
514  // We need at least one dataset or it's an error
515  if (_datasets.empty())
516  {
  // NOTE(review): the embedded listing numbering skips 517 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
518  "In joinExisting aggregation we cannot have zero datasets specified!");
519  }
520 
521  // We need to know the size of the joinExisting dimension
522  // for all granule datasets.
523  // Make sure that we either get them from:
524  // 1) ncoords specified
525  // 2) Dimension cache file previously created
526  // 3) Load them the slow way and cache the result
527  AMDList granuleList;
528  granuleList.reserve(_datasets.size());
529  fillDimensionCacheForJoinExistingDimension(granuleList, _dimName);
530 
531  // Figure out the cardinality of the aggregated dimension
532  // and add it into the parent dataset's scope for lookups.
533  addNewDimensionForJoinExisting(granuleList);
534 
535  // Union any declared dimensions of the child sets so they're available,
536  // but be careful to skip the join dimension since we already created it
537  // new ourselves with the post-aggregation value!
538  BESDEBUG("ncml:2", "Merging dimensions from children into aggregated dataset..." << endl);
539  mergeDimensions(true, _dimName);
540 
541  // This is where the output variables go
542  DDS* pAggDDS = getParentDataset()->getDDS();
543 
544  // The first dataset acts as the template
545  DDS* pTemplateDDS = _datasets[0]->getDDS();
546  NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinExisting(): NULL template dataset!");
547 
548  // First, union the template's global attribute table into the output's table.
549  AggregationUtil::unionAttrsInto( &(pAggDDS->get_attr_table()),
550  pTemplateDDS->get_attr_table() );
551 
552  // Fills in the _aggVars list properly.
553  decideWhichVariablesToJoinExist(*pTemplateDDS);
554 
555  // For each variable in the to-be-aggregated list, create the
556  // aggregation variable in the output based on the granule list.
557  vector<string>::const_iterator endIt = _aggVars.end();
558  for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it)
559  {
560  const string& varName = *it;
561  BESDEBUG("ncml", "Aggregating with joinExisting on variable=" << varName << "..." << endl);
562  processJoinExistingOnAggVar(pAggDDS, varName, *pTemplateDDS);
563  }
564 
565  // Union in the remaining unaggregated variables from the template DDS
566  // since they are likely to be coordinate variables.
567  // Handle variableAgg properly.
568  unionAddAllRequiredNonAggregatedVariablesFrom(*pTemplateDDS);
569  }
570 
// Unions the non-aggregated variables of the template DDS into the parent
// dataset's DDS, after resetting the coordinate-variable insertion position.
// @param templateDDS the DDS whose variables are unioned into the output.
571  void
572  AggregationElement::unionAddAllRequiredNonAggregatedVariablesFrom(const DDS& templateDDS)
573  {
574  // Union any non-aggregated variables from the template dataset into the aggregated dataset
575  // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
576  // in the order they are declared in the NCML file, we need to track the current position
577  // where the last one was inserted. We can do that with a field in the AggregationUtil
578  // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
579  AggregationUtil::resetCVInsertionPosition();
580 
581 
582  // If we didn't get a variable agg for a joinExisting, then union them all.
  // NOTE(review): the embedded listing numbering skips 583. Judging by the
  // "// if isJoinExistingAggregation" marker on the closing brace and the
  // "else if" that follows, the lost line is presumably the
  // `if (isJoinExistingAggregation())` condition -- confirm against upstream.
584  {
585  if (!gotVariableAggElement())
586  {
587  AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
588  }
589  else
590  {
591  // THROW ONLY IF A GRID since we need to implement the path that handles maps
      // (as shown, this branch is empty: nothing is unioned and nothing is thrown)
592  }
593  } // if isJoinExistingAggregation
594 
595  else if (isJoinNewAggregation())
596  // joinNew requires the list of vars, so for this one just union them all in as well.
597  {
598  AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
599  }
600  }
601 
// Settles the list of variables to aggregate for a joinExisting.
// If _aggVars is empty, the template DDS is searched for variables whose outer
// dimension is named _dimName. Otherwise every name already in _aggVars is
// validated: it must exist at the top level of the template DDS, be obtainable
// as an Array, and have _dimName as its first dimension; each failure raises a
// parse error.
// @param templateDDS the DDS used to discover or validate the variables.
602  void
603  AggregationElement::decideWhichVariablesToJoinExist(const DDS& templateDDS)
604  {
605  // If they were not specified by hand, then discover them.
606  if (_aggVars.empty())
607  {
608  BESDEBUG("ncml", "Searching the the template DDS for variables with outer "
609  "dimension matching the join dimension = " << _dimName
610  << " in order to add them to the aggregation output list." << endl);
611 
612  // the prototype (first dataset) will define the set of vars to be aggregated.
613  // Note: the c.v. dim(dim) _must_ exist, either in all datasets or in the agg itself.
614  vector<string> matchingVars;
615  findVariablesWithOuterDimensionName(matchingVars, templateDDS, _dimName);
616  for (vector<string>::const_iterator it = matchingVars.begin();
617  it != matchingVars.end();
618  ++it)
619  {
  // NOTE(review): the embedded listing numbering skips 620, leaving this loop body
  // empty. The lost statement presumably registers *it as an aggregation variable
  // (the debug message above says the matches are added to the output list) --
  // restore it from the upstream file.
621  }
622  }
623  else // make sure the listed ones are valid
624  {
625  BESDEBUG("ncml", "joinExist aggregation had variableAgg specified... "
626  "Validating these variables have outer dimension named "
627  << _dimName
628  << endl);
629 
630  for (vector<string>::const_iterator it = _aggVars.begin();
631  it != _aggVars.end();
632  ++it)
633  {
634  BaseType* pVar = AggregationUtil::findVariableAtDDSTopLevel(templateDDS, *it);
635 
636  // First, it must exist!
637  if (!pVar)
638  {
639  std::ostringstream msg;
640  msg << "Error validating the variableAgg list. The variable named "
641  << *it
642  << " was not found in the top-level DDS!";
643  THROW_NCML_PARSE_ERROR(line(), msg.str());
644  }
645 
646  // Next see that it can be aggregated
647  Array* pArray = AggregationUtil::getAsArrayIfPossible(pVar);
648  if (!pArray)
649  {
650  std::ostringstream msg;
651  msg << "The declared variableAgg aggregation variable named "
652  << *it
653  << " was not of a type able to be aggregated!";
654  THROW_NCML_PARSE_ERROR(line(), msg.str());
655  }
656 
657  // Make sure the dimension name matches.
      // Only the first (outer) dimension is compared against _dimName.
658  if (pArray->dimension_name(pArray->dim_begin()) != _dimName)
659  {
660  std::ostringstream msg;
661  msg << "The declared variableAgg variable named "
662  << *it
663  << " did not match the outer dimension name "
664  << _dimName
665  << " for this joinExisting aggregation!";
666  THROW_NCML_PARSE_ERROR(line(), msg.str());
667  }
668 
669  // Otherwise, it's good, so let the log know.
670  std::ostringstream msg;
671  msg << "The variable named "
672  << *it
673  << " is a valid joinExisting variable. Will be added to output.";
674  BESDEBUG("ncml", msg.str() << endl);
675  } // for loop over user-declared variableAgg list.
676  }
677  }
678 
679  //
// Builds the granule list from _datasets and makes sure each granule has its
// join-dimension size cached, choosing between three sources:
//  - the ncoords attributes, when the first granule specifies one (then all must),
//  - a dimension cache file, when doesDimensionCacheExist() reports one,
//  - otherwise a query of every granule (slow).
// @param granuleList output; one entry is appended per element of _datasets.
// The second parameter (the aggregation dimension name) is unused here.
680  void
681  AggregationElement::fillDimensionCacheForJoinExistingDimension(
682  AMDList& granuleList,
683  const std::string& /* aggDimName */)
684  {
685  // First, run down the dataset list (which has been expanded with scanners)
686  // and create the AMD list for them.
687  // for each entry in _dataset
688  vector<NetcdfElement*>::iterator endIt = _datasets.end();
689  for (vector<NetcdfElement*>::iterator it = _datasets.begin();
690  it != endIt;
691  ++it)
692  {
693  granuleList.push_back((*it)->getAggMemberDataset());
694  }
695 
696  // Second, see if there is an ncoords for each of the datasets,
697  // and if so, for each one add it to the cache in the AMD.
698  if (doesFirstGranuleSpecifyNcoords())
699  {
700  // If so, check they all do or it's a user error.
701  if (!doAllGranulesSpecifyNcoords())
702  {
  // NOTE(review): the embedded listing numbering skips 703 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
704  "In a joinExisting aggregation we found that the first "
705  "granule specified an ncoords but not all of the others "
706  "did. Either all or none of them should have ncoords specified.");
707  }
708  // otherwise we're good, seed the cache from the ncoords
709  else
710  {
711  seedDimensionCacheFromUserSpecs(granuleList);
712  }
713  }
714  else // look for cached dimension file or load dimensionalities from granules
715  {
716  // If there is NOT an ncoords for all, then:
717  // 1) If there's a dimension cache file, load it.
718  if (doesDimensionCacheExist())
719  {
720  loadDimensionCacheFromCacheFile(granuleList);
721  }
722  // 2) Else do the slow load on the dimension cache
723  // and optionally save the cache file out.
724  else
725  {
726  // SLOW! Probably should warn the user.
727  seedDimensionCacheByQueryOfDatasets(granuleList);
728  }
729  }
730  }
731 
732  bool
733  AggregationElement::doesDimensionCacheExist() const
734  {
735  // TODO
736  BESDEBUG("ncml", "Warning: joinExisting dimension cache"
737  " is not implemented and we'll force a slow load." << endl);
738  return false;
739  }
740 
741  void
742  AggregationElement::loadDimensionCacheFromCacheFile(agg_util::AMDList& /* rGranuleList */)
743  {
744  THROW_NCML_INTERNAL_ERROR("loadDimensionCacheFromCacheFile(): impl me!");
745  }
746 
747  bool
748  AggregationElement::doesFirstGranuleSpecifyNcoords() const
749  {
750  if (_datasets.size() > 0)
751  {
752  return _datasets.at(0)->hasNcoords();
753  }
754  else
755  {
756  return false;
757  }
758  }
759 
760  bool
761  AggregationElement::doAllGranulesSpecifyNcoords() const
762  {
763  bool success = true;
764  vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
765  for (vector<NetcdfElement*>::const_iterator it = _datasets.begin();
766  it != endIt;
767  ++it)
768  {
769  success = success && (*it)->hasNcoords();
770  if (!success)
771  {
772  break;
773  }
774  }
775  return success;
776  }
777 
// Seeds the dimension cache of every granule from the ncoords value of the
// matching dataset. _datasets and rGranuleList are walked in lockstep and are
// asserted to have the same size; each granule gets a cached dimension with
// name _dimName and size equal to the dataset's ncoords.
// @param rGranuleList the granules whose dimension caches are set.
778  void
779  AggregationElement::seedDimensionCacheFromUserSpecs(agg_util::AMDList& rGranuleList) const
780  {
781  NCML_ASSERT( _datasets.size() == rGranuleList.size() );
782 
783  vector<NetcdfElement*>::const_iterator datasetIt;
784  AMDList::iterator amdIt;
785  for ( datasetIt = _datasets.begin(),
786  amdIt = rGranuleList.begin();
787  datasetIt != _datasets.end();
788  ++datasetIt, ++amdIt)
789  {
790  // Make sure the attribute exists or warn the author
791  const NetcdfElement* pDataset = *datasetIt;
792  if (!pDataset->hasNcoords())
793  {
794  // This is an assumption of the
  // NOTE(review): the embedded listing numbering skips 795 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
796  "Expected netcdf element member of a joinExisting "
797  "aggregation to have the ncoords attribute specified "
798  "but it did not.");
799  }
800  unsigned int ncoords = pDataset->getNcoordsAsUnsignedInt();
801  RCPtr<AggMemberDataset> pAMD = *amdIt;
802  VALID_PTR(pAMD.get());
  // NOTE(review): the embedded listing numbering skips 803; `dim` is used below
  // without a visible declaration, so the lost line presumably declares it
  // (a type with `name` and `size` members) -- confirm against upstream.
804  dim.name = _dimName;
805  dim.size = ncoords;
806  pAMD->setDimensionCacheFor(dim, true);
807 
      // Read the value straight back to catch a cache that did not store it.
808  NCML_ASSERT_MSG( (pAMD->isDimensionCached(dim.name) &&
809  pAMD->getCachedDimensionSize(dim.name) == dim.size ),
810  "Dimension cache bug");
811  }
812  // make sure they stayed in sync
813  NCML_ASSERT(amdIt == rGranuleList.end());
814  }
815 
816  void
817  AggregationElement::seedDimensionCacheByQueryOfDatasets(agg_util::AMDList& rGranuleList) const
818  {
819  BESDEBUG("ncml",
820  "WARNING: netcdf@ncoords attribute not specified for the granules in joinExisting."
821  " We will query the granules serially for the dimensions size. NOTE: This is "
822  "potentially a very slow operation until cached!" << endl);
823  BESDEBUG("ncml", "We will be loading " << rGranuleList.size() << " granules." << endl);
824  AMDList::iterator endIt = rGranuleList.end();
825  for (AMDList::iterator it = rGranuleList.begin();
826  it != endIt;
827  ++it)
828  {
829  BESDEBUG("ncml",
830  "Getting joinExisting dimension for: " << (*it)->getLocation() << "..." << endl);
831  (*it)->fillDimensionCacheByUsingDataDDS();
832  BESDEBUG("ncml", "... done." << endl);
833  }
834  }
835 
836  // For now, just count up the ncoords...
// Computes the size of the aggregated join dimension as the sum of the cached
// _dimName sizes of all granules, checks that the parent dataset does not
// already have a dimension of that name in local scope, and logs the result.
// Every granule is asserted to have _dimName cached before its size is read.
// @param rGranuleList the granules whose cached sizes are summed.
837  void
838  AggregationElement::addNewDimensionForJoinExisting(const agg_util::AMDList& rGranuleList)
839  {
840  // Sum up the cardinalities from AMD's
841  unsigned int aggDimSize = 0;
842  for (AMDList::const_iterator it = rGranuleList.begin();
843  it != rGranuleList.end();
844  ++it)
845  {
846  NCML_ASSERT( (*it)->isDimensionCached(_dimName) );
847  aggDimSize += (*it)->getCachedDimensionSize(_dimName);
848  }
849 
850  // Error if the dimension exists in the output local scope already
  // NOTE(review): the embedded listing numbering skips 851; a line appears to
  // have been lost here in extraction -- check the upstream file.
852  NCML_ASSERT_MSG(! (getParentDataset()->getDimensionInLocalScope(_dimName)),
853  "AggregationElement::addNewDimensionForJoinExisting() found a dimension "
854  "named " + _dimName + " already but did not expect it!");
855 
856  // Otherwise, create and add it in.
  // NOTE(review): the embedded listing numbering skips 857 and the expression
  // below closes a call that is never opened here. The lost line presumably
  // opens the call that adds the new DimensionElement to the parent dataset --
  // confirm against upstream.
858  new DimensionElement(agg_util::Dimension(_dimName, aggDimSize)));
859 
860  // And tell the world at large
861  ostringstream oss;
862  oss << "Added joinExisting aggregation dimension "
863  " name=" <<
864  _dimName <<
865  " with aggregated size= " <<
866  aggDimSize;
867  BESDEBUG("ncml:2", oss.str());
868  }
869 
870  void
871  AggregationElement::findVariablesWithOuterDimensionName(
872  vector<string>& oMatchingVars,
873  const DDS& templateDDS,
874  const string& outerDimName) const
875  {
876  for ( DDS::Vars_iter it = const_cast<DDS&>(templateDDS).var_begin();
877  it != const_cast<DDS&>(templateDDS).var_end();
878  ++it )
879  {
880  Array* pArray = AggregationUtil::getAsArrayIfPossible(*it);
881  // Only if it's an array or a grid data array
882  if ( pArray && outerDimName == pArray->dimension_name(pArray->dim_begin()) )
883  {
884  oMatchingVars.push_back(pArray->name());
885  }
886  }
887  }
888 
// Fills *pOutParams with what is needed to build an aggregated variable:
//  - _pAggVarTemplate: the variable named varName looked up (no recursion) in templateDDS,
//  - _pAggDim: the Dimension of the parent dataset's local-scope DimensionElement named _dimName,
//  - _memberDatasets: filled by collectAggMemberDatasets().
// @param pOutParams  output parameter block; must be a valid pointer.
// @param varName     name of the aggregation variable to look up.
// @param templateDDS DDS searched for the template variable.
// The second parameter (the output DDS) is unused now that the check in the
// #if 0 section is disabled.
889  void
890  AggregationElement::getParamsForJoinAggOnVariable(
891  JoinAggParams* pOutParams,
892  const DDS& /*aggOutputDDS*/,
893  const std::string& varName,
894  const DDS& templateDDS)
895  {
896  VALID_PTR(pOutParams);
897 
898  // Look up the template variable.
899  pOutParams->_pAggVarTemplate = AggregationUtil::getVariableNoRecurse(templateDDS, varName);
900  if (!(pOutParams->_pAggVarTemplate))
901  {
  // NOTE(review): the embedded listing numbering skips 902 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
903  " We could not find a template for the specified aggregation variable="
904  + varName
905  + " so we cannot continue the aggregation.");
906  }
907 
908  // Dimension must exist already
909  const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
910  NCML_ASSERT_MSG(pDim,
911  "Didn't find a DimensionElement with the aggregation dimName=" + _dimName );
912  pOutParams->_pAggDim = &(pDim->getDimension());
913 
914 #if 0
915  // I don't follow the logic here. I think we should be able to add attributes to
916  // variables that already exist. This may be intended to protect against removing
917  // the variable on which the aggregation is performed 'over' (e.g., time) with a
918  // different variable. But it has the affect of also prohibiting that addition of
919  // an attribute on that variable. I'm removing it for now. jhrg 10/17/11
920 
921  // Be sure the name isn't taken in the output DDS.
922  BaseType* pExists = AggregationUtil::getVariableNoRecurse(aggOutputDDS, varName);
923  NCML_ASSERT_MSG(!pExists,
924  "Failed since the name of the new variable to add (name="
925  + varName
926  + ") already exists in the "
927  " output aggregation DDS! What happened?!");
928 #endif
929 
930  // Get a vector of lazy loaders
931  // We will transfer AGM ownership to the calls so do not need to delete them.
932  collectAggMemberDatasets ( pOutParams->_memberDatasets );
933  }
934 
// Creates the joinNew aggregated variable for one aggregation variable name.
// Gathers the join parameters, then dispatches on the template variable's type:
// dods_array_c goes to processAggVarJoinNewForArray, dods_grid_c goes to
// processAggVarJoinNewForGrid, and any other type takes the error branch.
// @param pAggDDS     output DDS; dereferenced without a null check.
// @param varName     name of the variable to aggregate.
// @param templateDDS DDS providing the template variable.
935  void
936  AggregationElement::processJoinNewOnAggVar(
937  DDS* pAggDDS,
938  const std::string& varName,
939  const DDS& templateDDS)
940  {
941  // Get the params we need to factory the actual aggregation subclass
942  JoinAggParams joinAggParams;
943  getParamsForJoinAggOnVariable(
944  &joinAggParams, // output
945  *pAggDDS,
946  varName,
947  templateDDS);
948 
949  // Factory out the proper subtype
950  BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
951  if (pAggVarTemplate->type() == dods_array_c)
952  {
953  processAggVarJoinNewForArray(*pAggDDS,
954  *(static_cast<Array*>(pAggVarTemplate)),
955  *(joinAggParams._pAggDim),
956  joinAggParams._memberDatasets );
957  }
958  else if (pAggVarTemplate->type() == dods_grid_c)
959  {
960  processAggVarJoinNewForGrid(*pAggDDS,
961  *(static_cast<Grid*>(pAggVarTemplate)),
962  *(joinAggParams._pAggDim),
963  joinAggParams._memberDatasets);
964  }
965  else
966  {
  // NOTE(review): the embedded listing numbering skips 967 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
968  "Got an aggregation variable not of type Array or Grid, but of: " +
969  pAggVarTemplate->type_name() +
970  " which we cannot aggregate!");
971  }
972  // Nothing else to do for this var until the call to processParentDataset() is complete.
973  }
974 
975 
// Creates the joinExisting aggregated variable for one aggregation variable name.
// Gathers the join parameters, then dispatches on the template variable's type:
// dods_array_c goes to processAggVarJoinExistingForArray, dods_grid_c goes to
// processAggVarJoinExistingForGrid, and any other type takes the error branch.
// @param pAggDDS     output DDS; dereferenced without a null check.
// @param varName     name of the variable to aggregate.
// @param templateDDS DDS providing the template variable.
976  void
977  AggregationElement::processJoinExistingOnAggVar(
978  DDS* pAggDDS,
979  const std::string& varName,
980  const DDS& templateDDS)
981  {
982  // Get the params we need to factory the actual aggregation subclass
983  JoinAggParams joinAggParams;
984  getParamsForJoinAggOnVariable(
985  &joinAggParams, // output
986  *pAggDDS,
987  varName,
988  templateDDS);
989 
990  // Factory out the proper subtype
991  BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
992  if (pAggVarTemplate->type() == dods_array_c)
993  {
994  processAggVarJoinExistingForArray(*pAggDDS,
995  *(static_cast<Array*>(pAggVarTemplate)),
996  *(joinAggParams._pAggDim),
997  joinAggParams._memberDatasets );
998  }
999  else if (pAggVarTemplate->type() == dods_grid_c)
1000  {
1001  processAggVarJoinExistingForGrid(*pAggDDS,
1002  *(static_cast<Grid*>(pAggVarTemplate)),
1003  *(joinAggParams._pAggDim),
1004  joinAggParams._memberDatasets);
1005  }
1006  else
1007  {
  // NOTE(review): the embedded listing numbering skips 1008 and the string below
  // closes a call that is never opened here -- the opening of the error-reporting
  // call appears to have been lost in extraction. Restore it from the upstream file.
1009  "Got an aggregation variable not of type Array or Grid, but of: " +
1010  pAggVarTemplate->type_name() + " which we cannot aggregate!");
1011  }
1012  // Nothing else to do for this var until the call to processParentDataset() is complete.
1013  }
1014 
1015  void
1016  AggregationElement::processAggVarJoinNewForArray(DDS& aggDDS,
1017  const libdap::Array& arrayTemplate,
1018  const agg_util::Dimension& dim,
1019  const AMDList& memberDatasets )
1020  {
1021  // Use the basic array getter to read adn get from top level DDS.
1022  auto_ptr<agg_util::ArrayGetterInterface>
1023  arrayGetter(new agg_util::TopLevelArrayGetter());
1024 
1025  auto_ptr<ArrayAggregateOnOuterDimension> pAggArray(
1027  arrayTemplate,
1028  memberDatasets,
1029  arrayGetter, // will xfer ownership
1030  dim)
1031  );
1032 
1033  // Make sure we xfer ownership of contained dumb ptr.
1034  NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected auto_ptr owner xfer, failed!");
1035 
1036  // This will copy, auto_ptr will clear the prototype.
1037  // NOTE: add_var() makes a copy.
1038  // OPTIMIZE change to add_var_no_copy when it exists.
1039  BESDEBUG("ncml",
1040  "Adding new ArrayAggregateOnOuterDimension with name=" <<
1041  arrayTemplate.name() <<
1042  " to aggregated dataset!" <<
1043  endl);
1044 
1045  aggDDS.add_var(pAggArray.get());
1046  }
1047 
1048 
1049  void
1050  AggregationElement::processAggVarJoinNewForGrid(DDS& aggDDS,
1051  const Grid& gridTemplate,
1052  const agg_util::Dimension& dim,
1053  const AMDList& memberDatasets )
1054  {
1055  auto_ptr<GridAggregateOnOuterDimension> pAggGrid(new GridAggregateOnOuterDimension(
1056  gridTemplate,
1057  dim,
1058  memberDatasets,
1059  _parser->getDDSLoader()
1060  ));
1061 
1062  // This will copy, auto_ptr will clear the prototype.
1063  // OPTIMIZE change to add_var_no_copy when it exists.
1064  BESDEBUG("ncml", "Adding new GridAggregateOnOuterDimension with name=" << gridTemplate.name() <<
1065  " to aggregated dataset!" << endl);
1066  aggDDS.add_var(pAggGrid.get());
1067 
1068  // processParentDatasetCompleteForJoinNew() will
1069  // make sure the correct new map vector gets added
1070  }
1071 
1072  void
1073  AggregationElement::processAggVarJoinExistingForArray(
1074  DDS& aggDDS,
1075  const libdap::Array& arrayTemplate,
1076  const agg_util::Dimension& dim,
1077  const AMDList& memberDatasets )
1078  {
1079  // Use the basic array getter to read adn get from top level DDS.
1080  auto_ptr<agg_util::ArrayGetterInterface> arrayGetter(
1082 
1083  auto_ptr<ArrayJoinExistingAggregation> pAggArray(
1085  arrayTemplate,
1086  memberDatasets,
1087  arrayGetter, // will xfer ownership
1088  dim)
1089  );
1090 
1091  // Make sure we xfer ownership of contained dumb ptr.
1092  NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected auto_ptr owner xfer, failed!");
1093 
1094  // This will copy, auto_ptr will clear the prototype.
1095  // NOTE: add_var() makes a copy.
1096  // OPTIMIZE change to add_var_no_copy when it exists.
1097  BESDEBUG("ncml",
1098  "Adding new ArrayJoinExistingAggregation with name=" <<
1099  arrayTemplate.name() <<
1100  " to aggregated dataset!" <<
1101  endl);
1102 
1103  aggDDS.add_var(pAggArray.get());
1104  }
1105 
1106 
1107  void
1108  AggregationElement::processAggVarJoinExistingForGrid(
1109  DDS& aggDDS,
1110  const Grid& gridTemplate,
1111  const agg_util::Dimension& dim,
1112  const AMDList& memberDatasets)
1113  {
1114  auto_ptr<GridJoinExistingAggregation> pAggGrid(
1116  gridTemplate,
1117  memberDatasets,
1118  _parser->getDDSLoader(),
1119  dim));
1120 
1121  BESDEBUG("ncml",
1122  "Adding new GridJoinExistingAggregation with name=" <<
1123  gridTemplate.name() <<
1124  " to aggregated dataset!" <<
1125  endl);
1126  aggDDS.add_var(pAggGrid.get()); // will copy
1127  }
1128 
1129  void
1130  AggregationElement::processParentDatasetCompleteForJoinNew()
1131  {
1132  NetcdfElement* pParentDataset = getParentDataset();
1133  VALID_PTR(pParentDataset);
1134  DDS* pParentDDS = pParentDataset->getDDS();
1135  VALID_PTR(pParentDDS);
1136 
1137  const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
1138  NCML_ASSERT_MSG(pDim, " AggregationElement::processParentDatasetCompleteForJoinNew(): "
1139  " didn't find a DimensionElement with the joinNew dimName=" + _dimName );
1140  const agg_util::Dimension& dim = pDim->getDimension();
1141 
1142  // See if there's an explicit or placeholder c.v. for this dimension name
1143  BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, dim.name);
1144  Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
1145 
1146  // If name totally unused, we need to create a new c.v. and add it.
1147  if (!pBT)
1148  {
1149  pCV = createAndAddCoordinateVariableForNewDimension(*pParentDDS, dim);
1150  NCML_ASSERT_MSG(pCV, "processParentDatasetCompleteForJoinNew(): "
1151  "failed to create a new coordinate variable for dim=" + dim.name);
1152  }
1153  else // name exists: either it's explicit or deferred.
1154  {
1155  // See if the var we found with the dimension name is
1156  // in the deferred variable list for the parent dataset:
1157  VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pBT);
1158  // If not, then we expect explicit values so just validate it's a proper c.v. for
1159  // the aggregation (the dim) and set pCV to it if so.
1160  if (!pVarElt)
1161  {
1162  // will throw if not valid since we send true.
1163  pCV = ensureVariableIsProperNewCoordinateVariable(pBT, dim, true);
1164  VALID_PTR(pCV);
1165  }
1166  else // it was deferred, need to do some special work...
1167  {
1168  pCV = processDeferredCoordinateVariable(pBT, dim);
1169  VALID_PTR(pCV);
1170  }
1171  }
1172 
1173  // OK, either pCV is valid or we've unwound out by this point.
1174  // If a coordinate axis type was specified, we need to add it now.
1175  //
1176  // This fiddles with the attribute for the CV. jhrg 10/17/11
1177  if (!_coordinateAxisType.empty())
1178  {
1179  addCoordinateAxisType(*pCV, _coordinateAxisType);
1180  }
1181 
1182  // For each aggVar:
1183  // If it's a Grid, add the coordinate variable as a new map vector.
1184  // If it's an Array, do nothing -- we already added the CV as a sibling to the aggvar
1185  AggVarIter it;
1186  AggVarIter endIt = endAggVarIter();
1187  for (it = beginAggVarIter(); it != endIt; ++it)
1188  {
1189  const string& aggVar = *it;
1190  BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, aggVar);
1191  GridAggregateOnOuterDimension* pGrid = dynamic_cast<GridAggregateOnOuterDimension*>(pBT);
1192  if (pGrid)
1193  {
1194  // Add the given map to the Grid as a copy
1195  pGrid->prepend_map(pCV, true);
1196  }
1197  }
1198  }
1199 
1200  void
1201  AggregationElement::processParentDatasetCompleteForJoinExisting()
1202  {
1203  NetcdfElement* pParentDataset = getParentDataset();
1204  VALID_PTR(pParentDataset);
1205  DDS* pAggDDS = pParentDataset->getDDS();
1206  VALID_PTR(pAggDDS);
1207 
1208  const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
1209  NCML_ASSERT_MSG(pDim,
1210  " Didn't find a DimensionElement with the joinExisting dimName=" + _dimName );
1211  const agg_util::Dimension& dim = pDim->getDimension();
1212 
1213  // See if there's an explicit or placeholder c.v. for this dimension name
1214  BaseType* pDimNameVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, dim.name);
1215 
1216  bool placeholderExists = false;
1217  Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
1218  // If the c.v. exists, then process it further.
1219  if (pDimNameVar)
1220  {
1221  // See if the var we found with the dimension name is
1222  // in the deferred variable list for the parent dataset:
1223  VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pDimNameVar);
1224  // If not, then we expect explicit values so just validate it's a proper c.v. for
1225  // the aggregation (the dim) and set pCV to it if so.
1226  if (!pVarElt)
1227  {
1228  // will throw if not valid since we send true.
1229  pCV = ensureVariableIsProperNewCoordinateVariable(pDimNameVar, dim, true);
1230  VALID_PTR(pCV);
1231  placeholderExists = false;
1232  }
1233  else // it was deferred, need to do some special work below...
1234  {
1235  //pCV = processDeferredCoordinateVariable(pDimNameVar, dim);
1236  placeholderExists = true;
1237  }
1238  }
1239 
1240  // For the scope of the next loop, this will be filled
1241  // with a new aggregated map variable when we fidnt he first Grid
1242  // and then pCV will refer to it until the fucntion end.
1243  // If created, it will be used as the map vector for all Grid's.
1244  auto_ptr<ArrayJoinExistingAggregation> pNewMap(0);
1245 
1246  // For each aggVar:
1247  // If it's a Grid, add the coordinate variable as a new map vector
1248  // since we left it out in the actual Grid until aggregated.
1249  // If it's an Array, do nothing
1250  AggVarIter it;
1251  AggVarIter endIt = endAggVarIter();
1252  for (it = beginAggVarIter(); it != endIt; ++it)
1253  {
1254  const string& aggVar = *it;
1255  BaseType* pAggVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, aggVar);
1256 
1257  // HACK TODO clean this downcast later when we refactor this file.
1258  GridJoinExistingAggregation* pGrid = dynamic_cast<GridJoinExistingAggregation*>(pAggVar);
1259  if (pGrid)
1260  {
1261  // If we don't find it, but we're the first Grid, then assume it's in the Grid maps
1262  // and create it. Will be reused by other Grid's.
1263  // We also do this if it was a placeholder since we need to replace it!
1264  if ( !pCV || placeholderExists )
1265  {
1266  pNewMap =
1268  VALID_PTR(pNewMap.get());
1269 
1270  // If there was a placeholder, we need to
1271  // grab it's metadata as a changeset and replace
1272  // the variable in the DDS with the new one.
1273  if (placeholderExists)
1274  {
1275  processPlaceholderCoordinateVariableForJoinExisting(*pDimNameVar,
1276  pNewMap.get());
1277  }
1278 
1279  // this will make a copy, so the auto_ptr is ok.
1280  AggregationUtil::addOrReplaceVariableForName(pAggDDS, *(pNewMap.get()) );
1281 
1282  // Use the new one as the coordinate variable for the maps below
1283  pCV = pNewMap.get();
1284  }
1285 
1286  // It MUST exist for a Grid since we have to add it for completeness.
1287  NCML_ASSERT_MSG(pCV,
1288  "Expected a coordinate variable since a Grid exists... what happened?");
1289 
1290  // Add the given map to the Grid as a copy
1291  pGrid->prepend_map(pCV, true);
1292  }
1293  }
1294  }
1295 
1296  void
1297  AggregationElement::processPlaceholderCoordinateVariableForJoinExisting(
1298  const libdap::BaseType& placeholderVar,
1299  libdap::Array* pNewVar)
1300  {
1301  VALID_PTR(pNewVar);
1302 
1303  // Make sure the types of the placeholder scalar and created array match or the author goofed
1304  BaseType* pNewEltProto = pNewVar->var();
1305  VALID_PTR(pNewEltProto);
1306  if (placeholderVar.type() != pNewEltProto->type())
1307  {
1309  " We expected the type of the placeholder coordinate variable to be the same "
1310  " as that created by the aggregation. Expected type="
1311  + pNewEltProto->type_name() +
1312  + " but placeholder has type="
1313  + placeholderVar.type_name()
1314  + " Please make sure these match in the input file!");
1315  }
1316 
1317  // Pull the metadata into the new c.v. from the placeholder
1318  AggregationUtil::gatherMetadataChangesFrom(pNewVar, placeholderVar);
1319 
1320  // Let the validation know that we got values for the original value and to remove the entry
1321  // since we're about to delete the pointer to pBT!
1322  getParentDataset()->setVariableGotValues(const_cast<BaseType*>(&placeholderVar), true);
1323  }
1324 
1325 
1326  void
1328  {
1329  _coordinateAxisType = cat;
1330  }
1331 
1332  const std::string&
1334  {
1335  return _coordinateAxisType;
1336  }
1337 
1338  libdap::Array*
1339  AggregationElement::ensureVariableIsProperNewCoordinateVariable(libdap::BaseType* pBT,
1340  const agg_util::Dimension& dim,
1341  bool throwOnInvalidCV) const
1342  {
1343  VALID_PTR(pBT);
1344  Array* pArrRet = 0;
1345 
1346  // If 1D array with name == dim....
1347  if (AggregationUtil::couldBeCoordinateVariable(pBT))
1348  {
1349  // Ensure the dimensionalities match
1350  Array* pArr = static_cast<Array*>(pBT);
1351  if ( pArr->length() == static_cast<int>(dim.size) )
1352  {
1353  // OK, it's a valid return value.
1354  pArrRet = pArr;
1355  }
1356  else // Dimensionality mismatch, exception or return NULL.
1357  {
1358  ostringstream oss;
1359  oss << string("In the aggregation for dimension=") << dim.name <<
1360  ": The coordinate variable we found does NOT have the same dimensionality as the"
1361  "aggregated dimension! We expected dimensionality=" << dim.size <<
1362  " but the coordinate variable had dimensionality=" << pArr->length();
1363  BESDEBUG("ncml", oss.str() << endl);
1364  if (throwOnInvalidCV)
1365  {
1366  THROW_NCML_PARSE_ERROR(line(), oss.str());
1367  }
1368  }
1369  }
1370 
1371  else // Name exists, but not a coordinate variable, then exception or return null.
1372  {
1373  std::ostringstream msg;
1374  msg << "Aggregation found a variable matching aggregated dimension name=" << dim.name <<
1375  " but it was not a coordinate variable. "
1376  " It must be a 1D array whose dimension name is the same as its name. ";
1377  BESDEBUG("ncml", "AggregationElement::ensureVariableIsProperNewCoordinateVariable: " +
1378  msg.str() << endl);
1379  if (throwOnInvalidCV)
1380  {
1382  msg.str())
1383  }
1384  }
1385  // Return valid Array or null on failures.
1386  return pArrRet;
1387  }
1388 
1389  libdap::Array*
1390  AggregationElement::findMatchingCoordinateVariable(
1391  const DDS& dds,
1392  const agg_util::Dimension& dim,
1393  bool throwOnInvalidCV/*=true*/) const
1394  {
1395  BaseType* pBT = AggregationUtil::getVariableNoRecurse(dds, dim.name);
1396 
1397  // Name doesn't exist, just NULL. We'll have to create it from scratch
1398  if (!pBT)
1399  {
1400  return 0;
1401  }
1402 
1403  return ensureVariableIsProperNewCoordinateVariable(pBT, dim, throwOnInvalidCV);
1404  }
1405 
1418  libdap::Array*
1419  AggregationElement::processDeferredCoordinateVariable(libdap::BaseType* pBT, const agg_util::Dimension& dim)
1420  {
1421  VALID_PTR(pBT);
1422 
1423  BESDEBUG("ncml", "Processing the placeholder coordinate variable (no values) for the "
1424  "current aggregation to add placeholder metadata to the generated values..." << endl);
1425 
1426  // Generate the c.v. as if we had no placeholder since pBT will be a scalar (shape cannot
1427  // be defined on it by ncml spec defn).
1428  // @OPTIMIZE try to refactor this to avoid unnecessary copies.
1429  auto_ptr<Array> pNewArrCV = createCoordinateVariableForNewDimension(dim);
1430  NCML_ASSERT_MSG(pNewArrCV.get(),
1431  " createCoordinateVariableForNewDimension()"
1432  " returned null.");
1433 
1434  // Make sure the types of the placeholder scalar and created array match or the author goofed
1435  BaseType* pNewEltProto = pNewArrCV->var();
1436  VALID_PTR(pNewEltProto);
1437  if (pBT->type() != pNewEltProto->type())
1438  {
1440  " We expected the type of the placeholder coordinate variable to be the same "
1441  " as that created by the aggregation. Expected type="
1442  + pNewEltProto->type_name() +
1443  + " but placeholder has type="
1444  + pBT->type_name()
1445  + " Please make sure these match in the input file!");
1446  }
1447 
1448  // Let the validation know that we got values for the original value and to remove the entry
1449  // since we're about to delete the pointer to pBT!
1450  getParentDataset()->setVariableGotValues(pBT, true);
1451 
1452  // Copy the entire AttrTable tree (recursively) from the place holder into the new variable
1453  pNewArrCV->get_attr_table() = pBT->get_attr_table();
1454 
1455  // Delete the placeholder
1456  DDS* pDDS = getParentDataset()->getDDS();
1457  VALID_PTR(pDDS);
1458  pDDS->del_var(pBT->name());
1459 
1460  // Add the new one, which will copy it (argh! we need to fix this in libdap!)
1461  // OPTIMIZE use non copy add when available.
1462  BESDEBUG("ncml", "Adding CV: " << pNewArrCV->name() << endl);
1463  pDDS->add_var(pNewArrCV.get()); // use raw ptr for the copy.
1464 
1465  // Pull out the copy we just added and hand it back
1466  Array* pArrCV = static_cast<Array*>(AggregationUtil::getVariableNoRecurse(*pDDS, dim.name));
1467  VALID_PTR(pArrCV);
1468  return pArrCV;
1469  }
1470 
1471  auto_ptr<libdap::Array>
1472  AggregationElement::createCoordinateVariableForNewDimension(const agg_util::Dimension& dim) const
1473  {
1474  // Get the netcdf@coordValue or use the netcdf@location (or auto generate if empty() ).
1475  NCML_ASSERT(_datasets.size() > 0);
1476  bool hasCoordValue = !(_datasets[0]->coordValue().empty());
1477  if (hasCoordValue)
1478  {
1479  return createCoordinateVariableForNewDimensionUsingCoordValue(dim);
1480  }
1481  else
1482  {
1483  return createCoordinateVariableForNewDimensionUsingLocation(dim);
1484  }
1485  }
1486 
1487  libdap::Array*
1488  AggregationElement::createAndAddCoordinateVariableForNewDimension(DDS& dds, const agg_util::Dimension& dim)
1489  {
1490  auto_ptr<libdap::Array> pNewCV = createCoordinateVariableForNewDimension(dim);
1491 
1492  // Make sure it did it
1493  NCML_ASSERT_MSG(pNewCV.get(), "AgregationElement::createCoordinateVariableForNewDimension() failed to create a coordinate variable!");
1494 
1495  // Add it to the DDS, which will make a copy
1496  // (TODO change this when we add noncopy add_var to DDS)
1497  //
1498  // Fix. This will append the variable to the DDS; we need these CVs to be
1499  // prefixes to the Grids (so that old versions of the netCDF library will
1500  // recognize them. jhrg 10/17/11
1501  BESDEBUG("ncml2", "AggregationElement::createAndAddCoordinateVariableForNewDimension: " << pNewCV->name());
1502 #if 0
1503  dds.add_var(pNewCV.get());
1504 #else
1505  // This provides a way to remember where the last CV was inserted and adds
1506  // this one after it. That provides the behavior that all of the CVs are
1507  // added at the beginning of the DDS but in the order they appear in the NCML.
1508  // That will translate into a greater chance of success for users, I think ...
1509  //
1510  // See also similar code in AggregationUtil::addCopyOfVariableIfNameIsAvailable.
1511  // jhrg 10/17/11
1512  static int last_added = 0;
1513  DDS::Vars_iter pos = dds.var_begin();
1514  for (int i = 0; i < last_added; ++i)
1515  ++pos;
1516 
1517  dds.insert_var(pos, pNewCV.get());
1518  ++last_added;
1519 #endif
1520  // Grab the copy back out and set to our expected result.
1521  Array* pCV = static_cast<Array*>( AggregationUtil::getVariableNoRecurse(
1522  dds,
1523  dim.name) );
1524 
1525  NCML_ASSERT_MSG(pCV, "Logic Error: tried to add a new coordinate variable while processing joinNew"
1526  " but we couldn't locate it!");
1527  return pCV;
1528  }
1529 
1530  auto_ptr<libdap::Array>
1531  AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValue(const agg_util::Dimension& dim) const
1532  {
1533  NCML_ASSERT(_datasets.size() > 0);
1534  NCML_ASSERT_MSG(_datasets.size() == dim.size, "Logic error: Number of datasets doesn't match dimension!");
1535  // Use first dataset to define the proper type
1536  double doubleVal = 0;
1537  if (_datasets[0]->getCoordValueAsDouble(doubleVal))
1538  {
1539  return createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(dim);
1540  }
1541  else
1542  {
1543  return createCoordinateVariableForNewDimensionUsingCoordValueAsString(dim);
1544  }
1545  }
1546 
1547  auto_ptr<libdap::Array>
1548  AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(const agg_util::Dimension& dim) const
1549  {
1550  vector<dods_float64> coords;
1551  coords.reserve(dim.size);
1552  double doubleVal = 0;
1553  // Use the index rather than iterator so we can use it in debug output...
1554  for (unsigned int i=0; i < _datasets.size(); ++i)
1555  {
1556  const NetcdfElement* pDataset = _datasets[i];
1557  if (!pDataset->getCoordValueAsDouble(doubleVal))
1558  {
1560  "In creating joinNew coordinate variable from coordValue, expected a coordValue of type double"
1561  " but failed! coordValue=" + pDataset->coordValue() +
1562  " which was in the dataset location=" + pDataset->location() +
1563  " with title=\"" + pDataset->title() + "\"");
1564  }
1565  else // we got our value fine, so add it
1566  {
1567  coords.push_back(static_cast<dods_float64>(doubleVal));
1568  }
1569  }
1570 
1571  // If we got here, we have the array of coords.
1572  // So we need to make the proper array, fill it in, and return it.
1573  auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<Float64>", dim.name, true);
1574  NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsDouble: failed to create"
1575  " the new Array<Float64> for variable: " + dim.name);
1576  pNewCV->append_dim(dim.size, dim.name);
1577  pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1578  return pNewCV;
1579  }
1580 
1581  auto_ptr<libdap::Array>
1582  AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsString(const agg_util::Dimension& dim) const
1583  {
1584  // I feel suitably dirty for cut and pasting this.
1585  vector<string> coords;
1586  coords.reserve(dim.size);
1587  for (unsigned int i=0; i < _datasets.size(); ++i)
1588  {
1589  const NetcdfElement* pDataset = _datasets[i];
1590  if (pDataset->coordValue().empty())
1591  {
1592  int parseLine = line();
1593  THROW_NCML_PARSE_ERROR(parseLine,
1594  "In creating joinNew coordinate variable from coordValue, expected a coordValue of type string"
1595  " but it was empty! dataset location=" + pDataset->location() +
1596  " with title=\"" + pDataset->title() + "\"");
1597  }
1598  else // we got our value fine, so add it
1599  {
1600  coords.push_back(pDataset->coordValue());
1601  }
1602  }
1603  // If we got here, we have the array of coords.
1604  // So we need to make the proper array, fill it in, and return it.
1605  auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1606  NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsString: failed to create"
1607  " the new Array<String> for variable: " + dim.name);
1608  pNewCV->append_dim(dim.size, dim.name);
1609  pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1610  return pNewCV;
1611  }
1612 
1613  auto_ptr<libdap::Array>
1614  AggregationElement::createCoordinateVariableForNewDimensionUsingLocation(const agg_util::Dimension& dim) const
1615  {
1616  // I feel suitably dirty for cut and pasting this.
1617  vector<string> coords;
1618  coords.reserve(dim.size);
1619  for (unsigned int i=0; i < _datasets.size(); ++i)
1620  {
1621  const NetcdfElement* pDataset = _datasets[i];
1622  string location("");
1623  if (pDataset->location().empty())
1624  {
1625  std::ostringstream oss;
1626  oss << "Virtual_Dataset_" << i;
1627  location = oss.str();
1628  }
1629  else // we got our value fine, so add it
1630  {
1631  location = pDataset->location();
1632  }
1633  coords.push_back(location);
1634  }
1635  // If we got here, we have the array of coords.
1636  // So we need to make the proper array, fill it in, and return it.
1637  auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1638  NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueUsingLocation: failed to create"
1639  " the new Array<String> for variable: " + dim.name);
1640 
1641  pNewCV->append_dim(dim.size, dim.name);
1642  pNewCV->set_value(coords, coords.size());
1643  return pNewCV;
1644  }
1645 
1646  void
1647  AggregationElement::collectDatasetsInOrder(vector<const DDS*>& ddsList) const
1648  {
1649  ddsList.resize(0);
1650  ddsList.reserve(_datasets.size());
1651  vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
1652  vector<NetcdfElement*>::const_iterator it;
1653  for (it = _datasets.begin(); it != endIt; ++it)
1654  {
1655  const NetcdfElement* elt = *it;
1656  VALID_PTR(elt);
1657  const DDS* pDDS = elt->getDDS();
1658  VALID_PTR(pDDS);
1659  ddsList.push_back(pDDS);
1660  }
1661  }
1662 
1663  void
1664  AggregationElement::collectAggMemberDatasets(AMDList& rMemberDatasets) const
1665  {
1666  rMemberDatasets.resize(0);
1667  rMemberDatasets.reserve(_datasets.size());
1668 
1669  for (vector<NetcdfElement*>::const_iterator it = _datasets.begin();
1670  it != _datasets.end();
1671  ++it)
1672  {
1673  VALID_PTR(*it);
1674  RCPtr<AggMemberDataset> pAGM( (*it)->getAggMemberDataset() );
1675  VALID_PTR(pAGM.get());
1676 
1677  // Push down the ncoords hint if it was given
1678  if ( !( (*it)->ncoords().empty()) &&
1679  !_dimName.empty() )
1680  {
1681  if (! (pAGM->isDimensionCached(_dimName)) )
1682  {
1683  unsigned int ncoords = (*it)->getNcoordsAsUnsignedInt();
1684  pAGM->setDimensionCacheFor(agg_util::Dimension(_dimName,ncoords), false);
1685  }
1686  }
1687 
1688  // don't need to ref(), the RCPtr copy ctor in the vector elt
1689  // takes care of it when we push_back()
1690  rMemberDatasets.push_back( pAGM );
1691  }
1692  }
1693 
1694  void
1695  AggregationElement::processAnyScanElements()
1696  {
1697  if (_scanners.size() > 0)
1698  {
1699  BESDEBUG("ncml", "Started to process " << _scanners.size() << " scan elements..." << endl);
1700  }
1701 
1702  vector<ScanElement*>::iterator it;
1703  vector<ScanElement*>::iterator endIt = _scanners.end();
1704  vector<NetcdfElement*> scannedDatasets;
1705  for (it = _scanners.begin(); it != endIt; ++it)
1706  {
1707  BESDEBUG("ncml", "Processing scan element = " << (*it)->toString() << " ..." << endl);
1708 
1709  // Run the scanner to get the scanned datasets.
1710  // These will be sorted, so maintain order.
1711  (*it)->getDatasetList(scannedDatasets);
1712 
1713  // Add the datasets using the parser call to
1714  // set the data up correctly,
1715  // then unref() and remove them from the temp array
1716  vector<NetcdfElement*>::iterator datasetIt;
1717  vector<NetcdfElement*>::iterator datasetEndIt = scannedDatasets.end();
1718  for (datasetIt = scannedDatasets.begin();
1719  datasetIt != datasetEndIt;
1720  ++datasetIt)
1721  {
1722  // this will ref() it and make sure we can load it.
1723  _parser->addChildDatasetToCurrentDataset(*datasetIt);
1724  // so we unref() it afterwards because we're dumping the temp array
1725  (*datasetIt)->unref();
1726  }
1727  // we're done with it and they're all unref().
1728  scannedDatasets.clear();
1729  }
1730  }
1731 
1732  void
1733  AggregationElement::mergeDimensions(
1734  bool checkDimensionMismatch/*=true*/,
1735  const std::string& dimToSkip/*=""*/)
1736  {
1737  NetcdfElement* pParent = getParentDataset();
1738  // For each dataset in the children....
1739  vector<NetcdfElement*>::const_iterator datasetsEndIt = _datasets.end();
1740  vector<NetcdfElement*>::const_iterator datasetsIt;
1741  for (datasetsIt = _datasets.begin(); datasetsIt != datasetsEndIt; ++datasetsIt)
1742  {
1743  // Check each dimension in it compared to the parent
1744  const NetcdfElement* dataset = *datasetsIt;
1745  VALID_PTR(dataset);
1746  const vector<DimensionElement*>& dimensions = dataset->getDimensionElements();
1747  vector<DimensionElement*>::const_iterator dimEndIt = dimensions.end();
1748  vector<DimensionElement*>::const_iterator dimIt;
1749  for (dimIt = dimensions.begin(); dimIt != dimEndIt; ++dimIt)
1750  {
1751  const DimensionElement* pDim = *dimIt;
1752  VALID_PTR(pDim);
1753  // Skip if asked to do so
1754  if (!dimToSkip.empty() &&
1755  (pDim->name() == dimToSkip) )
1756  {
1757  continue;
1758  }
1759  // Otherwise continue to look it up
1760  const DimensionElement* pUnionDim = pParent->getDimensionInLocalScope(pDim->name());
1761  if (pUnionDim)
1762  {
1763  // We'll check the dimensions match no matter what, but only warn unless we're told to check
1764  if (!pUnionDim->checkDimensionsMatch(*pDim))
1765  {
1766  string msg = string("The union aggregation already had a dimension=") +
1767  pUnionDim->toString() +
1768  " but we found another with different cardinality: " +
1769  pDim->toString() +
1770  " This is likely an error and could cause a later exception.";
1771  BESDEBUG("ncml", "WARNING: " + msg);
1772  if (checkDimensionMismatch)
1773  {
1775  msg + " Scope=" + _parser->getScopeString());
1776  }
1777  }
1778  }
1779  else // if not in the union already, we want to add it!
1780  {
1781  // this will up the ref count for it so when child dataset dies, we're good.
1782  BESDEBUG("ncml", "Dimension name=" << pDim->name() <<
1783  " was not found in the union yet, so adding it. The full elt is: " <<
1784  pDim->toString() << endl);
1785  pParent->addDimension( const_cast<DimensionElement*>(pDim) );
1786  }
1787  }
1788  }
1789  }
1790 
1791  static const string COORDINATE_AXIS_TYPE_ATTR("_CoordinateAxisType");
1792  void
1793  AggregationElement::addCoordinateAxisType(libdap::Array& rCV, const std::string& cat)
1794  {
1795  AttrTable& rAT = rCV.get_attr_table();
1796  AttrTable::Attr_iter foundIt = rAT.simple_find(COORDINATE_AXIS_TYPE_ATTR);
1797  // preexists, then delete it and we'll replace with the new
1798  if (foundIt != rAT.attr_end())
1799  {
1800  rAT.del_attr(COORDINATE_AXIS_TYPE_ATTR);
1801  }
1802 
1803  BESDEBUG("ncml3", "Adding attribute to the aggregation variable " << rCV.name() <<
1804  " Attr is " << COORDINATE_AXIS_TYPE_ATTR <<
1805  " = " << cat <<
1806  endl);
1807 
1808  // Either way, now we can add it.
1809  rAT.append_attr(COORDINATE_AXIS_TYPE_ATTR, "String", cat);
1810  }
1811 
1812  vector<string>
1813  AggregationElement::getValidAttributes()
1814  {
1815  vector<string> attrs;
1816  attrs.push_back("type");
1817  attrs.push_back("dimName");
1818  attrs.push_back("recheckEvery");
1819  return attrs;
1820  }
1821 
1822 }; // namespace ncml_module
virtual bool validateAttributes(const XMLAttributeMap &attrs, const vector< string > &validAttrs, vector< string > *pInvalidAttrs=0, bool printInvalid=true, bool throwOnError=true)
Check that the given attributes are all in the valid set, otherwise fill in *pInvalidAttrs with the p...
Definition: NCMLElement.cc:191
static std::auto_ptr< libdap::Array > makeArrayTemplateVariable(const string &type, const string &name, bool addTemplateVar)
Make an Array where T is the DAP simple type for the values in the Array.
Abstract helper superclass for allowing lazy access to the DataDDS for an aggregation.
virtual const string & getTypeName() const
Return the type of the element, which should be: the same as ConcreteClassName::getTypeName() ...
unsigned int size
Definition: Dimension.h:64
virtual string toString() const
Return a string describing the element.
bool isAggregationVariable(const string &name) const
std::string name
Definition: Dimension.h:61
virtual void handleContent(const string &content)
Handle the characters content for the element.
void setParentAggregation(AggregationElement *parent)
Set my parent AggregationElement to parent.
void addDimension(DimensionElement *dim)
Add the given element to this scope.
#define NCML_ASSERT(cond)
Definition: NCMLDebug.h:80
void setChildAggregation(AggregationElement *agg, bool throwIfExists=true)
Set our aggregation to the given agg.
virtual int unref() const
Decrease the reference count by one.
Definition: RCObject.cc:82
An abstract superclass for NCMLArray that handles the non-parameterized functionality and allows u...
void setVariableAggElement()
Should only be used by the VariableAggElement class to let us know it added the vars.
virtual bool isDimensionCached(const std::string &dimName) const =0
Return whether the dimension is already cached, or would have to be loaded to be found.
void addScanElement(ScanElement *pScanner)
Add a child ScanElement to the Aggregation to be used to to add to the list of child datasets...
const string getValueForLocalNameOrDefault(const string &localname, const string &defVal="") const
If there is an attribute with localname, return its value, else return default.
Definition: XMLHelpers.cc:209
static bool isAllWhitespace(const std::string &str)
Is the entire string whitespace, as defined by the chars in WHITESPACE?
Definition: NCMLUtil.cc:110
#define NCML_ASSERT_MSG(cond, msg)
Definition: NCMLDebug.h:83
Concrete class for NcML element.
Definition: NetcdfElement.h:63
Implementation of the element used to scan directories to create the set of files for an aggre...
Definition: ScanElement.h:50
virtual const libdap::DDS * getDDS() const
Return the DDS for this dataset, loading it in if needed.
const DimensionElement * getDimensionInLocalScope(const string &name) const
virtual NetcdfElement * clone() const
Make and return a copy of this.
virtual string toString() const
Return a string describing the element.
void setAggregationVariableCoordinateAxisType(const std::string &cat)
If a child scan contains a dateFormatMark, then we want to add a "_CoordinateAxisType" of "Time" By s...
virtual unsigned int getCachedDimensionSize(const std::string &dimName) const =0
Get the size of the given dimension named dimName cached within the dataset.
AggregationElement * getChildAggregation() const
Return the raw pointer (or NULL) to our contained aggregation.
A static class for encapsulating the aggregation functionality on libdap.
Concrete impl that simply finds the Array by looking for a variable of the given name at the top leve...
int getParseLineNumber() const
Get the line of the NCML file the parser is currently parsing.
Definition: NCMLParser.cc:222
static std::string printAttributeIfNotEmpty(const std::string &attrName, const std::string &attrValue)
Helper for subclasses implementing toString().
Definition: NCMLElement.cc:240
virtual void handleEnd()
Handle the closing of this element.
class GridAggregateOnOuterDimension : public GridAggregationBase
NetcdfElement * getParentDataset() const
#define THROW_NCML_PARSE_ERROR(parseLine, msg)
Definition: NCMLDebug.h:69
virtual int ref() const
Increase the reference count by one.
Definition: RCObject.cc:74
Struct for holding information about a dimension of data, minimally a name and a cardinality (size)...
Definition: Dimension.h:50
A reference to an RCObject which automatically calls ref() on creation and unref() on destruction.
Definition: RCObject.h:279
virtual ScanElement * clone() const
Make and return a copy of this.
Definition: ScanElement.cc:155
void setVariableGotValues(libdap::BaseType *pVarToValidate, bool removeEntry)
Lookup the VariableElement* associated with pVarToValidate via a previous addVariableToValidateOnClos...
vector< string >::const_iterator AggVarIter
Base class for NcML element concrete classes.
Definition: NCMLElement.h:64
int line() const
Return the current parse line number.
Definition: NCMLElement.cc:174
bool gotVariableAggElement() const
Whether this aggregation contained a variableAgg element to select aggregation variables.
void processParentDatasetComplete()
Called when the enclosing dataset is closing for the aggregation to handle any post processing that i...
#define THROW_NCML_INTERNAL_ERROR(msg)
Definition: NCMLDebug.h:61
void addChildDataset(NetcdfElement *pDataset)
Add a new dataset to the aggregation for the parse.
static const vector< string > _sValidAttrs
std::vector< RCPtr< AggMemberDataset > > AMDList
virtual void setAttributes(const XMLAttributeMap &attrs)
Set the attributes of this from the map.
void setParent(AggregationElement *pParent)
Set the parent of this element.
Definition: ScanElement.cc:143
auto_ptr< ArrayJoinExistingAggregation > makeAggregatedOuterMapVector() const
Create a new aggregated map vector for this aggregation, using its data templates, granule list, and outer dimension.
#define VALID_PTR(ptr)
Definition: NCMLDebug.h:88
T * get() const
Definition: RCObject.h:334
virtual void handleBegin()
Handle a begin on this element.
void addAggregationVariable(const string &name)
Set the variable with name as an aggregation variable for this aggregation.
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
NetcdfElement * setParentDataset(NetcdfElement *parent)
Set the parent and return the old one, which could be null.
virtual void setDimensionCacheFor(const Dimension &dim, bool throwIfFound)=0
Seed the dimension cache using the given dimension, so that later calls to getDimensionSize for dim...
const std::string & getAggregationVariableCoordinateAxisType() const
Return the value set by setAggregationVariableCoordinateAxisType() or "" if none was set...
virtual AggregationElement * clone() const
Make and return a copy of this.