OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
SaxParserWrapper.cc
Go to the documentation of this file.
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "SaxParserWrapper.h"
31 
32 #include <exception>
33 #include <iostream>
34 #include <libxml/parser.h>
35 #include <libxml/xmlstring.h>
36 #include <stdio.h> // for vsnprintf
37 #include <string>
38 
39 #include "BESDebug.h"
40 #include "BESError.h"
41 #include "BESInternalError.h"
42 #include "BESInternalFatalError.h"
43 #include "BESSyntaxUserError.h"
44 #include "BESForbiddenError.h"
45 #include "BESNotFoundError.h"
46 #include "NCMLDebug.h"
47 #include "SaxParser.h"
48 #include "XMLHelpers.h"
49 
50 // Toggle to tell the parser to use the Sax2 start/end element
51 // calls with namespace information.
52 // [ TODO We probably want to remove the non-namespace pathways at some point,
53 // but I will leave them here for now in case there's issues ]
54 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
55 
56 using namespace std;
57 using namespace ncml_module;
58 
60 // Helpers
61 
62 #if NCML_PARSER_USE_SAX2_NAMESPACES
63 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
64 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
65 {
66  attrMap.clear();
67  for (int i=0; i<num_attributes; ++i)
68  {
69  XMLAttribute attr;
70  attr.fromSAX2NamespaceAttributes(attributes);
71  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
72  attrMap.addAttribute(attr);
73  }
74  return num_attributes;
75 }
76 #else
77 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
78 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
79 {
80  attrMap.clear();
81  int count=0;
82  while (attrs && *attrs != NULL)
83  {
84  XMLAttribute attr;
85  attr.localname = XMLUtil::xmlCharToString(*attrs);
86  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
87  attrMap.addAttribute(attr);
88  attrs += 2;
89  count++;
90  }
91  return count;
92 }
93 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
94 
95 
97 // Callbacks we will register; each one just passes the call on to our C++ engine.
98 //
99 // NOTE WELL: New C handlers need to follow the pattern of the
100 // existing handlers below in order to avoid memory leaks
101 // in libxml during an exception!
102 
// Every C callback needs the same boilerplate: cast the void* user data back
// to the SaxParserWrapper, do nothing if an exception has already been
// deferred, and catch anything the C++ side throws so it can be deferred
// rather than escaping from the callback. The two macros below bracket the
// body of a callback to provide that structure.
//
// BEGIN_SAFE_PARSER_BLOCK defines the local symbol "parser", a reference to
// the SaxParser contained in the wrapper, after updating its parse line
// number from the wrapper. A safe handler therefore looks like:
//
// static void ncmlStartDocument(void* userData)
// {
//   BEGIN_SAFE_PARSER_BLOCK(userData); // pass in the void*, which is a SaxParserWrapper*
//   parser.onStartDocument();          // dispatch on the wrapped parser via the autodefined name
//   END_SAFE_PARSER_BLOCK;             // close the error handling wrapper
// }

#define BEGIN_SAFE_PARSER_BLOCK(argName) \
{ \
    SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
    if (_spw_->isExceptionState()) \
    { \
        return; \
    } \
    try \
    { \
        SaxParser& parser = _spw_->getParser(); \
        parser.setParseLineNumber(_spw_->getCurrentParseLine());

// Must follow the parser calls of every block opened with
// BEGIN_SAFE_PARSER_BLOCK: it closes the try block and the enclosing scope,
// and defers any exception to the wrapper.
#define END_SAFE_PARSER_BLOCK \
    } \
    catch (BESError& besFailure) \
    { \
        BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
        _spw_->deferException(besFailure); \
    } \
    catch (std::exception& stdFailure) \
    { \
        BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
        BESInternalError _badness_("Wrapped std::exception.what()=" + string(stdFailure.what()), __FILE__, __LINE__); \
        _spw_->deferException(_badness_); \
    } \
    catch (...) \
    { \
        BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
        BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
        _spw_->deferException(_badness_); \
    } \
}
148 
150 // Our C SAX callbacks, wrapped carefully.
151 
152 static void ncmlStartDocument(void* userData)
153 {
154  BEGIN_SAFE_PARSER_BLOCK(userData);
155 
156  parser.onStartDocument();
157 
159 }
160 
161 static void ncmlEndDocument(void* userData)
162 {
163  BEGIN_SAFE_PARSER_BLOCK(userData);
164 
165  parser.onEndDocument();
166 
168 }
169 
170 #if !NCML_PARSER_USE_SAX2_NAMESPACES
171 
172 static void ncmlStartElement(void * userData,
173  const xmlChar * name,
174  const xmlChar ** attrs)
175 {
176  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
177  BEGIN_SAFE_PARSER_BLOCK(userData);
178 
179  string nameS = XMLUtil::xmlCharToString(name);
180  XMLAttributeMap map;
181  toXMLAttributeMapNoNamespaces(map, attrs);
182 
183  // These args will be valid for the scope of the call.
184  parser.onStartElement(nameS, map);
185 
187 }
188 
189 static void ncmlEndElement(void * userData,
190  const xmlChar * name)
191 {
192  BEGIN_SAFE_PARSER_BLOCK(userData);
193 
194  string nameS = XMLUtil::xmlCharToString(name);
195  parser.onEndElement(nameS);
196 
198 }
199 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
200 
201 #if NCML_PARSER_USE_SAX2_NAMESPACES
202 static
203 void
204 ncmlSax2StartElementNs(void *userData,
205  const xmlChar *localname,
206  const xmlChar *prefix,
207  const xmlChar *URI,
208  int nb_namespaces,
209  const xmlChar **namespaces,
210  int nb_attributes,
211  int /* nb_defaulted */,
212  const xmlChar **attributes)
213 {
214  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
215  BEGIN_SAFE_PARSER_BLOCK(userData);
216 
217  XMLAttributeMap attrMap;
218  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
219 
220  XMLNamespaceMap nsMap;
221  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
222 
223  // These args will be valid for the scope of the call.
224  string localnameString = XMLUtil::xmlCharToString(localname);
225  string prefixString = XMLUtil::xmlCharToString(prefix);
226  string uriString = XMLUtil::xmlCharToString(URI);
227 
228  parser.onStartElementWithNamespace(
229  localnameString,
230  prefixString,
231  uriString,
232  attrMap,
233  nsMap);
234 
236 }
237 
238 static
239 void
240 ncmlSax2EndElementNs(void *userData,
241  const xmlChar *localname,
242  const xmlChar *prefix,
243  const xmlChar *URI)
244 {
245  BEGIN_SAFE_PARSER_BLOCK(userData);
246 
247  string localnameString = XMLUtil::xmlCharToString(localname);
248  string prefixString = XMLUtil::xmlCharToString(prefix);
249  string uriString = XMLUtil::xmlCharToString(URI);
250  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
251 
253 }
254 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
255 
256 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
257 {
258  BEGIN_SAFE_PARSER_BLOCK(userData);
259 
260  BESDEBUG("ncml", "ncmlCharacters: len:" << len << ", content: " << content << endl);
261 
262  // len is since the content string might not be null terminated,
263  // so we have to build out own and pass it up special....
264  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
265  string characters("");
266  characters.reserve(len);
267  const xmlChar* contentEnd = content+len;
268  while(content != contentEnd)
269  {
270  characters += (const char)(*content++);
271  }
272 
273  parser.onCharacters(characters);
274 
276 }
277 
278 static void ncmlWarning(void* userData, const char* msg, ...)
279 {
280  BEGIN_SAFE_PARSER_BLOCK(userData);
281  char buffer[1024];
282  va_list(args);
283  va_start(args, msg);
284  unsigned int len = sizeof(buffer);
285  vsnprintf(buffer, len, msg, args);
286  va_end(args);
287  parser.onParseWarning(string(buffer));
289 }
290 
291 static void ncmlFatalError(void* userData, const char* msg, ...)
292 {
293  BEGIN_SAFE_PARSER_BLOCK(userData);
294  char buffer[1024];
295  va_list(args);
296  va_start(args, msg);
297  unsigned int len = sizeof(buffer);
298  vsnprintf(buffer, len, msg, args);
299  va_end(args);
300  parser.onParseError(string(buffer));
302 }
303 
305 // class SaxParserWrapper impl
306 
307 SaxParserWrapper::SaxParserWrapper(SaxParser& parser)
308 : _parser(parser)
309 , _handler() // inits to all nulls.
310 , _context(0)
311 , _state(NOT_PARSING)
312 , _errorMsg("")
313 , _errorType(0)
314 , _errorFile("")
315 , _errorLine(-1)
316 {
317 }
318 
320 {
321  // Really not much to do... everything cleans itself up.
322  _state = NOT_PARSING;
323  cleanupParser();
324 }
325 
326 bool
327 SaxParserWrapper::parse(const string& ncmlFilename)
328 {
329  bool success = true;
330 
331  // It's illegal to call this until it's done.
332  if (_state == PARSING)
333  {
334  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
335  }
336 
337  // OK, now we're parsing
338  _state = PARSING;
339 
340  setupParser(ncmlFilename);
341 
342  // Old way where we have no context.
343  // int errNo = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
344  // success = (errNo == 0);
345 
346  // Any BESError thrown in SaxParser callbacks will be deferred by the safe handler blocks
347  // So that we safely pass this line.
348  // Even if not, _context is cleared in dtor just in case.
349  xmlParseDocument(_context);
350 
351  success = (_context->errNo == 0);
352 
353  cleanupParser();
354 
355  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
356  if (isExceptionState())
357  {
359  }
360 
361  // Otherwise, we're also done parsing.
362  _state = NOT_PARSING;
363  return success;
364 }
365 
366 void
368 {
369  _state = EXCEPTION;
370  _errorType = theErr.get_error_type();
371  _errorMsg = theErr.get_message();
372  _errorLine = theErr.get_line();
373  _errorFile = theErr.get_file();
374 }
375 
376 // HACK admittedly a little gross, but it's weird to have to copy an exception
377 // and this seemed the safest way rather than making dynamic storage, etc.
378 void
380 {
381  // Clear our state out so we can parse again though.
382  _state = NOT_PARSING;
383 
384  switch (_errorType)
385  {
386  case BES_INTERNAL_ERROR:
387  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
388  break;
389 
391  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
392  break;
393 
395  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
396  break;
397 
398  case BES_FORBIDDEN_ERROR:
399  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
400  break;
401 
402  case BES_NOT_FOUND_ERROR:
403  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
404  break;
405 
406  default:
407  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
408  break;
409  }
410 }
411 
412 int
414 {
415  if (_context)
416  {
417  return xmlSAX2GetLineNumber(_context);
418  }
419  else
420  {
421  return -1;
422  }
423 }
424 
425 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
426 {
427  h.internalSubset = 0;
428  h.isStandalone = 0;
429  h.hasInternalSubset = 0;
430  h.hasExternalSubset = 0;
431  h.resolveEntity = 0;
432  h.getEntity = 0;
433  h.entityDecl = 0;
434  h.notationDecl = 0;
435  h.attributeDecl = 0;
436  h.elementDecl = 0;
437  h.unparsedEntityDecl = 0;
438  h.setDocumentLocator = 0;
439  h.startDocument = 0;
440  h.endDocument = 0;
441  h.startElement = 0;
442  h.endElement = 0;
443  h.reference = 0;
444  h.characters = 0;
445  h.ignorableWhitespace = 0;
446  h.processingInstruction = 0;
447  h.comment = 0;
448  h.warning = 0;
449  h.error = 0;
450  h.fatalError = 0;
451  h.getParameterEntity = 0;
452  h.cdataBlock = 0;
453  h.externalSubset = 0;
454 
455  // unsigned int initialized; magic number the init should fill in
456  /* The following fields are extensions available only on version 2 */
457  // void *_private; //i'd assume i don't set this either...
458 
459  h.startElementNs = 0;
460  h.endElementNs = 0;
461  h.serror = 0;
462 }
463 
464 void
465 SaxParserWrapper::setupParser(const string& filename)
466 {
467  // setup the handler for version 2,
468  // which sets an internal version magic number
469  // into _handler.initialized
470  // but which doesn't clear the handlers to 0.
471  xmlSAXVersion(&_handler, 2);
472 
473  // Initialize all handlers to 0 by hand to start
474  // so we don't blow those internal magic numbers.
475  setAllHandlerCBToNulls(_handler);
476 
477  // Put our static functions into the handler
478  _handler.startDocument = ncmlStartDocument;
479  _handler.endDocument = ncmlEndDocument;
480  _handler.warning = ncmlWarning;
481  _handler.error = ncmlFatalError;
482  _handler.fatalError = ncmlFatalError;
483  _handler.characters = ncmlCharacters;
484 
485  // We'll use one or the other until we're sure it works.
486 #if NCML_PARSER_USE_SAX2_NAMESPACES
487  _handler.startElement = 0;
488  _handler.endElement = 0;
489  _handler.startElementNs = ncmlSax2StartElementNs;
490  _handler.endElementNs = ncmlSax2EndElementNs;
491 #else
492  _handler.startElement = ncmlStartElement;
493  _handler.endElement = ncmlEndElement;
494  _handler.startElementNs = 0;
495  _handler.endElementNs = 0;
496 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
497 
498  // Create the non-validating parser context for the file
499  // using this as the userData for making exception-safe
500  // C++ calls.
501  _context = xmlCreateFileParserCtxt(filename.c_str());
502  if (!_context)
503  {
505  "Cannot parse: Unable to create a libxml parse context for " + filename);
506  }
507  _context->sax = &_handler;
508  _context->userData = this;
509  _context->validate = false;
510 }
511 
512 void
513 SaxParserWrapper::cleanupParser() throw ()
514 {
515  if (_context)
516  {
517  // Remove our handler from it.
518  _context->sax = NULL;
519 
520  // Free it up.
521  xmlFreeParserCtxt(_context);
522  _context = 0;
523  }
524 }
525 
#define BES_SYNTAX_USER_ERROR
Definition: BESError.h:44
void rethrowException()
If there's a deferred exception, this will throw the right subclass type from the preserved state at ...
error thrown if the resource requested cannot be found
exception thrown if an internal error is found and is fatal to the BES
exception thrown if internal error encountered
#define BES_INTERNAL_ERROR
Definition: BESError.h:42
bool isExceptionState() const
Used by the callbacks to know whether we have a deferred exception.
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
An abstract superclass for NCMLArray that handles the non-parameterized functionality and allows u...
STL namespace.
#define BES_FORBIDDEN_ERROR
Definition: BESError.h:45
#define BEGIN_SAFE_PARSER_BLOCK(argName)
bool parse(const string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
error thrown if there is a user syntax error in the request or any other user error ...
virtual int get_error_type()
Return the return code for this error class.
Definition: BESError.h:135
virtual string get_file()
get the file name where the exception was thrown
Definition: BESError.h:102
virtual string get_message()
get the error message for this exception
Definition: BESError.h:94
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Read them all in from the xmlChar array.
Definition: XMLHelpers.cc:369
Abstract exception class for the BES with basic string message.
Definition: BESError.h:51
int getCurrentParseLine() const
Return the current line of the parse we're on, assuming we're not in an exception state and that we a...
#define NULL
Definition: wcsUtil.h:65
#define THROW_NCML_PARSE_ERROR(parseLine, msg)
Definition: NCMLDebug.h:69
void addAttribute(const XMLAttribute &attribute)
TODO how do we tell if this exists? Does it replace? Do we care?
Definition: XMLHelpers.cc:194
#define BES_INTERNAL_FATAL_ERROR
Definition: BESError.h:43
error thrown if the BES is not allowed to access the resource requested
void clear()
make empty
Definition: XMLHelpers.cc:187
#define BES_NOT_FOUND_ERROR
Definition: BESError.h:46
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:49
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Fill in the fields from the SAX2 namespace attributes array.
Definition: XMLHelpers.cc:111
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:110
#define END_SAFE_PARSER_BLOCK