bes  Updated for version 3.20.5
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "SaxParserWrapper.h"
31 
32 #include <exception>
33 #include <iostream>
34 #include <libxml/parser.h>
35 #include <libxml/xmlstring.h>
36 #include <stdio.h> // for vsnprintf
37 #include <string>
38 
39 #include "BESDebug.h"
40 #include "BESError.h"
41 #include "BESInternalError.h"
42 #include "BESInternalFatalError.h"
43 #include "BESSyntaxUserError.h"
44 #include "BESForbiddenError.h"
45 #include "BESNotFoundError.h"
46 #include "NCMLDebug.h"
47 #include "SaxParser.h"
48 #include "XMLHelpers.h"
49 
50 // Toggle to tell the parser to use the Sax2 start/end element
51 // calls with namespace information.
52 // [ TODO We probably want to remove the non-namespace pathways at some point,
53 // but I will leave them here for now in case there's issues ]
54 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
55 
56 using namespace std;
57 using namespace ncml_module;
58 
60 // Helpers
61 
62 #if NCML_PARSER_USE_SAX2_NAMESPACES
63 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
64 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
65 {
66  attrMap.clear();
67  for (int i = 0; i < num_attributes; ++i) {
68  XMLAttribute attr;
69  attr.fromSAX2NamespaceAttributes(attributes);
70  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
71  attrMap.addAttribute(attr);
72  }
73  return num_attributes;
74 }
75 #else
76 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
77 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
78 {
79  attrMap.clear();
80  int count=0;
81  while (attrs && *attrs != NULL)
82  {
83  XMLAttribute attr;
84  attr.localname = XMLUtil::xmlCharToString(*attrs);
85  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
86  attrMap.addAttribute(attr);
87  attrs += 2;
88  count++;
89  }
90  return count;
91 }
92 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
93 
95 // Callback we will register that just pass on to our C++ engine
96 //
97 // NOTE WELL: New C handlers need to follow the given
98 // other examples in order to avoid memory leaks
99 // in libxml during an exception!
100 
101 // To avoid cut & paste below, we use this macro to cast the void* into the wrapper and
102 // set up a proper error handling structure around the main call.
103 // The macro internally defines the symbol "parser" to the SaxParser contained in the wrapper.
104 // So for example, a safe handler call to SaxParser would look like:
105 // static void ncmlStartDocument(void* userData)
106 //{
107 // BEGIN_SAFE_PARSER_BLOCK(userData); // pass in the void*, which is a SaxParserWrapper*
108 // parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
109 // END_SAFE_PARSER_BLOCK; // end the error handling wrapper
110 //}
111 
// Opens the guarded region of a SAX callback. argName is the callback's void*
// user data, which is cast to SaxParserWrapper*. If the wrapper is already in
// the exception state the callback returns immediately; otherwise a try block
// is opened, the local reference "parser" is bound to the wrapped SaxParser and
// the current parse line is pushed into it.
// Must be paired with END_SAFE_PARSER_BLOCK in the same function.
#define BEGIN_SAFE_PARSER_BLOCK(argName) { \
    SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
    if (_spw_->isExceptionState()) { \
        return; \
    } \
    try { \
        SaxParser& parser = _spw_->getParser(); \
        parser.setParseLineNumber(_spw_->getCurrentParseLine());

// Closes the region opened by BEGIN_SAFE_PARSER_BLOCK. Nothing is allowed to
// propagate out of the callback: a BESError is deferred as-is, anything else is
// wrapped in a BESInternalError and deferred on the wrapper.
#define END_SAFE_PARSER_BLOCK \
    } \
    catch (BESError& theErr) { \
        BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
        _spw_->deferException(theErr); \
    } \
    catch (std::exception& ex) { \
        BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
        BESInternalError _badness_("Wrapped std::exception.what()=" + string(ex.what()), __FILE__, __LINE__); \
        _spw_->deferException(_badness_); \
    } \
    catch (...) { \
        BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
        BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
        _spw_->deferException(_badness_); \
    } \
}
146 
148 // Our C SAX callbacks, wrapped carefully.
149 
150 static void ncmlStartDocument(void* userData)
151 {
152  BEGIN_SAFE_PARSER_BLOCK(userData)
153 
154  parser.onStartDocument();
155 
156  END_SAFE_PARSER_BLOCK;
157 }
158 
159 static void ncmlEndDocument(void* userData)
160 {
161  BEGIN_SAFE_PARSER_BLOCK(userData)
162 ; // BESDEBUG("ncml", "SaxParserWrapper::ncmlEndDocument() - BEGIN"<< endl);
163 
164  parser.onEndDocument();
165 
166  // BESDEBUG("ncml", "SaxParserWrapper::ncmlEndDocument() - END"<< endl);
167 
168  END_SAFE_PARSER_BLOCK;
169 }
170 
171 #if !NCML_PARSER_USE_SAX2_NAMESPACES
172 
173 static void ncmlStartElement(void * userData,
174  const xmlChar * name,
175  const xmlChar ** attrs)
176 {
177  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
178  BEGIN_SAFE_PARSER_BLOCK(userData);
179 
180  string nameS = XMLUtil::xmlCharToString(name);
181  XMLAttributeMap map;
182  toXMLAttributeMapNoNamespaces(map, attrs);
183 
184  // These args will be valid for the scope of the call.
185  parser.onStartElement(nameS, map);
186 
187  END_SAFE_PARSER_BLOCK;
188 }
189 
190 static void ncmlEndElement(void * userData,
191  const xmlChar * name)
192 {
193  BEGIN_SAFE_PARSER_BLOCK(userData);
194 
195  string nameS = XMLUtil::xmlCharToString(name);
196  parser.onEndElement(nameS);
197 
198  END_SAFE_PARSER_BLOCK;
199 }
200 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
201 
202 #if NCML_PARSER_USE_SAX2_NAMESPACES
203 static
204 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
205  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
206  const xmlChar **attributes)
207 {
208  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
209  BEGIN_SAFE_PARSER_BLOCK(userData);
210  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
211 
212  XMLAttributeMap attrMap;
213  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
214 
215  XMLNamespaceMap nsMap;
216  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
217 
218  // These args will be valid for the scope of the call.
219  string localnameString = XMLUtil::xmlCharToString(localname);
220  string prefixString = XMLUtil::xmlCharToString(prefix);
221  string uriString = XMLUtil::xmlCharToString(URI);
222 
223  parser.onStartElementWithNamespace(
224  localnameString,
225  prefixString,
226  uriString,
227  attrMap,
228  nsMap);
229 
230  END_SAFE_PARSER_BLOCK;
231 }
232 
233 static
234 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
235 {
236  BEGIN_SAFE_PARSER_BLOCK(userData);
237 
238  string localnameString = XMLUtil::xmlCharToString(localname);
239  string prefixString = XMLUtil::xmlCharToString(prefix);
240  string uriString = XMLUtil::xmlCharToString(URI);
241  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
242 
243  END_SAFE_PARSER_BLOCK;
244 }
245 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
246 
247 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
248 {
249  BEGIN_SAFE_PARSER_BLOCK(userData);
250 
251  // len is since the content string might not be null terminated,
252  // so we have to build out own and pass it up special....
253  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
254  string characters("");
255  characters.reserve(len);
256  const xmlChar* contentEnd = content+len;
257  while(content != contentEnd)
258  {
259  characters += (const char)(*content++);
260  }
261 
262  parser.onCharacters(characters);
263 
264  END_SAFE_PARSER_BLOCK;
265 }
266 
267 static void ncmlWarning(void* userData, const char* msg, ...)
268 {
269  BEGIN_SAFE_PARSER_BLOCK(userData);
270 
271  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
272 
273  char buffer[1024];
274  va_list(args);
275  va_start(args, msg);
276  unsigned int len = sizeof(buffer);
277  vsnprintf(buffer, len, msg, args);
278  va_end(args);
279  parser.onParseWarning(string(buffer));
280  END_SAFE_PARSER_BLOCK;
281 }
282 
283 static void ncmlFatalError(void* userData, const char* msg, ...)
284 {
285  BEGIN_SAFE_PARSER_BLOCK(userData);
286 
287  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
288 
289  char buffer[1024];
290  va_list(args);
291  va_start(args, msg);
292  unsigned int len = sizeof(buffer);
293  vsnprintf(buffer, len, msg, args);
294  va_end(args);
295  parser.onParseError(string(buffer));
296 
297  END_SAFE_PARSER_BLOCK;
298 }
299 
301 // class SaxParserWrapper impl
302 
303 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
304  _parser(parser), _handler(), // inits to all nulls.
305  /*_context(0),*/ _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
306 {
307 }
308 
309 SaxParserWrapper::~SaxParserWrapper()
310 {
311  // Really not much to do... everything cleans itself up.
312  _state = NOT_PARSING;
313 #if 0
314  // Leak fix. jhrg 6/21/19
315  cleanupParser();
316 #endif
317 
318 }
319 
320 bool SaxParserWrapper::parse(const string& ncmlFilename)
321 {
322  bool success = true;
323 
324  // It's illegal to call this until it's done.
325  if (_state == PARSING) {
326  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
327  }
328 
329  // OK, now we're parsing
330  _state = PARSING;
331 
332 
333  setupParser(ncmlFilename);
334 
335  success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
336 
337 #if 0
338  // Old way where we have no context.
339  // int errNo = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
340  // success = (errNo == 0);
341 
342  // Any BESError thrown in SaxParser callbacks will be deferred by the safe handler blocks
343  // So that we safely pass this line.
344  // Even if not, _context is cleared in dtor just in case.
345  xmlParseDocument(_context);
346 
347  success = (_context->errNo == 0);
348 
349  cleanupParser();
350 #endif
351 
352  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
353  if (isExceptionState()) {
355  }
356 
357  // Otherwise, we're also done parsing.
358  _state = NOT_PARSING;
359  return success;
360 }
361 
363 {
364  _state = EXCEPTION;
365  _errorType = theErr.get_bes_error_type();
366  _errorMsg = theErr.get_message();
367  _errorLine = theErr.get_line();
368  _errorFile = theErr.get_file();
369 }
370 
371 // HACK admittedly a little gross, but it's weird to have to copy an exception
372 // and this seemed the safest way rather than making dynamic storage, etc.
374 {
375  // Clear our state out so we can parse again though.
376  _state = NOT_PARSING;
377 
378  switch (_errorType) {
379  case BES_INTERNAL_ERROR:
380  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
381  break;
382 
383  case BES_INTERNAL_FATAL_ERROR:
384  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
385  break;
386 
387  case BES_SYNTAX_USER_ERROR:
388  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
389  break;
390 
391  case BES_FORBIDDEN_ERROR:
392  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
393  break;
394 
395  case BES_NOT_FOUND_ERROR:
396  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
397  break;
398 
399  default:
400  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
401  break;
402  }
403 }
404 
406 {
407 #if 0
408  if (_context) {
409  return xmlSAX2GetLineNumber(_context);
410  }
411  else {
412  return -1;
413  }
414 #endif
415  return -1; //FIXME part of leak fix. jhrg 6.21.19
416 }
417 
418 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
419 {
420  h.internalSubset = 0;
421  h.isStandalone = 0;
422  h.hasInternalSubset = 0;
423  h.hasExternalSubset = 0;
424  h.resolveEntity = 0;
425  h.getEntity = 0;
426  h.entityDecl = 0;
427  h.notationDecl = 0;
428  h.attributeDecl = 0;
429  h.elementDecl = 0;
430  h.unparsedEntityDecl = 0;
431  h.setDocumentLocator = 0;
432  h.startDocument = 0;
433  h.endDocument = 0;
434  h.startElement = 0;
435  h.endElement = 0;
436  h.reference = 0;
437  h.characters = 0;
438  h.ignorableWhitespace = 0;
439  h.processingInstruction = 0;
440  h.comment = 0;
441  h.warning = 0;
442  h.error = 0;
443  h.fatalError = 0;
444  h.getParameterEntity = 0;
445  h.cdataBlock = 0;
446  h.externalSubset = 0;
447 
448  // unsigned int initialized; magic number the init should fill in
449  /* The following fields are extensions available only on version 2 */
450  // void *_private; //i'd assume i don't set this either...
451  h.startElementNs = 0;
452  h.endElementNs = 0;
453  h.serror = 0;
454 }
455 
456 void SaxParserWrapper::setupParser(const string& filename)
457 {
458  // setup the handler for version 2,
459  // which sets an internal version magic number
460  // into _handler.initialized
461  // but which doesn't clear the handlers to 0.
462  xmlSAXVersion(&_handler, 2);
463 
464  // Initialize all handlers to 0 by hand to start
465  // so we don't blow those internal magic numbers.
466  setAllHandlerCBToNulls(_handler);
467 
468  // Put our static functions into the handler
469  _handler.startDocument = ncmlStartDocument;
470  _handler.endDocument = ncmlEndDocument;
471  _handler.warning = ncmlWarning;
472  _handler.error = ncmlFatalError;
473  _handler.fatalError = ncmlFatalError;
474  _handler.characters = ncmlCharacters;
475 
476  // We'll use one or the other until we're sure it works.
477 #if NCML_PARSER_USE_SAX2_NAMESPACES
478  _handler.startElement = 0;
479  _handler.endElement = 0;
480  _handler.startElementNs = ncmlSax2StartElementNs;
481  _handler.endElementNs = ncmlSax2EndElementNs;
482 #else
483  _handler.startElement = ncmlStartElement;
484  _handler.endElement = ncmlEndElement;
485  _handler.startElementNs = 0;
486  _handler.endElementNs = 0;
487 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
488 
489  // Create the non-validating parser context for the file
490  // using this as the userData for making exception-safe
491  // C++ calls.
492 
493 #if 0
494  // Leak fix. jhrg 6/21/19
495  _context = xmlCreateFileParserCtxt(filename.c_str());
496  if (!_context) {
497  THROW_NCML_PARSE_ERROR(-1, "Cannot parse: Unable to create a libxml parse context for " + filename);
498  }
499  _context->sax = &_handler;
500  _context->userData = this;
501  _context->validate = false;
502 #endif
503 }
504 
505 #if 0
506 // Leak fix. jhrg 6/21/19
507 void SaxParserWrapper::cleanupParser() throw ()
508 {
509 #if 0
510  // Leak fix. jhrg 6/21/19
511  if (_context) {
512  // Remove our handler from it.
513  _context->sax = NULL;
514 
515  // Free it up.
516  xmlFreeParserCtxt(_context);
517  _context = 0;
518  }
519 #endif
520 }
521 #endif
522 
523 
BESError::get_bes_error_type
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
BESError::get_line
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
ncml_module::SaxParserWrapper::rethrowException
void rethrowException()
Definition: SaxParserWrapper.cc:373
BESInternalFatalError
exception thrown if an internal error is found and is fatal to the BES
Definition: BESInternalFatalError.h:43
ncml_module::XMLAttribute::fromSAX2NamespaceAttributes
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:92
ncml_module::XMLNamespaceMap::fromSAX2Namespaces
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:318
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
BESError::get_message
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
BESError::get_file
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
ncml_module::XMLAttributeMap
Definition: XMLHelpers.h:96
ncml_module::SaxParserWrapper::isExceptionState
bool isExceptionState() const
Definition: SaxParserWrapper.h:152
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
ncml_module::XMLAttributeMap::addAttribute
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:165
BESForbiddenError
error thrown if the BES is not allowed to access the resource requested
Definition: BESForbiddenError.h:40
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
ncml_module::SaxParserWrapper::getCurrentParseLine
int getCurrentParseLine() const
Definition: SaxParserWrapper.cc:405
ncml_module
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
Definition: AggregationElement.cc:72
ncml_module::XMLAttribute
Definition: XMLHelpers.h:61
ncml_module::XMLNamespaceMap
Definition: XMLHelpers.h:153
ncml_module::SaxParser
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
BESError
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
ncml_module::XMLAttributeMap::clear
void clear()
Definition: XMLHelpers.cc:159
ncml_module::SaxParserWrapper::parse
bool parse(const string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
Definition: SaxParserWrapper.cc:320
ncml_module::SaxParserWrapper::deferException
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Definition: SaxParserWrapper.cc:362