g4tools  5.4.0
loader
Go to the documentation of this file.
1 // Copyright (C) 2010, Guy Barrand. All rights reserved.
2 // See the file tools.license for terms.
3 
4 #ifndef tools_xml_loader
5 #define tools_xml_loader
6 
7 #include <tools/xml/tree>
8 #include <tools/file>
9 #include <tools/mnmx>
10 
11 #include <cstdio>
12 #include <cctype> //iscntrl
13 
14 #ifndef TOOLS_USE_OUREX_EXPAT
15 #include <expat.h>
16 #else
17 #include <ourex_expat.h>
18 #endif
19 
20 #ifdef TOOLS_MEM
21 #include <tools/mem>
22 namespace tools {
23 extern "C" {
24 inline void* xml_malloc(size_t a_size){
25  tools::mem::increment(tools::s_malloc().c_str());
26  return ::malloc(a_size);
27 }
28 inline void* xml_realloc(void* a_ptr,size_t a_size){
29  if(a_ptr==NULL) tools::mem::increment(tools::s_malloc().c_str());
30  return ::realloc(a_ptr,a_size);
31 }
32 inline void xml_free(void* a_ptr){
33  if(a_ptr!=NULL) tools::mem::decrement(tools::s_malloc().c_str());
34  ::free(a_ptr);
35 }
36 }}
37 #endif
38 
39 namespace tools {
40 namespace xml {
41 
42 class loader {
43 #ifdef TOOLS_MEM
45 #endif
46 public:
48  std::ostream& a_out,bool a_verbose = false)
49  :m_factory(a_factory)
50  ,m_out(a_out)
51  ,m_verbose(a_verbose)
52  ,m_take_cntrl(false)
53 
54  ,m_errors(0)
55  ,m_top(0) // Used to cleanup in case XML parsing failed.
56  ,m_current(0)
58  ,m_depth(0)
59  ,m_abort(false)
60  {
61 #ifdef TOOLS_MEM
62  tools::mem::increment(s_class().c_str());
63 #endif
64  }
65 
66  virtual ~loader(){
67  delete m_compressed_reader;
68  clear();
69 #ifdef TOOLS_MEM
70  tools::mem::decrement(s_class().c_str());
71 #endif
72  }
73 
74 protected:
75  loader(const loader& a_from)
76  :m_factory(a_from.m_factory)
77  ,m_out(a_from.m_out)
78  ,m_verbose(a_from.m_verbose)
79  ,m_take_cntrl(a_from.m_take_cntrl)
80 
81  ,m_errors(0)
82  ,m_top(0) // Used to cleanup in case XML parsing failed.
83  ,m_current(0)
85  ,m_depth(0)
86  ,m_abort(false)
87  {
88 #ifdef TOOLS_MEM
89  tools::mem::increment(s_class().c_str());
90 #endif
91  }
92  loader& operator=(const loader& a_from){
93  if(&a_from==this) return *this;
94 
95  m_verbose = a_from.m_verbose;
96  m_take_cntrl = a_from.m_take_cntrl;
97 
98  m_errors = 0;
99  m_top = 0;
100  m_current = 0;
102  m_depth = 0;
103  m_abort = false;
104 
105  return *this;
106  }
107 
108 public:
109  virtual bool visit_end_element(tools::xml::tree&,bool& a_keep) {
110  a_keep = true;
111  return true;
112  }
113 
114 public:
115  void set_take_cntrl_chars(bool a_value) {m_take_cntrl = a_value;}
116 
117  std::ostream& out() const {return m_out;}
119  delete m_compressed_reader;
120  m_compressed_reader = aReader; //take ownership.
121  }
122 
123  unsigned int errors() const {return m_errors;}
124 
125  void set_tags(const std::vector<std::string>& a_tags){m_tags=a_tags;}
126  void add_tag(const std::string& a_tag){m_tags.push_back(a_tag);}
127 
128  bool load_file(const std::string& a_file,bool a_compressed) {
129  clear();
130  if(!parse_file(a_file,
131  (XML_StartElementHandler)start_element,
132  (XML_EndElementHandler)end_element,
133  this,a_compressed)) {
134  clear();
135  return false;
136  }
137  if(m_current) m_current->set_file(a_file);
138  return true;
139  }
140 
141  bool load_string(const std::string& a_string){
142  clear();
143  if(!parse_buffer(a_string.size(),a_string.c_str(),
144  (XML_StartElementHandler)start_element,
145  (XML_EndElementHandler)end_element,
146  this)) {
147  clear();
148  return false;
149  }
150  return true;
151  }
152 
153  bool load_buffer(size_t aSize,const char* aBuffer){
154  clear();
155  if(!parse_buffer(aSize,aBuffer,
156  (XML_StartElementHandler)start_element,
157  (XML_EndElementHandler)end_element,
158  this)) {
159  clear();
160  return false;
161  }
162  return true;
163  }
164 
165  const tools::xml::tree* top_item() const {return m_current;}
166 
168 
169  void empty(){m_top = 0;m_current = 0;}
170 
171  bool is_tag(const std::string& a_string) const {
172  size_t number = m_tags.size();
173  for(size_t index=0;index<number;index++) {
174  if(a_string==m_tags[index]) return true;
175  }
176  return false;
177  }
178 
179 protected:
180  void clear(){
181  // In case of problem, deleting m_current is not sufficient.
182  delete m_top;
183  m_top = 0;
184  m_current = 0;
185  }
186 
187  bool parse_buffer(size_t aSize,const char* aBuffer,
188  XML_StartElementHandler a_start,XML_EndElementHandler a_end,
189  void* a_tag){
190  m_errors = 0;
191  if(!aSize) return true; //nothing to do.
192  m_depth = 0;
193  m_abort = false;
194 
195 #ifdef TOOLS_MEM
196  XML_Memory_Handling_Suite mem;
197  mem.malloc_fcn = xml_malloc;
198  mem.realloc_fcn = xml_realloc;
199  mem.free_fcn = xml_free;
200  XML_Parser _parser = XML_ParserCreate_MM(NULL,&mem,NULL);
201 #else
202  XML_Parser _parser = XML_ParserCreate(NULL);
203 #endif
204 
205  XML_SetUserData(_parser,a_tag);
206  XML_SetElementHandler(_parser,a_start,a_end);
207  XML_SetCharacterDataHandler(_parser,(XML_CharacterDataHandler)character_data_handler);
208  //XML_SetProcessingInstructionHandler(_parser,processingInstructionHandler);
209  char* buf = (char*)aBuffer;
210  size_t l = aSize;
211  int done = 0;
212  do {
213  size_t len = tools::mn<size_t>(l,BUFSIZ); //BUFSIZ in cstdio
214  done = len < BUFSIZ ? 1 : 0;
215  if(XML_Parse(_parser, buf, (int)len, done)==XML_STATUS_ERROR) {
216  m_out << "parse_buffer :"
217  << " " << XML_ErrorString(XML_GetErrorCode(_parser))
218  << " at line " << (int)XML_GetCurrentLineNumber(_parser)
219  << " at byte index " << (int)XML_GetCurrentByteIndex(_parser)
220  << std::endl;
221  {XML_Index pos = XML_GetCurrentByteIndex(_parser);
222  XML_Index pmn = tools::mx<XML_Index>(pos-10,0);
223  XML_Index pmx = tools::mn<XML_Index>(pos+10,XML_Index(aSize)-1);
224  std::string c = " ";
225  {for(XML_Index p=pmn;p<=pmx;p++) {c[0] = *(aBuffer+p);m_out << c;}
226  m_out << std::endl;}
227  {for(XML_Index p=pmn;p<pos;p++) m_out << " ";
228  m_out << "^" << std::endl;}}
229  XML_ParserFree(_parser);
230  return false;
231  }
232  if(m_abort) {
233  XML_ParserFree(_parser);
234  return false;
235  }
236  buf += len;
237  l -= len;
238  } while (!done);
239  XML_ParserFree(_parser);
240  return true;
241  }
242 
243  bool parse_file(const std::string& a_file,
244  XML_StartElementHandler a_start,XML_EndElementHandler a_end,
245  void* a_tag,bool a_compressed){
246  if(m_verbose) {
247  m_out << "parse_file :"
248  << " parse file " << tools::sout(a_file) << "..." << std::endl;
249  }
250  m_errors = 0;
251 
252  bool use_zlib = false;
253  if(a_compressed) {
254  if(m_verbose) {
255  m_out << "parse_file :"
256  << " uncompress requested for file "
257  << tools::sout(a_file) << "."
258  << std::endl;
259  }
260  use_zlib = true;
261  } else {
262  // may be compressed anyway :
263  bool compressed;
264  if(!tools::file::is_gzip(a_file,compressed)) {
265  m_out << "parse_file :"
266  << " tools::file::is_gzip() failed for " << a_file << "."
267  << std::endl;
268  return false;
269  }
270  if(compressed) use_zlib = true;
271  }
272 
273  tools::file::reader* freader = 0;
274  bool delete_freader = false;
275  if(use_zlib) {
276  if(!m_compressed_reader) {
277  m_out << "parse_file :"
278  << " no compressed reader given."
279  << std::endl;
280  return false;
281  }
282  freader = m_compressed_reader;
283  } else {
284  freader = new tools::FILE_reader();
285  delete_freader = true;
286  }
287  if(!freader->open(a_file)) {
288  m_out << "parse_file :"
289  << " can't open file " << a_file << std::endl;
290  if(delete_freader) delete freader;
291  return false;
292  }
293 
294  m_depth = 0;
295  m_abort = false;
296 
297 #ifdef TOOLS_MEM
298  XML_Memory_Handling_Suite mem;
299  mem.malloc_fcn = xml_malloc;
300  mem.realloc_fcn = xml_realloc;
301  mem.free_fcn = xml_free;
302  XML_Parser _parser = XML_ParserCreate_MM(NULL,&mem,NULL);
303 #else
304  XML_Parser _parser = XML_ParserCreate(NULL);
305 #endif
306 
307  XML_SetUserData(_parser,a_tag);
308  XML_SetElementHandler(_parser,a_start,a_end);
309  XML_SetCharacterDataHandler(_parser,(XML_CharacterDataHandler)character_data_handler);
310  //XML_SetProcessingInstructionHandler(_parser,
311  // processingInstructionHandler);
312 
313 
314  //char buf[1024 * BUFSIZ];
315  char buf[BUFSIZ];
316  int done = 0;
317  do {
318  size_t len;
319  if(!freader->read(buf,sizeof(buf),len)) {
320  XML_ParserFree(_parser);
321  freader->close();
322  if(delete_freader) delete freader;
323  return false;
324  }
325  done = len < sizeof(buf) ? 1 : 0;
326  if(XML_Parse(_parser, buf, (int)len, done)==XML_STATUS_ERROR) {
327  m_out << "parse_file :"
328  << " in file " << tools::sout(a_file)
329  << " " << XML_ErrorString(XML_GetErrorCode(_parser))
330  << " at line " << (int)XML_GetCurrentLineNumber(_parser)
331  << std::endl;
332  XML_ParserFree(_parser);
333  freader->close();
334  if(delete_freader) delete freader;
335  return false;
336  }
337  if(m_abort) {
338  XML_ParserFree(_parser);
339  freader->close();
340  if(delete_freader) delete freader;
341  return false;
342  }
343  } while (!done);
344  XML_ParserFree(_parser);
345  freader->close();
346  if(m_verbose) {
347  m_out << "parse_file :"
348  << " parse file " << tools::sout(a_file) << " done." << std::endl;
349  }
350  if(delete_freader) delete freader;
351  return true;
352  }
353 
354 protected:
355  static void character_data_handler(void* aUserData,const XML_Char* a_string,int aLength){
356  loader* This = (loader*)aUserData;
357  std::string s;
358  s.resize(aLength);
359  size_t count = 0;
360  char* p = (char*)a_string;
361  for (int i = 0; i < aLength; i++, p++) {
362  if(This->m_take_cntrl || (!iscntrl(*p))) {
363  s[count] = *p;
364  count++;
365  }
366  }
367  if(count) {
368  s.resize(count);
369  This->m_value += s;
370  }
371  }
372 
373  static void start_element(void* aUserData,const XML_Char* a_name,const XML_Char** a_atbs){
374  loader* This = (loader*)aUserData;
375  if(This->m_abort) return; //Do nothing.
376 
377  This->m_depth++;
378  This->m_value = "";
379 
380  std::string name = a_name; //Can't be empty
381  if(This->is_tag(name)) {
382 
383  if(!This->m_current) {
384  if(This->m_depth==1) {
385  // Ok. Head.
386  } else {
387  This->m_out << "start_element :"
388  << " no tag with a depth of " << This->m_depth
389  << std::endl;
390  This->m_abort = true;
391  return;
392  }
393  } else {
394  int delta = This->m_current->depth() - This->m_depth;
395  if(delta>=1) {
396  This->m_out << "start_element :"
397  << " for element " << tools::sout(name)
398  << " tag with a delta depth of " << delta
399  << std::endl;
400  This->m_abort = true;
401  return;
402  }
403  }
404 
405  std::vector<tools::xml::tree::atb> atbs;
406  {const XML_Char** a_atts = a_atbs;
407  while((*a_atts)&&(*(a_atts+1))) {
408  atbs.push_back(tools::xml::tree::atb(*a_atts,*(a_atts+1)));
409  a_atts+=2;
410  }}
411 
412  tools::xml::tree* parent = This->m_current;
413  tools::xml::tree* _tree = This->m_factory.create(name,atbs,parent);
414  if(!_tree) {
415  This->m_out << "start_element :"
416  << " can't create a tree for tag " << tools::sout(name)
417  << std::endl;
418  This->m_abort = true;
419  return;
420  }
421 
422  //out << "start_element :" << std::endl;
423  //_tree->print_xml(*(This->m_printer),"debug : ");
424 
425  if(parent) parent->add_child(_tree);
426 
427 /*
428  if(This->m_current && !This->m_current->parent()) {
429  This->m_out << "start_element :"
430  << " warning : current tree without parent."
431  << " Potential mem leak."
432  << std::endl;
433  }
434 */
435 
436  This->m_current = _tree;
437  _tree->set_depth(This->m_depth); // Internal only.
438 
439  if(!This->m_top) This->m_top = _tree;
440 
441  } else {
442 
443  if(!This->m_current) {
444 
445  // Can't be in a non-tag without a tag !
446  This->m_out << "start_element :"
447  << " for element " << tools::sout(name)
448  << " non-tag without some parent tag."
449  << std::endl;
450  This->m_abort = true;
451  return;
452 
453  } else {
454 
455  int delta = This->m_depth - This->m_current->depth();
456  if(delta>1) {
457 
458  This->m_out << "start_element :"
459  << " for element " << tools::sout(name)
460  << " grand child of a tag."
461  << std::endl;
462  This->m_abort = true;
463  return;
464 
465  } else if(delta==1) { //ok
466 
467  This->m_atbs.clear();
468  {const XML_Char** a_atts = a_atbs;
469  while((*a_atts)&&(*(a_atts+1))) {
470  This->m_atbs.push_back(tools::xml::tree::atb(*a_atts,*(a_atts+1)));
471  a_atts+=2;
472  }}
473 
474  } else {
475 
476  This->m_out << "start_element :"
477  << " for element " << tools::sout(name)
478  << " non-tag with a delta depth of " << delta
479  << std::endl;
480  This->m_abort = true;
481  return;
482 
483  }
484  }
485 
486  }
487  }
488 
489 
490  static void end_element(void* aUserData,const XML_Char* a_name){
491  loader* This = (loader*)aUserData;
492  if(This->m_abort) return; //Do nothing.
493 
494  if(This->m_current) {
495 
496  tools::xml::tree* tr = This->m_current;
497  int delta = This->m_depth - tr->depth();
498  if(delta==0) { //Back to a tag :
499 
500  tools::xml::tree* parent = tr->parent();
501 
502  bool keep = false;
503  bool cont = This->visit_end_element(*tr,keep);
504  if(keep) {
505  if(parent) {
506 /*
507  if(!This->m_current->parent()) {
508  This->m_out << "end_element :"
509  << " warning : current tree without parent (1)."
510  << " Potential mem leak."
511  << std::endl;
512  }
513 */
514  This->m_current = parent;
515  }
516  } else {
517  //FIXME : the top could be recreated !
518  if(This->m_top==tr) This->m_top = 0;
519 
520  if(parent) {
521  parent->remove_child(tr); //delete the tr
522  } else {
523  delete tr;
524  }
525 
526 /*
527  if(!This->m_current->parent()) {
528  This->m_out << "end_element :"
529  << " warning : current tree without parent (2)."
530  << " Potential mem leak."
531  << std::endl;
532  }
533 */
534 
535  This->m_current = parent; //parent could be 0 : ok.
536  }
537 
538  if(!cont) This->m_abort = true;
539 
540  } else if(delta==1) { //Back to a child of tag :
541 
542  //FIXME : correct m_value ? (Can we pick the one of a sub item ?)
543  tr->add_element(std::string(a_name),This->m_atbs,This->m_value);
544  //This->m_value = "";
545 
546  } else {
547 
548  This->m_out << "end_element :"
549  << " problem for element " << tools::sout(std::string(a_name))
550  << " : delta depth of " << delta
551  << std::endl;
552  This->m_abort = true;
553 
554  }
555 
556  }
557 
558 
559  This->m_depth--;
560  }
561 
562 protected:
564  std::ostream& m_out;
565 protected:
566  bool m_verbose;
568  unsigned int m_errors;
569  std::vector<std::string> m_tags;
572  //std::vector<tools::xml::tree::atb> m_atbs;
573  std::vector< std::pair<std::string,std::string> > m_atbs;
574  std::string m_value;
576  unsigned int m_depth;
577  bool m_abort;
578 };
579 
580 }}
581 
582 #endif
tools::xml::loader::operator=
loader & operator=(const loader &a_from)
Definition: loader:92
tools::xml::loader::is_tag
bool is_tag(const std::string &a_string) const
Definition: loader:171
tools::xml::loader::character_data_handler
static void character_data_handler(void *aUserData, const XML_Char *a_string, int aLength)
Definition: loader:355
mnmx
tools::xml::loader::m_errors
unsigned int m_errors
Definition: loader:568
tools::xml::tree::add_element
void add_element(const std::string &a_name, const std::vector< atb > &a_atbs, const std::string &a_value)
elements //////////////////////////////////////////
Definition: tree:176
tools::file::is_gzip
bool is_gzip(const std::string &a_file, bool &a_is)
Definition: gzip:12
tools::xml::loader::end_element
static void end_element(void *aUserData, const XML_Char *a_name)
Definition: loader:490
tools::xml::tree::parent
tree * parent() const
end osc //////////////////////////////////////
Definition: tree:828
tools::file::reader
Definition: file_reader:13
tools::xml::loader::parse_buffer
bool parse_buffer(size_t aSize, const char *aBuffer, XML_StartElementHandler a_start, XML_EndElementHandler a_end, void *a_tag)
Definition: loader:187
tools::xml::loader::m_verbose
bool m_verbose
Definition: loader:566
tools::file::reader::open
virtual bool open(const std::string &)=0
tools::xml::loader::~loader
virtual ~loader()
Definition: loader:66
tools::xml::loader::m_abort
bool m_abort
Definition: loader:577
tools::xml::loader::load_buffer
bool load_buffer(size_t aSize, const char *aBuffer)
Definition: loader:153
tools::xml::loader::m_out
std::ostream & m_out
Definition: loader:564
tools::xml::loader::clear
void clear()
Definition: loader:180
tools::xml::loader::top_item
tools::xml::tree * top_item()
Definition: loader:167
tools::xml::tree::add_child
void add_child(tree *a_tree)
Definition: tree:448
tools::xml::loader::empty
void empty()
Definition: loader:169
tools::xml::loader::m_tags
std::vector< std::string > m_tags
Definition: loader:569
tools::xml::loader::m_take_cntrl
bool m_take_cntrl
Definition: loader:567
tools::file::reader::close
virtual void close()=0
tree
TOOLS_SCLASS
#define TOOLS_SCLASS(a_name)
Definition: S_STRING:41
tools::xml::factory::create
virtual tree * create(const std::string &a_tag_name, const std::vector< atb > &a_atbs, tree *a_parent)=0
tools::xml::loader::m_factory
tools::xml::factory & m_factory
Definition: loader:563
mem
tools::xml::loader::out
std::ostream & out() const
Definition: loader:117
tools::xml::loader::m_value
std::string m_value
Definition: loader:574
tools::xml::factory
Definition: tree:38
tools::xml::loader::m_current
tools::xml::tree * m_current
Definition: loader:571
tools::xml::tree::set_file
void set_file(const std::string &a_file)
Definition: tree:915
tools::realloc
bool realloc(T *&a_pointer, size_t a_new_size, size_t a_old_size, bool a_init=false)
Definition: realloc:9
tools::sout
Definition: sout:17
tools::xml::loader::errors
unsigned int errors() const
Definition: loader:123
tools::xml::loader::m_compressed_reader
tools::file::reader * m_compressed_reader
Definition: loader:575
tools::xml::loader::load_file
bool load_file(const std::string &a_file, bool a_compressed)
Definition: loader:128
tools::xml::tree::set_depth
void set_depth(unsigned int a_depth)
Definition: tree:911
tools::xml::loader::set_tags
void set_tags(const std::vector< std::string > &a_tags)
Definition: loader:125
tools::xml::tree
Definition: tree:47
tools::xml::loader::load_string
bool load_string(const std::string &a_string)
Definition: loader:141
tools::xml::loader::loader
loader(const loader &a_from)
Definition: loader:75
tools::xml::loader::m_depth
unsigned int m_depth
Definition: loader:576
tools
inlined C code : ///////////////////////////////////
Definition: aida_ntuple:26
tools::file::reader::read
virtual bool read(char *, unsigned int, size_t &)=0
tools::xml::loader::start_element
static void start_element(void *aUserData, const XML_Char *a_name, const XML_Char **a_atbs)
Definition: loader:373
tools::xml::loader::add_tag
void add_tag(const std::string &a_tag)
Definition: loader:126
tools::xml::loader::set_compressed_reader
void set_compressed_reader(tools::file::reader *aReader)
Definition: loader:118
tools::xml::loader::set_take_cntrl_chars
void set_take_cntrl_chars(bool a_value)
Definition: loader:115
tools::xml::loader::top_item
const tools::xml::tree * top_item() const
Definition: loader:165
tools::xml::loader::visit_end_element
virtual bool visit_end_element(tools::xml::tree &, bool &a_keep)
Definition: loader:109
tools::xml::tree::remove_child
void remove_child(tree *&a_tree, bool a_delete=true)
Definition: tree:840
tools::xml::loader::m_top
tools::xml::tree * m_top
Definition: loader:570
tools::xml::loader
Definition: loader:42
tools::xml::tree::depth
unsigned int depth() const
Definition: tree:912
file
tools::xml::loader::loader
loader(tools::xml::factory &a_factory, std::ostream &a_out, bool a_verbose=false)
Definition: loader:47
tools::xml::loader::m_atbs
std::vector< std::pair< std::string, std::string > > m_atbs
Definition: loader:573
tools::xml::loader::parse_file
bool parse_file(const std::string &a_file, XML_StartElementHandler a_start, XML_EndElementHandler a_end, void *a_tag, bool a_compressed)
Definition: loader:243
tools::FILE_reader
Definition: file:594
tools::xml::tree::atb
std::pair< std::string, std::string > atb
Definition: tree:57