Subversion Repositories DevTools

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4384 dpurdie 1
package XML::Simple;
2
BEGIN {
3
  $XML::Simple::VERSION = '2.20';
4
}
5
 
6
=head1 NAME
7
 
8
XML::Simple - Easily read/write XML (esp config files)
9
 
10
=head1 SYNOPSIS
11
 
12
    use XML::Simple qw(:strict);
13
 
14
    my $ref = XMLin([<xml file or string>] [, <options>]);
15
 
16
    my $xml = XMLout($hashref [, <options>]);
17
 
18
Or the object oriented way:
19
 
20
    require XML::Simple qw(:strict);
21
 
22
    my $xs = XML::Simple->new([<options>]);
23
 
24
    my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
25
 
26
    my $xml = $xs->XMLout($hashref [, <options>]);
27
 
28
(or see L<"SAX SUPPORT"> for 'the SAX way').
29
 
30
Note, in these examples, the square brackets are used to denote optional items
31
not to imply items should be supplied in arrayrefs.
32
 
33
=cut
34
 
35
# See after __END__ for more POD documentation
36
 
37
 
38
# Load essentials here, other modules loaded on demand later
39
 
40
use strict;
41
use Carp;
42
require Exporter;
43
 
44
 
45
##############################################################################
46
# Define some constants
47
#
48
 
49
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
50
 
51
@ISA               = qw(Exporter);
52
@EXPORT            = qw(XMLin XMLout);
53
@EXPORT_OK         = qw(xml_in xml_out);
54
$PREFERRED_PARSER  = undef;
55
 
56
my %StrictMode     = ();
57
 
58
my @KnownOptIn     = qw(keyattr keeproot forcecontent contentkey noattr
59
                        searchpath forcearray cache suppressempty parseropts
60
                        grouptags nsexpand datahandler varattr variables
61
                        normalisespace normalizespace valueattr strictmode);
62
 
63
my @KnownOptOut    = qw(keyattr keeproot contentkey noattr
64
                        rootname xmldecl outputfile noescape suppressempty
65
                        grouptags nsexpand handler noindent attrindent nosort
66
                        valueattr numericescape strictmode);
67
 
68
my @DefKeyAttr     = qw(name key id);
69
my $DefRootName    = qq(opt);
70
my $DefContentKey  = qq(content);
71
my $DefXmlDecl     = qq(<?xml version='1.0' standalone='yes'?>);
72
 
73
my $xmlns_ns       = 'http://www.w3.org/2000/xmlns/';
74
my $bad_def_ns_jcn = '{' . $xmlns_ns . '}';     # LibXML::SAX workaround
75
 
76
 
77
##############################################################################
78
# Globals for use by caching routines
79
#
80
 
81
my %MemShareCache  = ();
82
my %MemCopyCache   = ();
83
 
84
 
85
##############################################################################
86
# Wrapper for Exporter - handles ':strict'
87
#
88
 
89
sub import {
90
  # Handle the :strict tag
91
 
92
  my($calling_package) = caller();
93
  _strict_mode_for_caller(1) if grep(/^:strict$/, @_);
94
 
95
  # Pass everything else to Exporter.pm
96
 
97
  @_ = grep(!/^:strict$/, @_);
98
  goto &Exporter::import;
99
}
100
 
101
 
102
##############################################################################
103
# Constructor for optional object interface.
104
#
105
 
106
sub new {
107
  my $class = shift;
108
 
109
  if(@_ % 2) {
110
    croak "Default options must be name=>value pairs (odd number supplied)";
111
  }
112
 
113
  my %known_opt;
114
  @known_opt{@KnownOptIn, @KnownOptOut} = ();
115
 
116
  my %raw_opt = @_;
117
  $raw_opt{strictmode} = _strict_mode_for_caller()
118
    unless exists $raw_opt{strictmode};
119
  my %def_opt;
120
  while(my($key, $val) = each %raw_opt) {
121
    my $lkey = lc($key);
122
    $lkey =~ s/_//g;
123
    croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
124
    $def_opt{$lkey} = $val;
125
  }
126
  my $self = { def_opt => \%def_opt };
127
 
128
  return(bless($self, $class));
129
}
130
 
131
 
132
##############################################################################
133
# Sub: _strict_mode_for_caller()
134
#
135
# Gets or sets the XML::Simple :strict mode flag for the calling namespace.
136
# Walks back through call stack to find the calling namespace and sets the
137
# :strict mode flag for that namespace if an argument was supplied and returns
138
# the flag value if not.
139
#
140
 
141
sub _strict_mode_for_caller {
142
  my $set_mode = @_;
143
  my $frame = 1;
144
  while(my($package) = caller($frame++)) {
145
    next if $package eq 'XML::Simple';
146
    $StrictMode{$package} = 1 if $set_mode;
147
    return $StrictMode{$package};
148
  }
149
  return(0);
150
}
151
 
152
 
153
##############################################################################
154
# Sub: _get_object()
155
#
156
# Helper routine called from XMLin() and XMLout() to create an object if none
157
# was provided.  Note, this routine does mess with the caller's @_ array.
158
#
159
 
160
sub _get_object {
161
  my $self;
162
  if($_[0]  and  UNIVERSAL::isa($_[0], 'XML::Simple')) {
163
    $self = shift;
164
  }
165
  else {
166
    $self = XML::Simple->new();
167
  }
168
 
169
  return $self;
170
}
171
 
172
 
173
##############################################################################
174
# Sub/Method: XMLin()
175
#
176
# Exported routine for slurping XML into a hashref - see pod for info.
177
#
178
# May be called as object method or as a plain function.
179
#
180
# Expects one arg for the source XML, optionally followed by a number of
181
# name => value option pairs.
182
#
183
 
184
sub XMLin {
185
  my $self = &_get_object;      # note, @_ is passed implicitly
186
 
187
  my $target = shift;
188
 
189
 
190
  # Work out whether to parse a string, a file or a filehandle
191
 
192
  if(not defined $target) {
193
    return $self->parse_file(undef, @_);
194
  }
195
 
196
  elsif($target eq '-') {
197
    local($/) = undef;
198
    $target = <STDIN>;
199
    return $self->parse_string(\$target, @_);
200
  }
201
 
202
  elsif(my $type = ref($target)) {
203
    if($type eq 'SCALAR') {
204
      return $self->parse_string($target, @_);
205
    }
206
    else {
207
      return $self->parse_fh($target, @_);
208
    }
209
  }
210
 
211
  elsif($target =~ m{<.*?>}s) {
212
    return $self->parse_string(\$target, @_);
213
  }
214
 
215
  else {
216
    return $self->parse_file($target, @_);
217
  }
218
}
219
 
220
 
221
##############################################################################
222
# Sub/Method: parse_file()
223
#
224
# Same as XMLin, but only parses from a named file.
225
#
226
 
227
sub parse_file {
228
  my $self = &_get_object;      # note, @_ is passed implicitly
229
 
230
  my $filename = shift;
231
 
232
  $self->handle_options('in', @_);
233
 
234
  $filename = $self->default_config_file if not defined $filename;
235
 
236
  $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
237
 
238
  # Check cache for previous parse
239
 
240
  if($self->{opt}->{cache}) {
241
    foreach my $scheme (@{$self->{opt}->{cache}}) {
242
      my $method = 'cache_read_' . $scheme;
243
      my $opt = $self->$method($filename);
244
      return($opt) if($opt);
245
    }
246
  }
247
 
248
  my $ref = $self->build_simple_tree($filename, undef);
249
 
250
  if($self->{opt}->{cache}) {
251
    my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
252
    $self->$method($ref, $filename);
253
  }
254
 
255
  return $ref;
256
}
257
 
258
 
259
##############################################################################
260
# Sub/Method: parse_fh()
261
#
262
# Same as XMLin, but only parses from a filehandle.
263
#
264
 
265
sub parse_fh {
266
  my $self = &_get_object;      # note, @_ is passed implicitly
267
 
268
  my $fh = shift;
269
  croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
270
        " as a filehandle" unless ref $fh;
271
 
272
  $self->handle_options('in', @_);
273
 
274
  return $self->build_simple_tree(undef, $fh);
275
}
276
 
277
 
278
##############################################################################
279
# Sub/Method: parse_string()
280
#
281
# Same as XMLin, but only parses from a string or a reference to a string.
282
#
283
 
284
sub parse_string {
285
  my $self = &_get_object;      # note, @_ is passed implicitly
286
 
287
  my $string = shift;
288
 
289
  $self->handle_options('in', @_);
290
 
291
  return $self->build_simple_tree(undef, ref $string ? $string : \$string);
292
}
293
 
294
 
295
##############################################################################
296
# Method: default_config_file()
297
#
298
# Returns the name of the XML file to parse if no filename (or XML string)
299
# was provided.
300
#
301
 
302
sub default_config_file {
303
  my $self = shift;
304
 
305
  require File::Basename;
306
 
307
  my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
308
 
309
  # Add script directory to searchpath
310
 
311
  if($script_dir) {
312
    unshift(@{$self->{opt}->{searchpath}}, $script_dir);
313
  }
314
 
315
  return $basename . '.xml';
316
}
317
 
318
 
319
##############################################################################
320
# Method: build_simple_tree()
321
#
322
# Builds a 'tree' data structure as provided by XML::Parser and then
323
# 'simplifies' it as specified by the various options in effect.
324
#
325
 
326
sub build_simple_tree {
327
  my $self = shift;
328
 
329
  my $tree = $self->build_tree(@_);
330
 
331
  return $self->{opt}->{keeproot}
332
         ? $self->collapse({}, @$tree)
333
         : $self->collapse(@{$tree->[1]});
334
}
335
 
336
 
337
##############################################################################
338
# Method: build_tree()
339
#
340
# This routine will be called if there is no suitable pre-parsed tree in a
341
# cache.  It parses the XML and returns an XML::Parser 'Tree' style data
342
# structure (summarised in the comments for the collapse() routine below).
343
#
344
# XML::Simple requires the services of another module that knows how to parse
345
# XML.  If XML::SAX is installed, the default SAX parser will be used,
346
# otherwise XML::Parser will be used.
347
#
348
# This routine expects to be passed a filename as argument 1 or a 'string' as
349
# argument 2.  The 'string' might be a string of XML (passed by reference to
350
# save memory) or it might be a reference to an IO::Handle.  (This
351
# non-intuitive mess results in part from the way XML::Parser works but that's
352
# really no excuse).
353
#
354
 
355
sub build_tree {
356
  my $self     = shift;
357
  my $filename = shift;
358
  my $string   = shift;
359
 
360
 
361
  my $preferred_parser = $PREFERRED_PARSER;
362
  unless(defined($preferred_parser)) {
363
    $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
364
  }
365
  if($preferred_parser eq 'XML::Parser') {
366
    return($self->build_tree_xml_parser($filename, $string));
367
  }
368
 
369
  eval { require XML::SAX; };      # We didn't need it until now
370
  if($@) {                         # No XML::SAX - fall back to XML::Parser
371
    if($preferred_parser) {        # unless a SAX parser was expressly requested
372
      croak "XMLin() could not load XML::SAX";
373
    }
374
    return($self->build_tree_xml_parser($filename, $string));
375
  }
376
 
377
  $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
378
 
379
  my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
380
 
381
  $self->{nocollapse} = 1;
382
  my($tree);
383
  if($filename) {
384
    $tree = $sp->parse_uri($filename);
385
  }
386
  else {
387
    if(ref($string) && ref($string) ne 'SCALAR') {
388
      $tree = $sp->parse_file($string);
389
    }
390
    else {
391
      $tree = $sp->parse_string($$string);
392
    }
393
  }
394
 
395
  return($tree);
396
}
397
 
398
 
399
##############################################################################
400
# Method: build_tree_xml_parser()
401
#
402
# This routine will be called if XML::SAX is not installed, or if XML::Parser
403
# was specifically requested.  It takes the same arguments as build_tree() and
404
# returns the same data structure (XML::Parser 'Tree' style).
405
#
406
 
407
sub build_tree_xml_parser {
408
  my $self     = shift;
409
  my $filename = shift;
410
  my $string   = shift;
411
 
412
 
413
  eval {
414
    local($^W) = 0;      # Suppress warning from Expat.pm re File::Spec::load()
415
    require XML::Parser; # We didn't need it until now
416
  };
417
  if($@) {
418
    croak "XMLin() requires either XML::SAX or XML::Parser";
419
  }
420
 
421
  if($self->{opt}->{nsexpand}) {
422
    carp "'nsexpand' option requires XML::SAX";
423
  }
424
 
425
  my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
426
  my($tree);
427
  if($filename) {
428
    # $tree = $xp->parsefile($filename);  # Changed due to prob w/mod_perl
429
    open(my $xfh, '<', $filename) || croak qq($filename - $!);
430
    $tree = $xp->parse($xfh);
431
  }
432
  else {
433
    $tree = $xp->parse($$string);
434
  }
435
 
436
  return($tree);
437
}
438
 
439
 
440
##############################################################################
441
# Method: cache_write_storable()
442
#
443
# Wrapper routine for invoking Storable::nstore() to cache a parsed data
444
# structure.
445
#
446
 
447
sub cache_write_storable {
448
  my($self, $data, $filename) = @_;
449
 
450
  my $cachefile = $self->storable_filename($filename);
451
 
452
  require Storable;           # We didn't need it until now
453
 
454
  if ('VMS' eq $^O) {
455
    Storable::nstore($data, $cachefile);
456
  }
457
  else {
458
    # If the following line fails for you, your Storable.pm is old - upgrade
459
    Storable::lock_nstore($data, $cachefile);
460
  }
461
 
462
}
463
 
464
 
465
##############################################################################
466
# Method: cache_read_storable()
467
#
468
# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
469
# data structure.  Only returns cached data if the cache file exists and is
470
# newer than the source XML file.
471
#
472
 
473
sub cache_read_storable {
474
  my($self, $filename) = @_;
475
 
476
  my $cachefile = $self->storable_filename($filename);
477
 
478
  return unless(-r $cachefile);
479
  return unless((stat($cachefile))[9] > (stat($filename))[9]);
480
 
481
  require Storable;           # We didn't need it until now
482
 
483
  if ('VMS' eq $^O) {
484
    return(Storable::retrieve($cachefile));
485
  }
486
  else {
487
    return(Storable::lock_retrieve($cachefile));
488
  }
489
 
490
}
491
 
492
 
493
##############################################################################
494
# Method: storable_filename()
495
#
496
# Translates the supplied source XML filename into a filename for the storable
497
# cached data.  A '.stor' suffix is added after stripping an optional '.xml'
498
# suffix.
499
#
500
 
501
sub storable_filename {
502
  my($self, $cachefile) = @_;
503
 
504
  $cachefile =~ s{(\.xml)?$}{.stor};
505
  return $cachefile;
506
}
507
 
508
 
509
##############################################################################
510
# Method: cache_write_memshare()
511
#
512
# Takes the supplied data structure reference and stores it away in a global
513
# hash structure.
514
#
515
 
516
sub cache_write_memshare {
517
  my($self, $data, $filename) = @_;
518
 
519
  $MemShareCache{$filename} = [time(), $data];
520
}
521
 
522
 
523
##############################################################################
524
# Method: cache_read_memshare()
525
#
526
# Takes a filename and looks in a global hash for a cached parsed version.
527
#
528
 
529
sub cache_read_memshare {
530
  my($self, $filename) = @_;
531
 
532
  return unless($MemShareCache{$filename});
533
  return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
534
 
535
  return($MemShareCache{$filename}->[1]);
536
 
537
}
538
 
539
 
540
##############################################################################
541
# Method: cache_write_memcopy()
542
#
543
# Takes the supplied data structure and stores a copy of it in a global hash
544
# structure.
545
#
546
 
547
sub cache_write_memcopy {
548
  my($self, $data, $filename) = @_;
549
 
550
  require Storable;           # We didn't need it until now
551
 
552
  $MemCopyCache{$filename} = [time(), Storable::dclone($data)];
553
}
554
 
555
 
556
##############################################################################
557
# Method: cache_read_memcopy()
558
#
559
# Takes a filename and looks in a global hash for a cached parsed version.
560
# Returns a reference to a copy of that data structure.
561
#
562
 
563
sub cache_read_memcopy {
564
  my($self, $filename) = @_;
565
 
566
  return unless($MemCopyCache{$filename});
567
  return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
568
 
569
  return(Storable::dclone($MemCopyCache{$filename}->[1]));
570
 
571
}
572
 
573
 
574
##############################################################################
575
# Sub/Method: XMLout()
576
#
577
# Exported routine for 'unslurping' a data structure out to XML.
578
#
579
# Expects a reference to a data structure and an optional list of option
580
# name => value pairs.
581
#
582
 
583
sub XMLout {
584
  my $self = &_get_object;      # note, @_ is passed implicitly
585
 
586
  croak "XMLout() requires at least one argument" unless(@_);
587
  my $ref = shift;
588
 
589
  $self->handle_options('out', @_);
590
 
591
 
592
  # If namespace expansion is set, XML::NamespaceSupport is required
593
 
594
  if($self->{opt}->{nsexpand}) {
595
    require XML::NamespaceSupport;
596
    $self->{nsup} = XML::NamespaceSupport->new();
597
    $self->{ns_prefix} = 'aaa';
598
  }
599
 
600
 
601
  # Wrap top level arrayref in a hash
602
 
603
  if(UNIVERSAL::isa($ref, 'ARRAY')) {
604
    $ref = { anon => $ref };
605
  }
606
 
607
 
608
  # Extract rootname from top level hash if keeproot enabled
609
 
610
  if($self->{opt}->{keeproot}) {
611
    my(@keys) = keys(%$ref);
612
    if(@keys == 1) {
613
      $ref = $ref->{$keys[0]};
614
      $self->{opt}->{rootname} = $keys[0];
615
    }
616
  }
617
 
618
  # Ensure there are no top level attributes if we're not adding root elements
619
 
620
  elsif($self->{opt}->{rootname} eq '') {
621
    if(UNIVERSAL::isa($ref, 'HASH')) {
622
      my $refsave = $ref;
623
      $ref = {};
624
      foreach (keys(%$refsave)) {
625
        if(ref($refsave->{$_})) {
626
          $ref->{$_} = $refsave->{$_};
627
        }
628
        else {
629
          $ref->{$_} = [ $refsave->{$_} ];
630
        }
631
      }
632
    }
633
  }
634
 
635
 
636
  # Encode the hashref and write to file if necessary
637
 
638
  $self->{_ancestors} = [];
639
  my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
640
  delete $self->{_ancestors};
641
 
642
  if($self->{opt}->{xmldecl}) {
643
    $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
644
  }
645
 
646
  if($self->{opt}->{outputfile}) {
647
    if(ref($self->{opt}->{outputfile})) {
648
      my $fh = $self->{opt}->{outputfile};
649
      if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
650
        eval { require IO::Handle; };
651
        croak $@ if $@;
652
      }
653
      return($fh->print($xml));
654
    }
655
    else {
656
      open(my $out, '>', "$self->{opt}->{outputfile}") ||
657
        croak "open($self->{opt}->{outputfile}): $!";
658
      binmode($out, ':utf8') if($] >= 5.008);
659
      print $out $xml or croak "print: $!";
660
      close $out or croak "close: $!";
661
    }
662
  }
663
  elsif($self->{opt}->{handler}) {
664
    require XML::SAX;
665
    my $sp = XML::SAX::ParserFactory->parser(
666
               Handler => $self->{opt}->{handler}
667
             );
668
    return($sp->parse_string($xml));
669
  }
670
  else {
671
    return($xml);
672
  }
673
}
674
 
675
 
676
##############################################################################
677
# Method: handle_options()
678
#
679
# Helper routine for both XMLin() and XMLout().  Both routines handle their
680
# first argument and assume all other args are options handled by this routine.
681
# Saves a hash of options in $self->{opt}.
682
#
683
# If default options were passed to the constructor, they will be retrieved
684
# here and merged with options supplied to the method call.
685
#
686
# First argument should be the string 'in' or the string 'out'.
687
#
688
# Remaining arguments should be name=>value pairs.  Sets up default values
689
# for options not supplied.  Unrecognised options are a fatal error.
690
#
691
 
692
sub handle_options  {
693
  my $self = shift;
694
  my $dirn = shift;
695
 
696
 
697
  # Determine valid options based on context
698
 
699
  my %known_opt;
700
  if($dirn eq 'in') {
701
    @known_opt{@KnownOptIn} = @KnownOptIn;
702
  }
703
  else {
704
    @known_opt{@KnownOptOut} = @KnownOptOut;
705
  }
706
 
707
 
708
  # Store supplied options in hashref and weed out invalid ones
709
 
710
  if(@_ % 2) {
711
    croak "Options must be name=>value pairs (odd number supplied)";
712
  }
713
  my %raw_opt  = @_;
714
  my $opt      = {};
715
  $self->{opt} = $opt;
716
 
717
  while(my($key, $val) = each %raw_opt) {
718
    my $lkey = lc($key);
719
    $lkey =~ s/_//g;
720
    croak "Unrecognised option: $key" unless($known_opt{$lkey});
721
    $opt->{$lkey} = $val;
722
  }
723
 
724
 
725
  # Merge in options passed to constructor
726
 
727
  foreach (keys(%known_opt)) {
728
    unless(exists($opt->{$_})) {
729
      if(exists($self->{def_opt}->{$_})) {
730
        $opt->{$_} = $self->{def_opt}->{$_};
731
      }
732
    }
733
  }
734
 
735
 
736
  # Set sensible defaults if not supplied
737
 
738
  if(exists($opt->{rootname})) {
739
    unless(defined($opt->{rootname})) {
740
      $opt->{rootname} = '';
741
    }
742
  }
743
  else {
744
    $opt->{rootname} = $DefRootName;
745
  }
746
 
747
  if($opt->{xmldecl}  and  $opt->{xmldecl} eq '1') {
748
    $opt->{xmldecl} = $DefXmlDecl;
749
  }
750
 
751
  if(exists($opt->{contentkey})) {
752
    if($opt->{contentkey} =~ m{^-(.*)$}) {
753
      $opt->{contentkey} = $1;
754
      $opt->{collapseagain} = 1;
755
    }
756
  }
757
  else {
758
    $opt->{contentkey} = $DefContentKey;
759
  }
760
 
761
  unless(exists($opt->{normalisespace})) {
762
    $opt->{normalisespace} = $opt->{normalizespace};
763
  }
764
  $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
765
 
766
  # Cleanups for values assumed to be arrays later
767
 
768
  if($opt->{searchpath}) {
769
    unless(ref($opt->{searchpath})) {
770
      $opt->{searchpath} = [ $opt->{searchpath} ];
771
    }
772
  }
773
  else  {
774
    $opt->{searchpath} = [ ];
775
  }
776
 
777
  if($opt->{cache}  and !ref($opt->{cache})) {
778
    $opt->{cache} = [ $opt->{cache} ];
779
  }
780
  if($opt->{cache}) {
781
    $_ = lc($_) foreach (@{$opt->{cache}});
782
    foreach my $scheme (@{$opt->{cache}}) {
783
      my $method = 'cache_read_' . $scheme;
784
      croak "Unsupported caching scheme: $scheme"
785
        unless($self->can($method));
786
    }
787
  }
788
 
789
  if(exists($opt->{parseropts})) {
790
    if($^W) {
791
      carp "Warning: " .
792
           "'ParserOpts' is deprecated, contact the author if you need it";
793
    }
794
  }
795
  else {
796
    $opt->{parseropts} = [ ];
797
  }
798
 
799
 
800
  # Special cleanup for {forcearray} which could be regex, arrayref or boolean
801
  # or left to default to 0
802
 
803
  if(exists($opt->{forcearray})) {
804
    if(ref($opt->{forcearray}) eq 'Regexp') {
805
      $opt->{forcearray} = [ $opt->{forcearray} ];
806
    }
807
 
808
    if(ref($opt->{forcearray}) eq 'ARRAY') {
809
      my @force_list = @{$opt->{forcearray}};
810
      if(@force_list) {
811
        $opt->{forcearray} = {};
812
        foreach my $tag (@force_list) {
813
          if(ref($tag) eq 'Regexp') {
814
            push @{$opt->{forcearray}->{_regex}}, $tag;
815
          }
816
          else {
817
            $opt->{forcearray}->{$tag} = 1;
818
          }
819
        }
820
      }
821
      else {
822
        $opt->{forcearray} = 0;
823
      }
824
    }
825
    else {
826
      $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
827
    }
828
  }
829
  else {
830
    if($opt->{strictmode}  and  $dirn eq 'in') {
831
      croak "No value specified for 'ForceArray' option in call to XML$dirn()";
832
    }
833
    $opt->{forcearray} = 0;
834
  }
835
 
836
 
837
  # Special cleanup for {keyattr} which could be arrayref or hashref or left
838
  # to default to arrayref
839
 
840
  if(exists($opt->{keyattr}))  {
841
    if(ref($opt->{keyattr})) {
842
      if(ref($opt->{keyattr}) eq 'HASH') {
843
 
844
        # Make a copy so we can mess with it
845
 
846
        $opt->{keyattr} = { %{$opt->{keyattr}} };
847
 
848
 
849
        # Convert keyattr => { elem => '+attr' }
850
        # to keyattr => { elem => [ 'attr', '+' ] }
851
 
852
        foreach my $el (keys(%{$opt->{keyattr}})) {
853
          if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
854
            $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
855
            if($opt->{strictmode}  and  $dirn eq 'in') {
856
              next if($opt->{forcearray} == 1);
857
              next if(ref($opt->{forcearray}) eq 'HASH'
858
                      and $opt->{forcearray}->{$el});
859
              croak "<$el> set in KeyAttr but not in ForceArray";
860
            }
861
          }
862
          else {
863
            delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
864
          }
865
        }
866
      }
867
      else {
868
        if(@{$opt->{keyattr}} == 0) {
869
          delete($opt->{keyattr});
870
        }
871
      }
872
    }
873
    else {
874
      $opt->{keyattr} = [ $opt->{keyattr} ];
875
    }
876
  }
877
  else  {
878
    if($opt->{strictmode}) {
879
      croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
880
    }
881
    $opt->{keyattr} = [ @DefKeyAttr ];
882
  }
883
 
884
 
885
  # Special cleanup for {valueattr} which could be arrayref or hashref
886
 
887
  if(exists($opt->{valueattr})) {
888
    if(ref($opt->{valueattr}) eq 'ARRAY') {
889
      $opt->{valueattrlist} = {};
890
      $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
891
    }
892
  }
893
 
894
  # make sure there's nothing weird in {grouptags}
895
 
896
  if($opt->{grouptags}) {
897
    croak "Illegal value for 'GroupTags' option - expected a hashref"
898
      unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
899
 
900
    while(my($key, $val) = each %{$opt->{grouptags}}) {
901
      next if $key ne $val;
902
      croak "Bad value in GroupTags: '$key' => '$val'";
903
    }
904
  }
905
 
906
 
907
  # Check the {variables} option is valid and initialise variables hash
908
 
909
  if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
910
    croak "Illegal value for 'Variables' option - expected a hashref";
911
  }
912
 
913
  if($opt->{variables}) {
914
    $self->{_var_values} = { %{$opt->{variables}} };
915
  }
916
  elsif($opt->{varattr}) {
917
    $self->{_var_values} = {};
918
  }
919
 
920
}
921
 
922
 
923
##############################################################################
924
# Method: find_xml_file()
925
#
926
# Helper routine for XMLin().
927
# Takes a filename, and a list of directories, attempts to locate the file in
928
# the directories listed.
929
# Returns a full pathname on success; croaks on failure.
930
#
931
 
932
sub find_xml_file  {
933
  my $self = shift;
934
  my $file = shift;
935
  my @search_path = @_;
936
 
937
 
938
  require File::Basename;
939
  require File::Spec;
940
 
941
  my($filename, $filedir) = File::Basename::fileparse($file);
942
 
943
  if($filename ne $file) {        # Ignore searchpath if dir component
944
    return($file) if(-e $file);
945
  }
946
  else {
947
    my($path);
948
    foreach $path (@search_path)  {
949
      my $fullpath = File::Spec->catfile($path, $file);
950
      return($fullpath) if(-e $fullpath);
951
    }
952
  }
953
 
954
  # If user did not supply a search path, default to current directory
955
 
956
  if(!@search_path) {
957
    return($file) if(-e $file);
958
    croak "File does not exist: $file";
959
  }
960
 
961
  croak "Could not find $file in ", join(':', @search_path);
962
}
963
 
964
 
965
##############################################################################
966
# Method: collapse()
967
#
968
# Helper routine for XMLin().  This routine really comprises the 'smarts' (or
969
# value add) of this module.
970
#
971
# Takes the parse tree that XML::Parser produced from the supplied XML and
972
# recurses through it 'collapsing' unnecessary levels of indirection (nested
973
# arrays etc) to produce a data structure that is easier to work with.
974
#
975
# Elements in the original parser tree are represented as an element name
976
# followed by an arrayref.  The first element of the array is a hashref
977
# containing the attributes.  The rest of the array contains a list of any
978
# nested elements as name+arrayref pairs:
979
#
980
#  <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
981
#
982
# The special element name '0' (zero) flags text content.
983
#
984
# This routine cuts down the noise by discarding any text content consisting of
985
# only whitespace and then moves the nested elements into the attribute hash
986
# using the name of the nested element as the hash key and the collapsed
987
# version of the nested element as the value.  Multiple nested elements with
988
# the same name will initially be represented as an arrayref, but this may be
989
# 'folded' into a hashref depending on the value of the keyattr option.
990
#
991
 
992
sub collapse {
993
  my $self = shift;
994
 
995
 
996
  # Start with the hash of attributes
997
 
998
  my $attr  = shift;
999
  if($self->{opt}->{noattr}) {                    # Discard if 'noattr' set
1000
    $attr = $self->new_hashref;
1001
  }
1002
  elsif($self->{opt}->{normalisespace} == 2) {
1003
    while(my($key, $value) = each %$attr) {
1004
      $attr->{$key} = $self->normalise_space($value)
1005
    }
1006
  }
1007
 
1008
 
1009
  # Do variable substitutions
1010
 
1011
  if(my $var = $self->{_var_values}) {
1012
    while(my($key, $val) = each(%$attr)) {
1013
      $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
1014
      $attr->{$key} = $val;
1015
    }
1016
  }
1017
 
1018
 
1019
  # Roll up 'value' attributes (but only if no nested elements)
1020
 
1021
  if(!@_  and  keys %$attr == 1) {
1022
    my($k) = keys %$attr;
1023
    if($self->{opt}->{valueattrlist}  and $self->{opt}->{valueattrlist}->{$k}) {
1024
      return $attr->{$k};
1025
    }
1026
  }
1027
 
1028
 
1029
  # Add any nested elements
1030
 
1031
  my($key, $val);
1032
  while(@_) {
1033
    $key = shift;
1034
    $val = shift;
1035
    $val = '' if not defined $val;
1036
 
1037
    if(ref($val)) {
1038
      $val = $self->collapse(@$val);
1039
      next if(!defined($val)  and  $self->{opt}->{suppressempty});
1040
    }
1041
    elsif($key eq '0') {
1042
      next if($val =~ m{^\s*$}s);  # Skip all whitespace content
1043
 
1044
      $val = $self->normalise_space($val)
1045
        if($self->{opt}->{normalisespace} == 2);
1046
 
1047
      # do variable substitutions
1048
 
1049
      if(my $var = $self->{_var_values}) {
1050
        $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
1051
      }
1052
 
1053
 
1054
      # look for variable definitions
1055
 
1056
      if(my $var = $self->{opt}->{varattr}) {
1057
        if(exists $attr->{$var}) {
1058
          $self->set_var($attr->{$var}, $val);
1059
        }
1060
      }
1061
 
1062
 
1063
      # Collapse text content in element with no attributes to a string
1064
 
1065
      if(!%$attr  and  !@_) {
1066
        return($self->{opt}->{forcecontent} ?
1067
          { $self->{opt}->{contentkey} => $val } : $val
1068
        );
1069
      }
1070
      $key = $self->{opt}->{contentkey};
1071
    }
1072
 
1073
 
1074
    # Combine duplicate attributes into arrayref if required
1075
 
1076
    if(exists($attr->{$key})) {
1077
      if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
1078
        push(@{$attr->{$key}}, $val);
1079
      }
1080
      else {
1081
        $attr->{$key} = [ $attr->{$key}, $val ];
1082
      }
1083
    }
1084
    elsif(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1085
      $attr->{$key} = [ $val ];
1086
    }
1087
    else {
1088
      if( $key ne $self->{opt}->{contentkey}
1089
          and (
1090
            ($self->{opt}->{forcearray} == 1)
1091
            or (
1092
              (ref($self->{opt}->{forcearray}) eq 'HASH')
1093
              and (
1094
                $self->{opt}->{forcearray}->{$key}
1095
                or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
1096
              )
1097
            )
1098
          )
1099
        ) {
1100
        $attr->{$key} = [ $val ];
1101
      }
1102
      else {
1103
        $attr->{$key} = $val;
1104
      }
1105
    }
1106
 
1107
  }
1108
 
1109
 
1110
  # Turn arrayrefs into hashrefs if key fields present
1111
 
1112
  if($self->{opt}->{keyattr}) {
1113
    while(($key,$val) = each %$attr) {
1114
      if(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1115
        $attr->{$key} = $self->array_to_hash($key, $val);
1116
      }
1117
    }
1118
  }
1119
 
1120
 
1121
  # disintermediate grouped tags
1122
 
1123
  if($self->{opt}->{grouptags}) {
1124
    while(my($key, $val) = each(%$attr)) {
1125
      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1126
      next unless(exists($self->{opt}->{grouptags}->{$key}));
1127
 
1128
      my($child_key, $child_val) =  %$val;
1129
 
1130
      if($self->{opt}->{grouptags}->{$key} eq $child_key) {
1131
        $attr->{$key}= $child_val;
1132
      }
1133
    }
1134
  }
1135
 
1136
 
1137
  # Fold hashes containing a single anonymous array up into just the array
1138
 
1139
  my $count = scalar keys %$attr;
1140
  if($count == 1
1141
     and  exists $attr->{anon}
1142
     and  UNIVERSAL::isa($attr->{anon}, 'ARRAY')
1143
  ) {
1144
    return($attr->{anon});
1145
  }
1146
 
1147
 
1148
  # Do the right thing if hash is empty, otherwise just return it
1149
 
1150
  if(!%$attr  and  exists($self->{opt}->{suppressempty})) {
1151
    if(defined($self->{opt}->{suppressempty})  and
1152
       $self->{opt}->{suppressempty} eq '') {
1153
      return('');
1154
    }
1155
    return(undef);
1156
  }
1157
 
1158
 
1159
  # Roll up named elements with named nested 'value' attributes
1160
 
1161
  if($self->{opt}->{valueattr}) {
1162
    while(my($key, $val) = each(%$attr)) {
1163
      next unless($self->{opt}->{valueattr}->{$key});
1164
      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1165
      my($k) = keys %$val;
1166
      next unless($k eq $self->{opt}->{valueattr}->{$key});
1167
      $attr->{$key} = $val->{$k};
1168
    }
1169
  }
1170
 
1171
  return($attr)
1172
 
1173
}
1174
 
1175
 
1176
##############################################################################
1177
# Method: set_var()
1178
#
1179
# Called when a variable definition is encountered in the XML.  (A variable
1180
# definition looks like <element attrname="name">value</element> where attrname
1181
# matches the varattr setting).
1182
#
1183
 
1184
sub set_var {
1185
  my($self, $name, $value) = @_;
1186
 
1187
  $self->{_var_values}->{$name} = $value;
1188
}
1189
 
1190
 
1191
##############################################################################
1192
# Method: get_var()
1193
#
1194
# Called during variable substitution to get the value for the named variable.
1195
#
1196
 
1197
sub get_var {
1198
  my($self, $name) = @_;
1199
 
1200
  my $value = $self->{_var_values}->{$name};
1201
  return $value if(defined($value));
1202
 
1203
  return '${' . $name . '}';
1204
}
1205
 
1206
 
1207
##############################################################################
1208
# Method: normalise_space()
1209
#
1210
# Strips leading and trailing whitespace and collapses sequences of whitespace
1211
# characters to a single space.
1212
#
1213
 
1214
sub normalise_space {
1215
  my($self, $text) = @_;
1216
 
1217
  $text =~ s/^\s+//s;
1218
  $text =~ s/\s+$//s;
1219
  $text =~ s/\s\s+/ /sg;
1220
 
1221
  return $text;
1222
}
1223
 
1224
 
1225
##############################################################################
1226
# Method: array_to_hash()
1227
#
1228
# Helper routine for collapse().
1229
# Attempts to 'fold' an array of hashes into an hash of hashes.  Returns a
1230
# reference to the hash on success or the original array if folding is
1231
# not possible.  Behaviour is controlled by 'keyattr' option.
1232
#
1233
 
1234
sub array_to_hash {
1235
  my $self     = shift;
1236
  my $name     = shift;
1237
  my $arrayref = shift;
1238
 
1239
  my $hashref  = $self->new_hashref;
1240
 
1241
  my($i, $key, $val, $flag);
1242
 
1243
 
1244
  # Handle keyattr => { .... }
1245
 
1246
  if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1247
    return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
1248
    ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
1249
    for($i = 0; $i < @$arrayref; $i++)  {
1250
      if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
1251
         exists($arrayref->[$i]->{$key})
1252
      ) {
1253
        $val = $arrayref->[$i]->{$key};
1254
        if(ref($val)) {
1255
          $self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
1256
          return($arrayref);
1257
        }
1258
        $val = $self->normalise_space($val)
1259
          if($self->{opt}->{normalisespace} == 1);
1260
        $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1261
          if(exists($hashref->{$val}));
1262
        $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
1263
        $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
1264
        delete $hashref->{$val}->{$key} unless($flag eq '+');
1265
      }
1266
      else {
1267
        $self->die_or_warn("<$name> element has no '$key' key attribute");
1268
        return($arrayref);
1269
      }
1270
    }
1271
  }
1272
 
1273
 
1274
  # Or assume keyattr => [ .... ]
1275
 
1276
  else {
1277
    my $default_keys =
1278
      join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
1279
 
1280
    ELEMENT: for($i = 0; $i < @$arrayref; $i++)  {
1281
      return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
1282
 
1283
      foreach $key (@{$self->{opt}->{keyattr}}) {
1284
        if(defined($arrayref->[$i]->{$key}))  {
1285
          $val = $arrayref->[$i]->{$key};
1286
          if(ref($val)) {
1287
            $self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
1288
              if not $default_keys;
1289
            return($arrayref);
1290
          }
1291
          $val = $self->normalise_space($val)
1292
            if($self->{opt}->{normalisespace} == 1);
1293
          $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1294
            if(exists($hashref->{$val}));
1295
          $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
1296
          delete $hashref->{$val}->{$key};
1297
          next ELEMENT;
1298
        }
1299
      }
1300
 
1301
      return($arrayref);    # No keyfield matched
1302
    }
1303
  }
1304
 
1305
  # collapse any hashes which now only have a 'content' key
1306
 
1307
  if($self->{opt}->{collapseagain}) {
1308
    $hashref = $self->collapse_content($hashref);
1309
  }
1310
 
1311
  return($hashref);
1312
}
1313
 
1314
 
1315
##############################################################################
1316
# Method: die_or_warn()
1317
#
1318
# Takes a diagnostic message and does one of three things:
1319
# 1. dies if strict mode is enabled
1320
# 2. warns if warnings are enabled but strict mode is not
1321
# 3. ignores message and returns silently if neither strict mode nor warnings
1322
#    are enabled
1323
#
1324
# Option 2 looks at the global warnings variable $^W - which is not really
1325
# appropriate in the modern world of lexical warnings - TODO: Fix
1326
 
1327
sub die_or_warn {
1328
  my $self = shift;
1329
  my $msg  = shift;
1330
 
1331
  croak $msg if($self->{opt}->{strictmode});
1332
  carp "Warning: $msg" if($^W);
1333
}
1334
 
1335
 
1336
##############################################################################
1337
# Method: new_hashref()
1338
#
1339
# This is a hook routine for overriding in a sub-class.  Some people believe
1340
# that using Tie::IxHash here will solve order-loss problems.
1341
#
1342
 
1343
sub new_hashref {
1344
  my $self = shift;
1345
 
1346
  return { @_ };
1347
}
1348
 
1349
 
1350
##############################################################################
1351
# Method: collapse_content()
1352
#
1353
# Helper routine for array_to_hash
1354
#
1355
# Arguments expected are:
1356
# - an XML::Simple object
1357
# - a hasref
1358
# the hashref is a former array, turned into a hash by array_to_hash because
1359
# of the presence of key attributes
1360
# at this point collapse_content avoids over-complicated structures like
1361
# dir => { libexecdir    => { content => '$exec_prefix/libexec' },
1362
#          localstatedir => { content => '$prefix' },
1363
#        }
1364
# into
1365
# dir => { libexecdir    => '$exec_prefix/libexec',
1366
#          localstatedir => '$prefix',
1367
#        }
1368
 
1369
sub collapse_content {
1370
  my $self       = shift;
1371
  my $hashref    = shift;
1372
 
1373
  my $contentkey = $self->{opt}->{contentkey};
1374
 
1375
  # first go through the values,checking that they are fit to collapse
1376
  foreach my $val (values %$hashref) {
1377
    return $hashref unless (     (ref($val) eq 'HASH')
1378
                             and (keys %$val == 1)
1379
                             and (exists $val->{$contentkey})
1380
                           );
1381
  }
1382
 
1383
  # now collapse them
1384
  foreach my $key (keys %$hashref) {
1385
    $hashref->{$key}=  $hashref->{$key}->{$contentkey};
1386
  }
1387
 
1388
  return $hashref;
1389
}
1390
 
1391
 
1392
##############################################################################
1393
# Method: value_to_xml()
1394
#
1395
# Helper routine for XMLout() - recurses through a data structure building up
1396
# and returning an XML representation of that structure as a string.
1397
#
1398
# Arguments expected are:
1399
# - the data structure to be encoded (usually a reference)
1400
# - the XML tag name to use for this item
1401
# - a string of spaces for use as the current indent level
1402
#
1403
 
1404
sub value_to_xml {
1405
  my $self = shift;;
1406
 
1407
 
1408
  # Grab the other arguments
1409
 
1410
  my($ref, $name, $indent) = @_;
1411
 
1412
  my $named = (defined($name) and $name ne '' ? 1 : 0);
1413
 
1414
  my $nl = "\n";
1415
 
1416
  my $is_root = $indent eq '' ? 1 : 0;   # Warning, dirty hack!
1417
  if($self->{opt}->{noindent}) {
1418
    $indent = '';
1419
    $nl     = '';
1420
  }
1421
 
1422
 
1423
  # Convert to XML
1424
 
1425
  if(ref($ref)) {
1426
    croak "circular data structures not supported"
1427
      if(grep($_ == $ref, @{$self->{_ancestors}}));
1428
    push @{$self->{_ancestors}}, $ref;
1429
  }
1430
  else {
1431
    if($named) {
1432
      return(join('',
1433
              $indent, '<', $name, '>',
1434
              ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
1435
              '</', $name, ">", $nl
1436
            ));
1437
    }
1438
    else {
1439
      return("$ref$nl");
1440
    }
1441
  }
1442
 
1443
 
1444
  # Unfold hash to array if possible
1445
 
1446
  if(UNIVERSAL::isa($ref, 'HASH')      # It is a hash
1447
     and keys %$ref                    # and it's not empty
1448
     and $self->{opt}->{keyattr}       # and folding is enabled
1449
     and !$is_root                     # and its not the root element
1450
  ) {
1451
    $ref = $self->hash_to_array($name, $ref);
1452
  }
1453
 
1454
 
1455
  my @result = ();
1456
  my($key, $value);
1457
 
1458
 
1459
  # Handle hashrefs
1460
 
1461
  if(UNIVERSAL::isa($ref, 'HASH')) {
1462
 
1463
    # Reintermediate grouped values if applicable
1464
 
1465
    if($self->{opt}->{grouptags}) {
1466
      $ref = $self->copy_hash($ref);
1467
      while(my($key, $val) = each %$ref) {
1468
        if($self->{opt}->{grouptags}->{$key}) {
1469
          $ref->{$key} = $self->new_hashref(
1470
            $self->{opt}->{grouptags}->{$key} => $val
1471
          );
1472
        }
1473
      }
1474
    }
1475
 
1476
 
1477
    # Scan for namespace declaration attributes
1478
 
1479
    my $nsdecls = '';
1480
    my $default_ns_uri;
1481
    if($self->{nsup}) {
1482
      $ref = $self->copy_hash($ref);
1483
      $self->{nsup}->push_context();
1484
 
1485
      # Look for default namespace declaration first
1486
 
1487
      if(exists($ref->{xmlns})) {
1488
        $self->{nsup}->declare_prefix('', $ref->{xmlns});
1489
        $nsdecls .= qq( xmlns="$ref->{xmlns}");
1490
        delete($ref->{xmlns});
1491
      }
1492
      $default_ns_uri = $self->{nsup}->get_uri('');
1493
 
1494
 
1495
      # Then check all the other keys
1496
 
1497
      foreach my $qname (keys(%$ref)) {
1498
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1499
        if($uri) {
1500
          if($uri eq $xmlns_ns) {
1501
            $self->{nsup}->declare_prefix($lname, $ref->{$qname});
1502
            $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
1503
            delete($ref->{$qname});
1504
          }
1505
        }
1506
      }
1507
 
1508
      # Translate any remaining Clarkian names
1509
 
1510
      foreach my $qname (keys(%$ref)) {
1511
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1512
        if($uri) {
1513
          if($default_ns_uri  and  $uri eq $default_ns_uri) {
1514
            $ref->{$lname} = $ref->{$qname};
1515
            delete($ref->{$qname});
1516
          }
1517
          else {
1518
            my $prefix = $self->{nsup}->get_prefix($uri);
1519
            unless($prefix) {
1520
              # $self->{nsup}->declare_prefix(undef, $uri);
1521
              # $prefix = $self->{nsup}->get_prefix($uri);
1522
              $prefix = $self->{ns_prefix}++;
1523
              $self->{nsup}->declare_prefix($prefix, $uri);
1524
              $nsdecls .= qq( xmlns:$prefix="$uri");
1525
            }
1526
            $ref->{"$prefix:$lname"} = $ref->{$qname};
1527
            delete($ref->{$qname});
1528
          }
1529
        }
1530
      }
1531
    }
1532
 
1533
 
1534
    my @nested = ();
1535
    my $text_content = undef;
1536
    if($named) {
1537
      push @result, $indent, '<', $name, $nsdecls;
1538
    }
1539
 
1540
    if(keys %$ref) {
1541
      my $first_arg = 1;
1542
      foreach my $key ($self->sorted_keys($name, $ref)) {
1543
        my $value = $ref->{$key};
1544
        next if(substr($key, 0, 1) eq '-');
1545
        if(!defined($value)) {
1546
          next if $self->{opt}->{suppressempty};
1547
          unless(exists($self->{opt}->{suppressempty})
1548
             and !defined($self->{opt}->{suppressempty})
1549
          ) {
1550
            carp 'Use of uninitialized value' if($^W);
1551
          }
1552
          if($key eq $self->{opt}->{contentkey}) {
1553
            $text_content = '';
1554
          }
1555
          else {
1556
            $value = exists($self->{opt}->{suppressempty}) ? {} : '';
1557
          }
1558
        }
1559
 
1560
        if(!ref($value)
1561
           and $self->{opt}->{valueattr}
1562
           and $self->{opt}->{valueattr}->{$key}
1563
        ) {
1564
          $value = $self->new_hashref(
1565
            $self->{opt}->{valueattr}->{$key} => $value
1566
          );
1567
        }
1568
 
1569
        if(ref($value)  or  $self->{opt}->{noattr}) {
1570
          push @nested,
1571
            $self->value_to_xml($value, $key, "$indent  ");
1572
        }
1573
        else {
1574
          $value = $self->escape_value($value) unless($self->{opt}->{noescape});
1575
          if($key eq $self->{opt}->{contentkey}) {
1576
            $text_content = $value;
1577
          }
1578
          else {
1579
            push @result, "\n$indent " . ' ' x length($name)
1580
              if($self->{opt}->{attrindent}  and  !$first_arg);
1581
            push @result, ' ', $key, '="', $value , '"';
1582
            $first_arg = 0;
1583
          }
1584
        }
1585
      }
1586
    }
1587
    else {
1588
      $text_content = '';
1589
    }
1590
 
1591
    if(@nested  or  defined($text_content)) {
1592
      if($named) {
1593
        push @result, ">";
1594
        if(defined($text_content)) {
1595
          push @result, $text_content;
1596
          $nested[0] =~ s/^\s+// if(@nested);
1597
        }
1598
        else {
1599
          push @result, $nl;
1600
        }
1601
        if(@nested) {
1602
          push @result, @nested, $indent;
1603
        }
1604
        push @result, '</', $name, ">", $nl;
1605
      }
1606
      else {
1607
        push @result, @nested;             # Special case if no root elements
1608
      }
1609
    }
1610
    else {
1611
      push @result, " />", $nl;
1612
    }
1613
    $self->{nsup}->pop_context() if($self->{nsup});
1614
  }
1615
 
1616
 
1617
  # Handle arrayrefs
1618
 
1619
  elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
1620
    foreach $value (@$ref) {
1621
      next if !defined($value) and $self->{opt}->{suppressempty};
1622
      if(!ref($value)) {
1623
        push @result,
1624
             $indent, '<', $name, '>',
1625
             ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
1626
             '</', $name, ">$nl";
1627
      }
1628
      elsif(UNIVERSAL::isa($value, 'HASH')) {
1629
        push @result, $self->value_to_xml($value, $name, $indent);
1630
      }
1631
      else {
1632
        push @result,
1633
               $indent, '<', $name, ">$nl",
1634
               $self->value_to_xml($value, 'anon', "$indent  "),
1635
               $indent, '</', $name, ">$nl";
1636
      }
1637
    }
1638
  }
1639
 
1640
  else {
1641
    croak "Can't encode a value of type: " . ref($ref);
1642
  }
1643
 
1644
 
1645
  pop @{$self->{_ancestors}} if(ref($ref));
1646
 
1647
  return(join('', @result));
1648
}
1649
 
1650
 
1651
##############################################################################
1652
# Method: sorted_keys()
1653
#
1654
# Returns the keys of the referenced hash sorted into alphabetical order, but
1655
# with the 'key' key (as in KeyAttr) first, if there is one.
1656
#
1657
 
1658
sub sorted_keys {
1659
  my($self, $name, $ref) = @_;
1660
 
1661
  return keys %$ref if $self->{opt}->{nosort};
1662
 
1663
  my %hash = %$ref;
1664
  my $keyattr = $self->{opt}->{keyattr};
1665
 
1666
  my @key;
1667
 
1668
  if(ref $keyattr eq 'HASH') {
1669
    if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
1670
      push @key, $keyattr->{$name}->[0];
1671
      delete $hash{$keyattr->{$name}->[0]};
1672
    }
1673
  }
1674
  elsif(ref $keyattr eq 'ARRAY') {
1675
    foreach (@{$keyattr}) {
1676
      if(exists $hash{$_}) {
1677
        push @key, $_;
1678
        delete $hash{$_};
1679
        last;
1680
      }
1681
    }
1682
  }
1683
 
1684
  return(@key, sort keys %hash);
1685
}
1686
 
1687
##############################################################################
1688
# Method: escape_value()
1689
#
1690
# Helper routine for automatically escaping values for XMLout().
1691
# Expects a scalar data value.  Returns escaped version.
1692
#
1693
 
1694
sub escape_value {
1695
  my($self, $data) = @_;
1696
 
1697
  return '' unless(defined($data));
1698
 
1699
  $data =~ s/&/&amp;/sg;
1700
  $data =~ s/</&lt;/sg;
1701
  $data =~ s/>/&gt;/sg;
1702
  $data =~ s/"/&quot;/sg;
1703
 
1704
  my $level = $self->{opt}->{numericescape} or return $data;
1705
 
1706
  return $self->numeric_escape($data, $level);
1707
}
1708
 
1709
sub numeric_escape {
1710
  my($self, $data, $level) = @_;
1711
 
1712
  use utf8; # required for 5.6
1713
 
1714
  if($self->{opt}->{numericescape} eq '2') {
1715
    $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
1716
  }
1717
  else {
1718
    $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
1719
  }
1720
 
1721
  return $data;
1722
}
1723
 
1724
 
1725
##############################################################################
1726
# Method: hash_to_array()
1727
#
1728
# Helper routine for value_to_xml().
1729
# Attempts to 'unfold' a hash of hashes into an array of hashes.  Returns a
1730
# reference to the array on success or the original hash if unfolding is
1731
# not possible.
1732
#
1733
 
1734
sub hash_to_array {
1735
  my $self    = shift;
1736
  my $parent  = shift;
1737
  my $hashref = shift;
1738
 
1739
  my $arrayref = [];
1740
 
1741
  my($key, $value);
1742
 
1743
  my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
1744
  foreach $key (@keys) {
1745
    $value = $hashref->{$key};
1746
    return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
1747
 
1748
    if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1749
      return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
1750
      push @$arrayref, $self->copy_hash(
1751
        $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
1752
      );
1753
    }
1754
    else {
1755
      push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
1756
    }
1757
  }
1758
 
1759
  return($arrayref);
1760
}
1761
 
1762
 
1763
##############################################################################
1764
# Method: copy_hash()
1765
#
1766
# Helper routine for hash_to_array().  When unfolding a hash of hashes into
1767
# an array of hashes, we need to copy the key from the outer hash into the
1768
# inner hash.  This routine makes a copy of the original hash so we don't
1769
# destroy the original data structure.  You might wish to override this
1770
# method if you're using tied hashes and don't want them to get untied.
1771
#
1772
 
1773
sub copy_hash {
1774
  my($self, $orig, @extra) = @_;
1775
 
1776
  return { @extra, %$orig };
1777
}
1778
 
1779
##############################################################################
1780
# Methods required for building trees from SAX events
1781
##############################################################################
1782
 
1783
sub start_document {
1784
  my $self = shift;
1785
 
1786
  $self->handle_options('in') unless($self->{opt});
1787
 
1788
  $self->{lists} = [];
1789
  $self->{curlist} = $self->{tree} = [];
1790
}
1791
 
1792
 
1793
sub start_element {
1794
  my $self    = shift;
1795
  my $element = shift;
1796
 
1797
  my $name = $element->{Name};
1798
  if($self->{opt}->{nsexpand}) {
1799
    $name = $element->{LocalName} || '';
1800
    if($element->{NamespaceURI}) {
1801
      $name = '{' . $element->{NamespaceURI} . '}' . $name;
1802
    }
1803
  }
1804
  my $attributes = {};
1805
  if($element->{Attributes}) {  # Might be undef
1806
    foreach my $attr (values %{$element->{Attributes}}) {
1807
      if($self->{opt}->{nsexpand}) {
1808
        my $name = $attr->{LocalName} || '';
1809
        if($attr->{NamespaceURI}) {
1810
          $name = '{' . $attr->{NamespaceURI} . '}' . $name
1811
        }
1812
        $name = 'xmlns' if($name eq $bad_def_ns_jcn);
1813
        $attributes->{$name} = $attr->{Value};
1814
      }
1815
      else {
1816
        $attributes->{$attr->{Name}} = $attr->{Value};
1817
      }
1818
    }
1819
  }
1820
  my $newlist = [ $attributes ];
1821
  push @{ $self->{lists} }, $self->{curlist};
1822
  push @{ $self->{curlist} }, $name => $newlist;
1823
  $self->{curlist} = $newlist;
1824
}
1825
 
1826
 
1827
sub characters {
1828
  my $self  = shift;
1829
  my $chars = shift;
1830
 
1831
  my $text  = $chars->{Data};
1832
  my $clist = $self->{curlist};
1833
  my $pos = $#$clist;
1834
 
1835
  if ($pos > 0 and $clist->[$pos - 1] eq '0') {
1836
    $clist->[$pos] .= $text;
1837
  }
1838
  else {
1839
    push @$clist, 0 => $text;
1840
  }
1841
}
1842
 
1843
 
1844
sub end_element {
1845
  my $self    = shift;
1846
 
1847
  $self->{curlist} = pop @{ $self->{lists} };
1848
}
1849
 
1850
 
1851
sub end_document {
1852
  my $self = shift;
1853
 
1854
  delete($self->{curlist});
1855
  delete($self->{lists});
1856
 
1857
  my $tree = $self->{tree};
1858
  delete($self->{tree});
1859
 
1860
 
1861
  # Return tree as-is to XMLin()
1862
 
1863
  return($tree) if($self->{nocollapse});
1864
 
1865
 
1866
  # Or collapse it before returning it to SAX parser class
1867
 
1868
  if($self->{opt}->{keeproot}) {
1869
    $tree = $self->collapse({}, @$tree);
1870
  }
1871
  else {
1872
    $tree = $self->collapse(@{$tree->[1]});
1873
  }
1874
 
1875
  if($self->{opt}->{datahandler}) {
1876
    return($self->{opt}->{datahandler}->($self, $tree));
1877
  }
1878
 
1879
  return($tree);
1880
}
1881
 
1882
*xml_in  = \&XMLin;
1883
*xml_out = \&XMLout;
1884
 
1885
1;
1886
 
1887
__END__
1888
 
1889
=head1 STATUS OF THIS MODULE
1890
 
1891
The use of this module in new code is discouraged.  Other modules are available
1892
which provide more straightforward and consistent interfaces.  In particular,
1893
L<XML::LibXML> is highly recommended.
1894
 
1895
The major problems with this module are the large number of options and the
1896
arbitrary ways in which these options interact - often with unexpected results.
1897
 
1898
Patches with bug fixes and documentation fixes are welcome, but new features
1899
are unlikely to be added.
1900
 
1901
=head1 QUICK START
1902
 
1903
Say you have a script called B<foo> and a file of configuration options
1904
called B<foo.xml> containing the following:
1905
 
1906
  <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
1907
    <server name="sahara" osname="solaris" osversion="2.6">
1908
      <address>10.0.0.101</address>
1909
      <address>10.0.1.101</address>
1910
    </server>
1911
    <server name="gobi" osname="irix" osversion="6.5">
1912
      <address>10.0.0.102</address>
1913
    </server>
1914
    <server name="kalahari" osname="linux" osversion="2.0.34">
1915
      <address>10.0.0.103</address>
1916
      <address>10.0.1.103</address>
1917
    </server>
1918
  </config>
1919
 
1920
The following lines of code in B<foo>:
1921
 
1922
  use XML::Simple qw(:strict);
1923
 
1924
  my $config = XMLin(undef, KeyAttr => { server => 'name' }, ForceArray => [ 'server', 'address' ]);
1925
 
1926
will 'slurp' the configuration options into the hashref $config (because no
1927
filename or XML string was passed as the first argument to C<XMLin()> the name
1928
and location of the XML file will be inferred from name and location of the
1929
script).  You can dump out the contents of the hashref using Data::Dumper:
1930
 
1931
  use Data::Dumper;
1932
 
1933
  print Dumper($config);
1934
 
1935
which will produce something like this (formatting has been adjusted for
1936
brevity):
1937
 
1938
  {
1939
      'logdir'        => '/var/log/foo/',
1940
      'debugfile'     => '/tmp/foo.debug',
1941
      'server'        => {
1942
          'sahara'        => {
1943
              'osversion'     => '2.6',
1944
              'osname'        => 'solaris',
1945
              'address'       => [ '10.0.0.101', '10.0.1.101' ]
1946
          },
1947
          'gobi'          => {
1948
              'osversion'     => '6.5',
1949
              'osname'        => 'irix',
1950
              'address'       => [ '10.0.0.102' ]
1951
          },
1952
          'kalahari'      => {
1953
              'osversion'     => '2.0.34',
1954
              'osname'        => 'linux',
1955
              'address'       => [ '10.0.0.103', '10.0.1.103' ]
1956
          }
1957
      }
1958
  }
1959
 
1960
Your script could then access the name of the log directory like this:
1961
 
1962
  print $config->{logdir};
1963
 
1964
similarly, the second address on the server 'kalahari' could be referenced as:
1965
 
1966
  print $config->{server}->{kalahari}->{address}->[1];
1967
 
1968
Note: If the mapping between the output of Data::Dumper and the print
1969
statements above is not obvious to you, then please refer to the 'references'
1970
tutorial (AKA: "Mark's very short tutorial about references") at L<perlreftut>.
1971
 
1972
In this example, the C<< ForceArray >> option was used to list elements that
1973
might occur multiple times and should therefore be represented as arrayrefs
1974
(even when only one element is present).
1975
 
1976
The C<< KeyAttr >> option was used to indicate that each C<< <server> >>
1977
element has a unique identifier in the C<< name >> attribute.  This allows you
1978
to index directly to a particular server record using the name as a hash key
1979
(as shown above).
1980
 
1981
For simple requirements, that's really all there is to it.  If you want to
1982
store your XML in a different directory or file, or pass it in as a string or
1983
even pass it in via some derivative of an IO::Handle, you'll need to check out
1984
L<"OPTIONS">.  If you want to turn off or tweak the array folding feature (that
1985
neat little transformation that produced $config->{server}) you'll find options
1986
for that as well.
1987
 
1988
If you want to generate XML (for example to write a modified version of
1989
$config back out as XML), check out C<XMLout()>.
1990
 
1991
If your needs are not so simple, this may not be the module for you.  In that
1992
case, you might want to read L<"WHERE TO FROM HERE?">.
1993
 
1994
=head1 DESCRIPTION
1995
 
1996
The XML::Simple module provides a simple API layer on top of an underlying XML
1997
parsing module (either XML::Parser or one of the SAX2 parser modules).  Two
1998
functions are exported: C<XMLin()> and C<XMLout()>.  Note: you can explicity
1999
request the lower case versions of the function names: C<xml_in()> and
2000
C<xml_out()>.
2001
 
2002
The simplest approach is to call these two functions directly, but an
2003
optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
2004
allows them to be called as methods of an B<XML::Simple> object.  The object
2005
interface can also be used at either end of a SAX pipeline.
2006
 
2007
=head2 XMLin()
2008
 
2009
Parses XML formatted data and returns a reference to a data structure which
2010
contains the same information in a more readily accessible form.  (Skip
2011
down to L<"EXAMPLES"> below, for more sample code).
2012
 
2013
C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
2014
value' option pairs.  The XML specifier can be one of the following:
2015
 
2016
=over 4
2017
 
2018
=item A filename
2019
 
2020
If the filename contains no directory components C<XMLin()> will look for the
2021
file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
2022
current directory if the SearchPath option is not defined.  eg:
2023
 
2024
  $ref = XMLin('/etc/params.xml');
2025
 
2026
Note, the filename '-' can be used to parse from STDIN.
2027
 
2028
=item undef
2029
 
2030
If there is no XML specifier, C<XMLin()> will check the script directory and
2031
each of the SearchPath directories for a file with the same name as the script
2032
but with the extension '.xml'.  Note: if you wish to specify options, you
2033
must specify the value 'undef'.  eg:
2034
 
2035
  $ref = XMLin(undef, ForceArray => 1);
2036
 
2037
=item A string of XML
2038
 
2039
A string containing XML (recognised by the presence of '<' and '>' characters)
2040
will be parsed directly.  eg:
2041
 
2042
  $ref = XMLin('<opt username="bob" password="flurp" />');
2043
 
2044
=item An IO::Handle object
2045
 
2046
An IO::Handle object will be read to EOF and its contents parsed. eg:
2047
 
2048
  $fh = IO::File->new('/etc/params.xml');
2049
  $ref = XMLin($fh);
2050
 
2051
=back
2052
 
2053
=head2 XMLout()
2054
 
2055
Takes a data structure (generally a hashref) and returns an XML encoding of
2056
that structure.  If the resulting XML is parsed using C<XMLin()>, it should
2057
return a data structure equivalent to the original (see caveats below).
2058
 
2059
The C<XMLout()> function can also be used to output the XML as SAX events
2060
see the C<Handler> option and L<"SAX SUPPORT"> for more details).
2061
 
2062
When translating hashes to XML, hash keys which have a leading '-' will be
2063
silently skipped.  This is the approved method for marking elements of a
2064
data structure which should be ignored by C<XMLout>.  (Note: If these items
2065
were not skipped the key names would be emitted as element or attribute names
2066
with a leading '-' which would not be valid XML).
2067
 
2068
=head2 Caveats
2069
 
2070
Some care is required in creating data structures which will be passed to
2071
C<XMLout()>.  Hash keys from the data structure will be encoded as either XML
2072
element names or attribute names.  Therefore, you should use hash key names
2073
which conform to the relatively strict XML naming rules:
2074
 
2075
Names in XML must begin with a letter.  The remaining characters may be
2076
letters, digits, hyphens (-), underscores (_) or full stops (.).  It is also
2077
allowable to include one colon (:) in an element name but this should only be
2078
used when working with namespaces (B<XML::Simple> can only usefully work with
2079
namespaces when teamed with a SAX Parser).
2080
 
2081
You can use other punctuation characters in hash values (just not in hash
2082
keys) however B<XML::Simple> does not support dumping binary data.
2083
 
2084
If you break these rules, the current implementation of C<XMLout()> will
2085
simply emit non-compliant XML which will be rejected if you try to read it
2086
back in.  (A later version of B<XML::Simple> might take a more proactive
2087
approach).
2088
 
2089
Note also that although you can nest hashes and arrays to arbitrary levels,
2090
circular data structures are not supported and will cause C<XMLout()> to die.
2091
 
2092
If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
2093
to Perl, then you should probably disable array folding (using the KeyAttr
2094
option) both with C<XMLout()> and with C<XMLin()>.  If you still don't get the
2095
expected results, you may prefer to use L<XML::Dumper> which is designed for
2096
exactly that purpose.
2097
 
2098
Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
2099
 
2100
 
2101
=head1 OPTIONS
2102
 
2103
B<XML::Simple> supports a number of options (in fact as each release of
2104
B<XML::Simple> adds more options, the module's claim to the name 'Simple'
2105
becomes increasingly tenuous).  If you find yourself repeatedly having to
2106
specify the same options, you might like to investigate L<"OPTIONAL OO
2107
INTERFACE"> below.
2108
 
2109
If you can't be bothered reading the documentation, refer to
2110
L<"STRICT MODE"> to automatically catch common mistakes.
2111
 
2112
Because there are so many options, it's hard for new users to know which ones
2113
are important, so here are the two you really need to know about:
2114
 
2115
=over 4
2116
 
2117
=item *
2118
 
2119
check out C<ForceArray> because you'll almost certainly want to turn it on
2120
 
2121
=item *
2122
 
2123
make sure you know what the C<KeyAttr> option does and what its default value is
2124
because it may surprise you otherwise (note in particular that 'KeyAttr'
2125
affects both C<XMLin> and C<XMLout>)
2126
 
2127
=back
2128
 
2129
The option name headings below have a trailing 'comment' - a hash followed by
2130
two pieces of metadata:
2131
 
2132
=over 4
2133
 
2134
=item *
2135
 
2136
Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
2137
'I<out>' if they are recognised by C<XMLout()>.
2138
 
2139
=item *
2140
 
2141
Each option is also flagged to indicate whether it is:
2142
 
2143
 'important'   - don't use the module until you understand this one
2144
 'handy'       - you can skip this on the first time through
2145
 'advanced'    - you can skip this on the second time through
2146
 'SAX only'    - don't worry about this unless you're using SAX (or
2147
                 alternatively if you need this, you also need SAX)
2148
 'seldom used' - you'll probably never use this unless you were the
2149
                 person that requested the feature
2150
 
2151
=back
2152
 
2153
The options are listed alphabetically:
2154
 
2155
Note: option names are no longer case sensitive so you can use the mixed case
2156
versions shown here; all lower case as required by versions 2.03 and earlier;
2157
or you can add underscores between the words (eg: key_attr).
2158
 
2159
 
2160
=head2 AttrIndent => 1 I<# out - handy>
2161
 
2162
When you are using C<XMLout()>, enable this option to have attributes printed
2163
one-per-line with sensible indentation rather than all on one line.
2164
 
2165
=head2 Cache => [ cache schemes ] I<# in - advanced>
2166
 
2167
Because loading the B<XML::Parser> module and parsing an XML file can consume a
2168
significant number of CPU cycles, it is often desirable to cache the output of
2169
C<XMLin()> for later reuse.
2170
 
2171
When parsing from a named file, B<XML::Simple> supports a number of caching
2172
schemes.  The 'Cache' option may be used to specify one or more schemes (using
2173
an anonymous array).  Each scheme will be tried in turn in the hope of finding
2174
a cached pre-parsed representation of the XML file.  If no cached copy is
2175
found, the file will be parsed and the first cache scheme in the list will be
2176
used to save a copy of the results.  The following cache schemes have been
2177
implemented:
2178
 
2179
=over 4
2180
 
2181
=item storable
2182
 
2183
Utilises B<Storable.pm> to read/write a cache file with the same name as the
2184
XML file but with the extension .stor
2185
 
2186
=item memshare
2187
 
2188
When a file is first parsed, a copy of the resulting data structure is retained
2189
in memory in the B<XML::Simple> module's namespace.  Subsequent calls to parse
2190
the same file will return a reference to this structure.  This cached version
2191
will persist only for the life of the Perl interpreter (which in the case of
2192
mod_perl for example, may be some significant time).
2193
 
2194
Because each caller receives a reference to the same data structure, a change
2195
made by one caller will be visible to all.  For this reason, the reference
2196
returned should be treated as read-only.
2197
 
2198
=item memcopy
2199
 
2200
This scheme works identically to 'memshare' (above) except that each caller
2201
receives a reference to a new data structure which is a copy of the cached
2202
version.  Copying the data structure will add a little processing overhead,
2203
therefore this scheme should only be used where the caller intends to modify
2204
the data structure (or wishes to protect itself from others who might).  This
2205
scheme uses B<Storable.pm> to perform the copy.
2206
 
2207
=back
2208
 
2209
Warning! The memory-based caching schemes compare the timestamp on the file to
2210
the time when it was last parsed.  If the file is stored on an NFS filesystem
2211
(or other network share) and the clock on the file server is not exactly
2212
synchronised with the clock where your script is run, updates to the source XML
2213
file may appear to be ignored.
2214
 
2215
=head2 ContentKey => 'keyname' I<# in+out - seldom used>
2216
 
2217
When text content is parsed to a hash value, this option let's you specify a
2218
name for the hash key to override the default 'content'.  So for example:
2219
 
2220
  XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
2221
 
2222
will parse to:
2223
 
2224
  { 'one' => 1, 'text' => 'Text' }
2225
 
2226
instead of:
2227
 
2228
  { 'one' => 1, 'content' => 'Text' }
2229
 
2230
C<XMLout()> will also honour the value of this option when converting a hashref
2231
to XML.
2232
 
2233
You can also prefix your selected key name with a '-' character to have
2234
C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
2235
array folding.  For example:
2236
 
2237
  XMLin(
2238
    '<opt><item name="one">First</item><item name="two">Second</item></opt>',
2239
    KeyAttr => {item => 'name'},
2240
    ForceArray => [ 'item' ],
2241
    ContentKey => '-content'
2242
  )
2243
 
2244
will parse to:
2245
 
2246
  {
2247
    'item' => {
2248
      'one' =>  'First'
2249
      'two' =>  'Second'
2250
    }
2251
  }
2252
 
2253
rather than this (without the '-'):
2254
 
2255
  {
2256
    'item' => {
2257
      'one' => { 'content' => 'First' }
2258
      'two' => { 'content' => 'Second' }
2259
    }
2260
  }
2261
 
2262
=head2 DataHandler => code_ref I<# in - SAX only>
2263
 
2264
When you use an B<XML::Simple> object as a SAX handler, it will return a
2265
'simple tree' data structure in the same format as C<XMLin()> would return.  If
2266
this option is set (to a subroutine reference), then when the tree is built the
2267
subroutine will be called and passed two arguments: a reference to the
2268
B<XML::Simple> object and a reference to the data tree.  The return value from
2269
the subroutine will be returned to the SAX driver.  (See L<"SAX SUPPORT"> for
2270
more details).
2271
 
2272
=head2 ForceArray => 1 I<# in - important>
2273
 
2274
This option should be set to '1' to force nested elements to be represented
2275
as arrays even when there is only one.  Eg, with ForceArray enabled, this
2276
XML:
2277
 
2278
    <opt>
2279
      <name>value</name>
2280
    </opt>
2281
 
2282
would parse to this:
2283
 
2284
    {
2285
      'name' => [
2286
                  'value'
2287
                ]
2288
    }
2289
 
2290
instead of this (the default):
2291
 
2292
    {
2293
      'name' => 'value'
2294
    }
2295
 
2296
This option is especially useful if the data structure is likely to be written
2297
back out as XML and the default behaviour of rolling single nested elements up
2298
into attributes is not desirable.
2299
 
2300
If you are using the array folding feature, you should almost certainly enable
2301
this option.  If you do not, single nested elements will not be parsed to
2302
arrays and therefore will not be candidates for folding to a hash.  (Given that
2303
the default value of 'KeyAttr' enables array folding, the default value of this
2304
option should probably also have been enabled too - sorry).
2305
 
2306
=head2 ForceArray => [ names ] I<# in - important>
2307
 
2308
This alternative (and preferred) form of the 'ForceArray' option allows you to
2309
specify a list of element names which should always be forced into an array
2310
representation, rather than the 'all or nothing' approach above.
2311
 
2312
It is also possible (since version 2.05) to include compiled regular
2313
expressions in the list - any element names which match the pattern will be
2314
forced to arrays.  If the list contains only a single regex, then it is not
2315
necessary to enclose it in an arrayref.  Eg:
2316
 
2317
  ForceArray => qr/_list$/
2318
 
2319
=head2 ForceContent => 1 I<# in - seldom used>
2320
 
2321
When C<XMLin()> parses elements which have text content as well as attributes,
2322
the text content must be represented as a hash value rather than a simple
2323
scalar.  This option allows you to force text content to always parse to
2324
a hash value even when there are no attributes.  So for example:
2325
 
2326
  XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
2327
 
2328
will parse to:
2329
 
2330
  {
2331
    'x' => {           'content' => 'text1' },
2332
    'y' => { 'a' => 2, 'content' => 'text2' }
2333
  }
2334
 
2335
instead of:
2336
 
2337
  {
2338
    'x' => 'text1',
2339
    'y' => { 'a' => 2, 'content' => 'text2' }
2340
  }
2341
 
2342
=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
2343
 
2344
You can use this option to eliminate extra levels of indirection in your Perl
2345
data structure.  For example this XML:
2346
 
2347
  <opt>
2348
   <searchpath>
2349
     <dir>/usr/bin</dir>
2350
     <dir>/usr/local/bin</dir>
2351
     <dir>/usr/X11/bin</dir>
2352
   </searchpath>
2353
 </opt>
2354
 
2355
Would normally be read into a structure like this:
2356
 
2357
  {
2358
    searchpath => {
2359
                    dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2360
                  }
2361
  }
2362
 
2363
But when read in with the appropriate value for 'GroupTags':
2364
 
2365
  my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
2366
 
2367
It will return this simpler structure:
2368
 
2369
  {
2370
    searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2371
  }
2372
 
2373
The grouping element (C<< <searchpath> >> in the example) must not contain any
2374
attributes or elements other than the grouped element.
2375
 
2376
You can specify multiple 'grouping element' to 'grouped element' mappings in
2377
the same hashref.  If this option is combined with C<KeyAttr>, the array
2378
folding will occur first and then the grouped element names will be eliminated.
2379
 
2380
C<XMLout> will also use the grouptag mappings to re-introduce the tags around
2381
the grouped elements.  Beware though that this will occur in all places that
2382
the 'grouping tag' name occurs - you probably don't want to use the same name
2383
for elements as well as attributes.
2384
 
2385
=head2 Handler => object_ref I<# out - SAX only>
2386
 
2387
Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
2388
returning a string of XML.  For more details see L<"SAX SUPPORT"> below.
2389
 
2390
Note: the current implementation of this option generates a string of XML
2391
and uses a SAX parser to translate it into SAX events.  The normal encoding
2392
rules apply here - your data must be UTF8 encoded unless you specify an
2393
alternative encoding via the 'XMLDecl' option; and by the time the data reaches
2394
the handler object, it will be in UTF8 form regardless of the encoding you
2395
supply.  A future implementation of this option may generate the events
2396
directly.
2397
 
2398
=head2 KeepRoot => 1 I<# in+out - handy>
2399
 
2400
In its attempt to return a data structure free of superfluous detail and
2401
unnecessary levels of indirection, C<XMLin()> normally discards the root
2402
element name.  Setting the 'KeepRoot' option to '1' will cause the root element
2403
name to be retained.  So after executing this code:
2404
 
2405
  $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
2406
 
2407
You'll be able to reference the tempdir as
2408
C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
2409
C<$config-E<gt>{tempdir}>.
2410
 
2411
Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
2412
data structure already contains a root element name and it is not necessary to
2413
add another.
2414
 
2415
=head2 KeyAttr => [ list ] I<# in+out - important>
2416
 
2417
This option controls the 'array folding' feature which translates nested
2418
elements from an array to a hash.  It also controls the 'unfolding' of hashes
2419
to arrays.
2420
 
2421
For example, this XML:
2422
 
2423
    <opt>
2424
      <user login="grep" fullname="Gary R Epstein" />
2425
      <user login="stty" fullname="Simon T Tyson" />
2426
    </opt>
2427
 
2428
would, by default, parse to this:
2429
 
2430
    {
2431
      'user' => [
2432
                  {
2433
                    'login' => 'grep',
2434
                    'fullname' => 'Gary R Epstein'
2435
                  },
2436
                  {
2437
                    'login' => 'stty',
2438
                    'fullname' => 'Simon T Tyson'
2439
                  }
2440
                ]
2441
    }
2442
 
2443
If the option 'KeyAttr => "login"' were used to specify that the 'login'
2444
attribute is a key, the same XML would parse to:
2445
 
2446
    {
2447
      'user' => {
2448
                  'stty' => {
2449
                              'fullname' => 'Simon T Tyson'
2450
                            },
2451
                  'grep' => {
2452
                              'fullname' => 'Gary R Epstein'
2453
                            }
2454
                }
2455
    }
2456
 
2457
The key attribute names should be supplied in an arrayref if there is more
2458
than one.  C<XMLin()> will attempt to match attribute names in the order
2459
supplied.  C<XMLout()> will use the first attribute name supplied when
2460
'unfolding' a hash into an array.
2461
 
2462
Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id'].  If you do
2463
not want folding on input or unfolding on output you must set this option
2464
to an empty list to disable the feature.
2465
 
2466
Note 2: If you wish to use this option, you should also enable the
2467
C<ForceArray> option.  Without 'ForceArray', a single nested element will be
2468
rolled up into a scalar rather than an array and therefore will not be folded
2469
(since only arrays get folded).
2470
 
2471
=head2 KeyAttr => { list } I<# in+out - important>
2472
 
2473
This alternative (and preferred) method of specifiying the key attributes
2474
allows more fine grained control over which elements are folded and on which
2475
attributes.  For example the option 'KeyAttr => { package => 'id' } will cause
2476
any package elements to be folded on the 'id' attribute.  No other elements
2477
which have an 'id' attribute will be folded at all.
2478
 
2479
Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
2480
if this syntax is used and an element which does not have the specified key
2481
attribute is encountered (eg: a 'package' element without an 'id' attribute, to
2482
use the example above).  Warnings will only be generated if B<-w> is in force.
2483
 
2484
Two further variations are made possible by prefixing a '+' or a '-' character
2485
to the attribute name:
2486
 
2487
The option 'KeyAttr => { user => "+login" }' will cause this XML:
2488
 
2489
    <opt>
2490
      <user login="grep" fullname="Gary R Epstein" />
2491
      <user login="stty" fullname="Simon T Tyson" />
2492
    </opt>
2493
 
2494
to parse to this data structure:
2495
 
2496
    {
2497
      'user' => {
2498
                  'stty' => {
2499
                              'fullname' => 'Simon T Tyson',
2500
                              'login'    => 'stty'
2501
                            },
2502
                  'grep' => {
2503
                              'fullname' => 'Gary R Epstein',
2504
                              'login'    => 'grep'
2505
                            }
2506
                }
2507
    }
2508
 
2509
The '+' indicates that the value of the key attribute should be copied rather
2510
than moved to the folded hash key.
2511
 
2512
A '-' prefix would produce this result:
2513
 
2514
    {
2515
      'user' => {
2516
                  'stty' => {
2517
                              'fullname' => 'Simon T Tyson',
2518
                              '-login'    => 'stty'
2519
                            },
2520
                  'grep' => {
2521
                              'fullname' => 'Gary R Epstein',
2522
                              '-login'    => 'grep'
2523
                            }
2524
                }
2525
    }
2526
 
2527
As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
2528
 
2529
=head2 NoAttr => 1 I<# in+out - handy>
2530
 
2531
When used with C<XMLout()>, the generated XML will contain no attributes.
2532
All hash key/values will be represented as nested elements instead.
2533
 
2534
When used with C<XMLin()>, any attributes in the XML will be ignored.
2535
 
2536
=head2 NoEscape => 1 I<# out - seldom used>
2537
 
2538
By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
2539
'"' to '&lt;', '&gt;', '&amp;' and '&quot' respectively.  Use this option to
2540
suppress escaping (presumably because you've already escaped the data in some
2541
more sophisticated manner).
2542
 
2543
=head2 NoIndent => 1 I<# out - seldom used>
2544
 
2545
Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
2546
With this option enabled, the XML output will all be on one line (unless there
2547
are newlines in the data) - this may be easier for downstream processing.
2548
 
2549
=head2 NoSort => 1 I<# out - seldom used>
2550
 
2551
Newer versions of XML::Simple sort elements and attributes alphabetically (*),
2552
by default.  Enable this option to suppress the sorting - possibly for
2553
backwards compatibility.
2554
 
2555
* Actually, sorting is alphabetical but 'key' attribute or element names (as in
2556
'KeyAttr') sort first.  Also, when a hash of hashes is 'unfolded', the elements
2557
are sorted alphabetically by the value of the key field.
2558
 
2559
=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
2560
 
2561
This option controls how whitespace in text content is handled.  Recognised
2562
values for the option are:
2563
 
2564
=over 4
2565
 
2566
=item *
2567
 
2568
 
2569
normalisation of whitespace in attribute values which is mandated by the XML
2570
recommendation)
2571
 
2572
=item *
2573
 
2574
1 = whitespace is normalised in any value used as a hash key (normalising means
2575
removing leading and trailing whitespace and collapsing sequences of whitespace
2576
characters to a single space)
2577
 
2578
=item *
2579
 
2580
2 = whitespace is normalised in all text content
2581
 
2582
=back
2583
 
2584
Note: you can spell this option with a 'z' if that is more natural for you.
2585
 
2586
=head2 NSExpand => 1 I<# in+out handy - SAX only>
2587
 
2588
This option controls namespace expansion - the translation of element and
2589
attribute names of the form 'prefix:name' to '{uri}name'.  For example the
2590
element name 'xsl:template' might be expanded to:
2591
'{http://www.w3.org/1999/XSL/Transform}template'.
2592
 
2593
By default, C<XMLin()> will return element names and attribute names exactly as
2594
they appear in the XML.  Setting this option to 1 will cause all element and
2595
attribute names to be expanded to include their namespace prefix.
2596
 
2597
I<Note: You must be using a SAX parser for this option to work (ie: it does not
2598
work with XML::Parser)>.
2599
 
2600
This option also controls whether C<XMLout()> performs the reverse translation
2601
from '{uri}name' back to 'prefix:name'.  The default is no translation.  If
2602
your data contains expanded names, you should set this option to 1 otherwise
2603
C<XMLout> will emit XML which is not well formed.
2604
 
2605
I<Note: You must have the XML::NamespaceSupport module installed if you want
2606
C<XMLout()> to translate URIs back to prefixes>.
2607
 
2608
=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
2609
 
2610
Use this option to have 'high' (non-ASCII) characters in your Perl data
2611
structure converted to numeric entities (eg: &#8364;) in the XML output.  Three
2612
levels are possible:
2613
 
2614
 
2615
 
2616
1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
2617
 
2618
2 - all characters above 0x7F are escaped (good for plain ASCII output)
2619
 
2620
=head2 OutputFile => <file specifier> I<# out - handy>
2621
 
2622
The default behaviour of C<XMLout()> is to return the XML as a string.  If you
2623
wish to write the XML to a file, simply supply the filename using the
2624
'OutputFile' option.
2625
 
2626
This option also accepts an IO handle object - especially useful in Perl 5.8.0
2627
and later for output using an encoding other than UTF-8, eg:
2628
 
2629
  open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
2630
  XMLout($ref, OutputFile => $fh);
2631
 
2632
Note, XML::Simple does not require that the object you pass in to the
2633
OutputFile option inherits from L<IO::Handle> - it simply assumes the object
2634
supports a C<print> method.
2635
 
2636
=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
2637
 
2638
I<Note: This option is now officially deprecated.  If you find it useful, email
2639
the author with an example of what you use it for.  Do not use this option to
2640
set the ProtocolEncoding, that's just plain wrong - fix the XML>.
2641
 
2642
This option allows you to pass parameters to the constructor of the underlying
2643
XML::Parser object (which of course assumes you're not using SAX).
2644
 
2645
=head2 RootName => 'string' I<# out - handy>
2646
 
2647
By default, when C<XMLout()> generates XML, the root element will be named
2648
'opt'.  This option allows you to specify an alternative name.
2649
 
2650
Specifying either undef or the empty string for the RootName option will
2651
produce XML with no root elements.  In most cases the resulting XML fragment
2652
will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
2653
Nevertheless, the option has been found to be useful in certain circumstances.
2654
 
2655
=head2 SearchPath => [ list ] I<# in - handy>
2656
 
2657
If you pass C<XMLin()> a filename, but the filename include no directory
2658
component, you can use this option to specify which directories should be
2659
searched to locate the file.  You might use this option to search first in the
2660
user's home directory, then in a global directory such as /etc.
2661
 
2662
If a filename is provided to C<XMLin()> but SearchPath is not defined, the
2663
file is assumed to be in the current directory.
2664
 
2665
If the first parameter to C<XMLin()> is undefined, the default SearchPath
2666
will contain only the directory in which the script itself is located.
2667
Otherwise the default SearchPath will be empty.
2668
 
2669
=head2 StrictMode => 1 | 0  I<# in+out seldom used>
2670
 
2671
This option allows you to turn L<STRICT MODE> on or off for a particular call,
2672
regardless of whether it was enabled at the time XML::Simple was loaded.
2673
 
2674
=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
2675
 
2676
This option controls what C<XMLin()> should do with empty elements (no
2677
attributes and no content).  The default behaviour is to represent them as
2678
empty hashes.  Setting this option to a true value (eg: 1) will cause empty
2679
elements to be skipped altogether.  Setting the option to 'undef' or the empty
2680
string will cause empty elements to be represented as the undefined value or
2681
the empty string respectively.  The latter two alternatives are a little
2682
easier to test for in your code than a hash with no keys.
2683
 
2684
The option also controls what C<XMLout()> does with undefined values.  Setting
2685
the option to undef causes undefined values to be output as empty elements
2686
(rather than empty attributes), it also suppresses the generation of warnings
2687
about undefined values.  Setting the option to a true value (eg: 1) causes
2688
undefined values to be skipped altogether on output.
2689
 
2690
=head2 ValueAttr => [ names ] I<# in - handy>
2691
 
2692
Use this option to deal elements which always have a single attribute and no
2693
content.  Eg:
2694
 
2695
  <opt>
2696
    <colour value="red" />
2697
    <size   value="XXL" />
2698
  </opt>
2699
 
2700
Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
2701
 
2702
  {
2703
    colour => 'red',
2704
    size   => 'XXL'
2705
  }
2706
 
2707
instead of this (the default):
2708
 
2709
  {
2710
    colour => { value => 'red' },
2711
    size   => { value => 'XXL' }
2712
  }
2713
 
2714
Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
2715
since the attribute name is discarded at parse time, the original XML cannot be
2716
reconstructed.
2717
 
2718
=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
2719
 
2720
This (preferred) form of the ValueAttr option requires you to specify both
2721
the element and the attribute names.  This is not only safer, it also allows
2722
the original XML to be reconstructed by C<XMLout()>.
2723
 
2724
Note: You probably don't want to use this option and the NoAttr option at the
2725
same time.
2726
 
2727
=head2 Variables => { name => value } I<# in - handy>
2728
 
2729
This option allows variables in the XML to be expanded when the file is read.
2730
(there is no facility for putting the variable names back if you regenerate
2731
XML using C<XMLout>).
2732
 
2733
A 'variable' is any text of the form C<${name}> which occurs in an attribute
2734
value or in the text content of an element.  If 'name' matches a key in the
2735
supplied hashref, C<${name}> will be replaced with the corresponding value from
2736
the hashref.  If no matching key is found, the variable will not be replaced.
2737
Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
2738
allowed).
2739
 
2740
=head2 VarAttr => 'attr_name' I<# in - handy>
2741
 
2742
In addition to the variables defined using C<Variables>, this option allows
2743
variables to be defined in the XML.  A variable definition consists of an
2744
element with an attribute called 'attr_name' (the value of the C<VarAttr>
2745
option).  The value of the attribute will be used as the variable name and the
2746
text content of the element will be used as the value.  A variable defined in
2747
this way will override a variable defined using the C<Variables> option.  For
2748
example:
2749
 
2750
  XMLin( '<opt>
2751
            <dir name="prefix">/usr/local/apache</dir>
2752
            <dir name="exec_prefix">${prefix}</dir>
2753
            <dir name="bindir">${exec_prefix}/bin</dir>
2754
          </opt>',
2755
         VarAttr => 'name', ContentKey => '-content'
2756
        );
2757
 
2758
produces the following data structure:
2759
 
2760
  {
2761
    dir => {
2762
             prefix      => '/usr/local/apache',
2763
             exec_prefix => '/usr/local/apache',
2764
             bindir      => '/usr/local/apache/bin',
2765
           }
2766
  }
2767
 
2768
=head2 XMLDecl => 1  or  XMLDecl => 'string'  I<# out - handy>
2769
 
2770
If you want the output from C<XMLout()> to start with the optional XML
2771
declaration, simply set the option to '1'.  The default XML declaration is:
2772
 
2773
        <?xml version='1.0' standalone='yes'?>
2774
 
2775
If you want some other string (for example to declare an encoding value), set
2776
the value of this option to the complete string you require.
2777
 
2778
 
2779
=head1 OPTIONAL OO INTERFACE
2780
 
2781
The procedural interface is both simple and convenient however there are a
2782
couple of reasons why you might prefer to use the object oriented (OO)
2783
interface:
2784
 
2785
=over 4
2786
 
2787
=item *
2788
 
2789
to define a set of default values which should be used on all subsequent calls
2790
to C<XMLin()> or C<XMLout()>
2791
 
2792
=item *
2793
 
2794
to override methods in B<XML::Simple> to provide customised behaviour
2795
 
2796
=back
2797
 
2798
The default values for the options described above are unlikely to suit
2799
everyone.  The OO interface allows you to effectively override B<XML::Simple>'s
2800
defaults with your preferred values.  It works like this:
2801
 
2802
First create an XML::Simple parser object with your preferred defaults:
2803
 
2804
  my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
2805
 
2806
then call C<XMLin()> or C<XMLout()> as a method of that object:
2807
 
2808
  my $ref = $xs->XMLin($xml);
2809
  my $xml = $xs->XMLout($ref);
2810
 
2811
You can also specify options when you make the method calls and these values
2812
will be merged with the values specified when the object was created.  Values
2813
specified in a method call take precedence.
2814
 
2815
Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
2816
called as C<xml_in()> or C<xml_out()>.  The method names are aliased so the
2817
only difference is the aesthetics.
2818
 
2819
=head2 Parsing Methods
2820
 
2821
You can explicitly call one of the following methods rather than rely on the
2822
C<xml_in()> method automatically determining whether the target to be parsed is
2823
a string, a file or a filehandle:
2824
 
2825
=over 4
2826
 
2827
=item parse_string(text)
2828
 
2829
Works exactly like the C<xml_in()> method but assumes the first argument is
2830
a string of XML (or a reference to a scalar containing a string of XML).
2831
 
2832
=item parse_file(filename)
2833
 
2834
Works exactly like the C<xml_in()> method but assumes the first argument is
2835
the name of a file containing XML.
2836
 
2837
=item parse_fh(file_handle)
2838
 
2839
Works exactly like the C<xml_in()> method but assumes the first argument is
2840
a filehandle which can be read to get XML.
2841
 
2842
=back
2843
 
2844
=head2 Hook Methods
2845
 
2846
You can make your own class which inherits from XML::Simple and overrides
2847
certain behaviours.  The following methods may provide useful 'hooks' upon
2848
which to hang your modified behaviour.  You may find other undocumented methods
2849
by examining the source, but those may be subject to change in future releases.
2850
 
2851
=over 4
2852
 
2853
=item handle_options(direction, name => value ...)
2854
 
2855
This method will be called when one of the parsing methods or the C<XMLout()>
2856
method is called.  The initial argument will be a string (either 'in' or 'out')
2857
and the remaining arguments will be name value pairs.
2858
 
2859
=item default_config_file()
2860
 
2861
Calculates and returns the name of the file which should be parsed if no
2862
filename is passed to C<XMLin()> (default: C<$0.xml>).
2863
 
2864
=item build_simple_tree(filename, string)
2865
 
2866
Called from C<XMLin()> or any of the parsing methods.  Takes either a file name
2867
as the first argument or C<undef> followed by a 'string' as the second
2868
argument.  Returns a simple tree data structure.  You could override this
2869
method to apply your own transformations before the data structure is returned
2870
to the caller.
2871
 
2872
=item new_hashref()
2873
 
2874
When the 'simple tree' data structure is being built, this method will be
2875
called to create any required anonymous hashrefs.
2876
 
2877
=item sorted_keys(name, hashref)
2878
 
2879
Called when C<XMLout()> is translating a hashref to XML.  This routine returns
2880
a list of hash keys in the order that the corresponding attributes/elements
2881
should appear in the output.
2882
 
2883
=item escape_value(string)
2884
 
2885
Called from C<XMLout()>, takes a string and returns a copy of the string with
2886
XML character escaping rules applied.
2887
 
2888
=item numeric_escape(string)
2889
 
2890
Called from C<escape_value()>, to handle non-ASCII characters (depending on the
2891
value of the NumericEscape option).
2892
 
2893
=item copy_hash(hashref, extra_key => value, ...)
2894
 
2895
Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
2896
hashes.  You might wish to override this method if you're using tied hashes and
2897
don't want them to get untied.
2898
 
2899
=back
2900
 
2901
=head2 Cache Methods
2902
 
2903
XML::Simple implements three caching schemes ('storable', 'memshare' and
2904
'memcopy').  You can implement a custom caching scheme by implementing
2905
two methods - one for reading from the cache and one for writing to it.
2906
 
2907
For example, you might implement a new 'dbm' scheme that stores cached data
2908
structures using the L<MLDBM> module.  First, you would add a
2909
C<cache_read_dbm()> method which accepted a filename for use as a lookup key
2910
and returned a data structure on success, or undef on failure.  Then, you would
2911
implement a C<cache_read_dbm()> method which accepted a data structure and a
2912
filename.
2913
 
2914
You would use this caching scheme by specifying the option:
2915
 
2916
  Cache => [ 'dbm' ]
2917
 
2918
=head1 STRICT MODE
2919
 
2920
If you import the B<XML::Simple> routines like this:
2921
 
2922
  use XML::Simple qw(:strict);
2923
 
2924
the following common mistakes will be detected and treated as fatal errors
2925
 
2926
=over 4
2927
 
2928
=item *
2929
 
2930
Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
2931
reading about this option, turn it off with: KeyAttr => [ ]
2932
 
2933
=item *
2934
 
2935
Failing to explicitly set the C<ForceArray> option - if you can't be bothered
2936
reading about this option, set it to the safest mode with: ForceArray => 1
2937
 
2938
=item *
2939
 
2940
Setting ForceArray to an array, but failing to list all the elements from the
2941
KeyAttr hash.
2942
 
2943
=item *
2944
 
2945
Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
2946
one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
2947
element).  Note: if strict mode is not set but -w is, this condition triggers a
2948
warning.
2949
 
2950
=item *
2951
 
2952
Data error - as above, but non-unique values are present in the key attribute
2953
(eg: more than one E<lt>partE<gt> element with the same partnum).  This will
2954
also trigger a warning if strict mode is not enabled.
2955
 
2956
=item *
2957
 
2958
Data error - as above, but value of key attribute (eg: partnum) is not a
2959
scalar string (due to nested elements etc).  This will also trigger a warning
2960
if strict mode is not enabled.
2961
 
2962
=back
2963
 
2964
=head1 SAX SUPPORT
2965
 
2966
From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
2967
for XML) - specifically SAX2.
2968
 
2969
In a typical SAX application, an XML parser (or SAX 'driver') module generates
2970
SAX events (start of element, character data, end of element, etc) as it parses
2971
an XML document and a 'handler' module processes the events to extract the
2972
required data.  This simple model allows for some interesting and powerful
2973
possibilities:
2974
 
2975
=over 4
2976
 
2977
=item *
2978
 
2979
Applications written to the SAX API can extract data from huge XML documents
2980
without the memory overheads of a DOM or tree API.
2981
 
2982
=item *
2983
 
2984
The SAX API allows for plug and play interchange of parser modules without
2985
having to change your code to fit a new module's API.  A number of SAX parsers
2986
are available with capabilities ranging from extreme portability to blazing
2987
performance.
2988
 
2989
=item *
2990
 
2991
A SAX 'filter' module can implement both a handler interface for receiving
2992
data and a generator interface for passing modified data on to a downstream
2993
handler.  Filters can be chained together in 'pipelines'.
2994
 
2995
=item *
2996
 
2997
One filter module might split a data stream to direct data to two or more
2998
downstream handlers.
2999
 
3000
=item *
3001
 
3002
Generating SAX events is not the exclusive preserve of XML parsing modules.
3003
For example, a module might extract data from a relational database using DBI
3004
and pass it on to a SAX pipeline for filtering and formatting.
3005
 
3006
=back
3007
 
3008
B<XML::Simple> can operate at either end of a SAX pipeline.  For example,
3009
you can take a data structure in the form of a hashref and pass it into a
3010
SAX pipeline using the 'Handler' option on C<XMLout()>:
3011
 
3012
  use XML::Simple;
3013
  use Some::SAX::Filter;
3014
  use XML::SAX::Writer;
3015
 
3016
  my $ref = {
3017
               ....   # your data here
3018
            };
3019
 
3020
  my $writer = XML::SAX::Writer->new();
3021
  my $filter = Some::SAX::Filter->new(Handler => $writer);
3022
  my $simple = XML::Simple->new(Handler => $filter);
3023
  $simple->XMLout($ref);
3024
 
3025
You can also put B<XML::Simple> at the opposite end of the pipeline to take
3026
advantage of the simple 'tree' data structure once the relevant data has been
3027
isolated through filtering:
3028
 
3029
  use XML::SAX;
3030
  use Some::SAX::Filter;
3031
  use XML::Simple;
3032
 
3033
  my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
3034
  my $filter = Some::SAX::Filter->new(Handler => $simple);
3035
  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3036
 
3037
  my $ref = $parser->parse_uri('some_huge_file.xml');
3038
 
3039
  print $ref->{part}->{'555-1234'};
3040
 
3041
You can build a filter by using an XML::Simple object as a handler and setting
3042
its DataHandler option to point to a routine which takes the resulting tree,
3043
modifies it and sends it off as SAX events to a downstream handler:
3044
 
3045
  my $writer = XML::SAX::Writer->new();
3046
  my $filter = XML::Simple->new(
3047
                 DataHandler => sub {
3048
                                  my $simple = shift;
3049
                                  my $data = shift;
3050
 
3051
                                  # Modify $data here
3052
 
3053
                                  $simple->XMLout($data, Handler => $writer);
3054
                                }
3055
               );
3056
  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3057
 
3058
  $parser->parse_uri($filename);
3059
 
3060
I<Note: In this last example, the 'Handler' option was specified in the call to
3061
C<XMLout()> but it could also have been specified in the constructor>.
3062
 
3063
=head1 ENVIRONMENT
3064
 
3065
If you don't care which parser module B<XML::Simple> uses then skip this
3066
section entirely (it looks more complicated than it really is).
3067
 
3068
B<XML::Simple> will default to using a B<SAX> parser if one is available or
3069
B<XML::Parser> if SAX is not available.
3070
 
3071
You can dictate which parser module is used by setting either the environment
3072
variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
3073
$XML::Simple::PREFERRED_PARSER to contain the module name.  The following rules
3074
are used:
3075
 
3076
=over 4
3077
 
3078
=item *
3079
 
3080
The package variable takes precedence over the environment variable if both are defined.  To force B<XML::Simple> to ignore the environment settings and use
3081
its default rules, you can set the package variable to an empty string.
3082
 
3083
=item *
3084
 
3085
If the 'preferred parser' is set to the string 'XML::Parser', then
3086
L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
3087
installed).
3088
 
3089
=item *
3090
 
3091
If the 'preferred parser' is set to some other value, then it is assumed to be
3092
the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
3093
If L<XML::SAX> is not installed, or the requested parser module is not
3094
installed, then C<XMLin()> will die.
3095
 
3096
=item *
3097
 
3098
If the 'preferred parser' is not defined at all (the normal default
3099
state), an attempt will be made to load L<XML::SAX>.  If L<XML::SAX> is
3100
installed, then a parser module will be selected according to
3101
L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
3102
parser installed).
3103
 
3104
=item *
3105
 
3106
if the 'preferred parser' is not defined and B<XML::SAX> is not
3107
installed, then B<XML::Parser> will be used.  C<XMLin()> will die if
3108
L<XML::Parser> is not installed.
3109
 
3110
=back
3111
 
3112
Note: The B<XML::SAX> distribution includes an XML parser written entirely in
3113
Perl.  It is very portable but it is not very fast.  You should consider
3114
installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
3115
platform.
3116
 
3117
=head1 ERROR HANDLING
3118
 
3119
The XML standard is very clear on the issue of non-compliant documents.  An
3120
error in parsing any single element (for example a missing end tag) must cause
3121
the whole document to be rejected.  B<XML::Simple> will die with an appropriate
3122
message if it encounters a parsing error.
3123
 
3124
If dying is not appropriate for your application, you should arrange to call
3125
C<XMLin()> in an eval block and look for errors in $@.  eg:
3126
 
3127
    my $config = eval { XMLin() };
3128
    PopUpMessage($@) if($@);
3129
 
3130
Note, there is a common misconception that use of B<eval> will significantly
3131
slow down a script.  While that may be true when the code being eval'd is in a
3132
string, it is not true of code like the sample above.
3133
 
3134
=head1 EXAMPLES
3135
 
3136
When C<XMLin()> reads the following very simple piece of XML:
3137
 
3138
    <opt username="testuser" password="frodo"></opt>
3139
 
3140
it returns the following data structure:
3141
 
3142
    {
3143
      'username' => 'testuser',
3144
      'password' => 'frodo'
3145
    }
3146
 
3147
The identical result could have been produced with this alternative XML:
3148
 
3149
    <opt username="testuser" password="frodo" />
3150
 
3151
Or this (although see 'ForceArray' option for variations):
3152
 
3153
    <opt>
3154
      <username>testuser</username>
3155
      <password>frodo</password>
3156
    </opt>
3157
 
3158
Repeated nested elements are represented as anonymous arrays:
3159
 
3160
    <opt>
3161
      <person firstname="Joe" lastname="Smith">
3162
        <email>joe@smith.com</email>
3163
        <email>jsmith@yahoo.com</email>
3164
      </person>
3165
      <person firstname="Bob" lastname="Smith">
3166
        <email>bob@smith.com</email>
3167
      </person>
3168
    </opt>
3169
 
3170
    {
3171
      'person' => [
3172
                    {
3173
                      'email' => [
3174
                                   'joe@smith.com',
3175
                                   'jsmith@yahoo.com'
3176
                                 ],
3177
                      'firstname' => 'Joe',
3178
                      'lastname' => 'Smith'
3179
                    },
3180
                    {
3181
                      'email' => 'bob@smith.com',
3182
                      'firstname' => 'Bob',
3183
                      'lastname' => 'Smith'
3184
                    }
3185
                  ]
3186
    }
3187
 
3188
Nested elements with a recognised key attribute are transformed (folded) from
3189
an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
3190
option):
3191
 
3192
    <opt>
3193
      <person key="jsmith" firstname="Joe" lastname="Smith" />
3194
      <person key="tsmith" firstname="Tom" lastname="Smith" />
3195
      <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
3196
    </opt>
3197
 
3198
    {
3199
      'person' => {
3200
                    'jbloggs' => {
3201
                                   'firstname' => 'Joe',
3202
                                   'lastname' => 'Bloggs'
3203
                                 },
3204
                    'tsmith' => {
3205
                                  'firstname' => 'Tom',
3206
                                  'lastname' => 'Smith'
3207
                                },
3208
                    'jsmith' => {
3209
                                  'firstname' => 'Joe',
3210
                                  'lastname' => 'Smith'
3211
                                }
3212
                  }
3213
    }
3214
 
3215
 
3216
The <anon> tag can be used to form anonymous arrays:
3217
 
3218
    <opt>
3219
      <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
3220
      <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
3221
      <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
3222
      <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
3223
    </opt>
3224
 
3225
    {
3226
      'head' => [
3227
                  [ 'Col 1', 'Col 2', 'Col 3' ]
3228
                ],
3229
      'data' => [
3230
                  [ 'R1C1', 'R1C2', 'R1C3' ],
3231
                  [ 'R2C1', 'R2C2', 'R2C3' ],
3232
                  [ 'R3C1', 'R3C2', 'R3C3' ]
3233
                ]
3234
    }
3235
 
3236
Anonymous arrays can be nested to arbirtrary levels and as a special case, if
3237
the surrounding tags for an XML document contain only an anonymous array the
3238
arrayref will be returned directly rather than the usual hashref:
3239
 
3240
    <opt>
3241
      <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
3242
      <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
3243
      <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
3244
    </opt>
3245
 
3246
    [
3247
      [ 'Col 1', 'Col 2' ],
3248
      [ 'R1C1', 'R1C2' ],
3249
      [ 'R2C1', 'R2C2' ]
3250
    ]
3251
 
3252
Elements which only contain text content will simply be represented as a
3253
scalar.  Where an element has both attributes and text content, the element
3254
will be represented as a hashref with the text content in the 'content' key
3255
(see the C<ContentKey> option):
3256
 
3257
  <opt>
3258
    <one>first</one>
3259
    <two attr="value">second</two>
3260
  </opt>
3261
 
3262
  {
3263
    'one' => 'first',
3264
    'two' => { 'attr' => 'value', 'content' => 'second' }
3265
  }
3266
 
3267
Mixed content (elements which contain both text content and nested elements)
3268
will be not be represented in a useful way - element order and significant
3269
whitespace will be lost.  If you need to work with mixed content, then
3270
XML::Simple is not the right tool for your job - check out the next section.
3271
 
3272
=head1 WHERE TO FROM HERE?
3273
 
3274
B<XML::Simple> is able to present a simple API because it makes some
3275
assumptions on your behalf.  These include:
3276
 
3277
=over 4
3278
 
3279
=item *
3280
 
3281
You're not interested in text content consisting only of whitespace
3282
 
3283
=item *
3284
 
3285
You don't mind that when things get slurped into a hash the order is lost
3286
 
3287
=item *
3288
 
3289
You don't want fine-grained control of the formatting of generated XML
3290
 
3291
=item *
3292
 
3293
You would never use a hash key that was not a legal XML element name
3294
 
3295
=item *
3296
 
3297
You don't need help converting between different encodings
3298
 
3299
=back
3300
 
3301
In a serious XML project, you'll probably outgrow these assumptions fairly
3302
quickly.  This section of the document used to offer some advice on chosing a
3303
more powerful option.  That advice has now grown into the 'Perl-XML FAQ'
3304
document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
3305
 
3306
The advice in the FAQ boils down to a quick explanation of tree versus
3307
event based parsers and then recommends:
3308
 
3309
For event based parsing, use SAX (do not set out to write any new code for
3310
XML::Parser's handler API - it is obselete).
3311
 
3312
For tree-based parsing, you could choose between the 'Perlish' approach of
3313
L<XML::Twig> and more standards based DOM implementations - preferably one with
3314
XPath support such as L<XML::LibXML>.
3315
 
3316
 
3317
=head1 SEE ALSO
3318
 
3319
B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
3320
 
3321
To generate documents with namespaces, L<XML::NamespaceSupport> is required.
3322
 
3323
The optional caching functions require L<Storable>.
3324
 
3325
Answers to Frequently Asked Questions about XML::Simple are bundled with this
3326
distribution as: L<XML::Simple::FAQ>
3327
 
3328
=head1 COPYRIGHT
3329
 
3330
Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
3331
 
3332
This library is free software; you can redistribute it and/or modify it
3333
under the same terms as Perl itself.
3334
 
3335
=cut
3336
 
3337