| 4384 |
dpurdie |
1 |
package XML::Simple;
|
|
|
2 |
BEGIN {
|
|
|
3 |
$XML::Simple::VERSION = '2.20';
|
|
|
4 |
}
|
|
|
5 |
|
|
|
6 |
=head1 NAME
|
|
|
7 |
|
|
|
8 |
XML::Simple - Easily read/write XML (esp config files)
|
|
|
9 |
|
|
|
10 |
=head1 SYNOPSIS
|
|
|
11 |
|
|
|
12 |
use XML::Simple qw(:strict);
|
|
|
13 |
|
|
|
14 |
my $ref = XMLin([<xml file or string>] [, <options>]);
|
|
|
15 |
|
|
|
16 |
my $xml = XMLout($hashref [, <options>]);
|
|
|
17 |
|
|
|
18 |
Or the object oriented way:
|
|
|
19 |
|
|
|
20 |
require XML::Simple qw(:strict);
|
|
|
21 |
|
|
|
22 |
my $xs = XML::Simple->new([<options>]);
|
|
|
23 |
|
|
|
24 |
my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
|
|
|
25 |
|
|
|
26 |
my $xml = $xs->XMLout($hashref [, <options>]);
|
|
|
27 |
|
|
|
28 |
(or see L<"SAX SUPPORT"> for 'the SAX way').
|
|
|
29 |
|
|
|
30 |
Note, in these examples, the square brackets are used to denote optional items
|
|
|
31 |
not to imply items should be supplied in arrayrefs.
|
|
|
32 |
|
|
|
33 |
=cut
|
|
|
34 |
|
|
|
35 |
# See after __END__ for more POD documentation
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
# Load essentials here, other modules loaded on demand later
|
|
|
39 |
|
|
|
40 |
use strict;
|
|
|
41 |
use Carp;
|
|
|
42 |
require Exporter;
|
|
|
43 |
|
|
|
44 |
|
|
|
45 |
##############################################################################
|
|
|
46 |
# Define some constants
|
|
|
47 |
#
|
|
|
48 |
|
|
|
49 |
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
|
|
|
50 |
|
|
|
51 |
@ISA = qw(Exporter);
|
|
|
52 |
@EXPORT = qw(XMLin XMLout);
|
|
|
53 |
@EXPORT_OK = qw(xml_in xml_out);
|
|
|
54 |
$PREFERRED_PARSER = undef;
|
|
|
55 |
|
|
|
56 |
my %StrictMode = ();
|
|
|
57 |
|
|
|
58 |
my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr
|
|
|
59 |
searchpath forcearray cache suppressempty parseropts
|
|
|
60 |
grouptags nsexpand datahandler varattr variables
|
|
|
61 |
normalisespace normalizespace valueattr strictmode);
|
|
|
62 |
|
|
|
63 |
my @KnownOptOut = qw(keyattr keeproot contentkey noattr
|
|
|
64 |
rootname xmldecl outputfile noescape suppressempty
|
|
|
65 |
grouptags nsexpand handler noindent attrindent nosort
|
|
|
66 |
valueattr numericescape strictmode);
|
|
|
67 |
|
|
|
68 |
my @DefKeyAttr = qw(name key id);
|
|
|
69 |
my $DefRootName = qq(opt);
|
|
|
70 |
my $DefContentKey = qq(content);
|
|
|
71 |
my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>);
|
|
|
72 |
|
|
|
73 |
my $xmlns_ns = 'http://www.w3.org/2000/xmlns/';
|
|
|
74 |
my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
##############################################################################
|
|
|
78 |
# Globals for use by caching routines
|
|
|
79 |
#
|
|
|
80 |
|
|
|
81 |
my %MemShareCache = ();
|
|
|
82 |
my %MemCopyCache = ();
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
##############################################################################
|
|
|
86 |
# Wrapper for Exporter - handles ':strict'
|
|
|
87 |
#
|
|
|
88 |
|
|
|
89 |
sub import {
|
|
|
90 |
# Handle the :strict tag
|
|
|
91 |
|
|
|
92 |
my($calling_package) = caller();
|
|
|
93 |
_strict_mode_for_caller(1) if grep(/^:strict$/, @_);
|
|
|
94 |
|
|
|
95 |
# Pass everything else to Exporter.pm
|
|
|
96 |
|
|
|
97 |
@_ = grep(!/^:strict$/, @_);
|
|
|
98 |
goto &Exporter::import;
|
|
|
99 |
}
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
##############################################################################
|
|
|
103 |
# Constructor for optional object interface.
|
|
|
104 |
#
|
|
|
105 |
|
|
|
106 |
sub new {
|
|
|
107 |
my $class = shift;
|
|
|
108 |
|
|
|
109 |
if(@_ % 2) {
|
|
|
110 |
croak "Default options must be name=>value pairs (odd number supplied)";
|
|
|
111 |
}
|
|
|
112 |
|
|
|
113 |
my %known_opt;
|
|
|
114 |
@known_opt{@KnownOptIn, @KnownOptOut} = ();
|
|
|
115 |
|
|
|
116 |
my %raw_opt = @_;
|
|
|
117 |
$raw_opt{strictmode} = _strict_mode_for_caller()
|
|
|
118 |
unless exists $raw_opt{strictmode};
|
|
|
119 |
my %def_opt;
|
|
|
120 |
while(my($key, $val) = each %raw_opt) {
|
|
|
121 |
my $lkey = lc($key);
|
|
|
122 |
$lkey =~ s/_//g;
|
|
|
123 |
croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
|
|
|
124 |
$def_opt{$lkey} = $val;
|
|
|
125 |
}
|
|
|
126 |
my $self = { def_opt => \%def_opt };
|
|
|
127 |
|
|
|
128 |
return(bless($self, $class));
|
|
|
129 |
}
|
|
|
130 |
|
|
|
131 |
|
|
|
132 |
##############################################################################
|
|
|
133 |
# Sub: _strict_mode_for_caller()
|
|
|
134 |
#
|
|
|
135 |
# Gets or sets the XML::Simple :strict mode flag for the calling namespace.
|
|
|
136 |
# Walks back through call stack to find the calling namespace and sets the
|
|
|
137 |
# :strict mode flag for that namespace if an argument was supplied and returns
|
|
|
138 |
# the flag value if not.
|
|
|
139 |
#
|
|
|
140 |
|
|
|
141 |
sub _strict_mode_for_caller {
|
|
|
142 |
my $set_mode = @_;
|
|
|
143 |
my $frame = 1;
|
|
|
144 |
while(my($package) = caller($frame++)) {
|
|
|
145 |
next if $package eq 'XML::Simple';
|
|
|
146 |
$StrictMode{$package} = 1 if $set_mode;
|
|
|
147 |
return $StrictMode{$package};
|
|
|
148 |
}
|
|
|
149 |
return(0);
|
|
|
150 |
}
|
|
|
151 |
|
|
|
152 |
|
|
|
153 |
##############################################################################
|
|
|
154 |
# Sub: _get_object()
|
|
|
155 |
#
|
|
|
156 |
# Helper routine called from XMLin() and XMLout() to create an object if none
|
|
|
157 |
# was provided. Note, this routine does mess with the caller's @_ array.
|
|
|
158 |
#
|
|
|
159 |
|
|
|
160 |
sub _get_object {
|
|
|
161 |
my $self;
|
|
|
162 |
if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) {
|
|
|
163 |
$self = shift;
|
|
|
164 |
}
|
|
|
165 |
else {
|
|
|
166 |
$self = XML::Simple->new();
|
|
|
167 |
}
|
|
|
168 |
|
|
|
169 |
return $self;
|
|
|
170 |
}
|
|
|
171 |
|
|
|
172 |
|
|
|
173 |
##############################################################################
|
|
|
174 |
# Sub/Method: XMLin()
|
|
|
175 |
#
|
|
|
176 |
# Exported routine for slurping XML into a hashref - see pod for info.
|
|
|
177 |
#
|
|
|
178 |
# May be called as object method or as a plain function.
|
|
|
179 |
#
|
|
|
180 |
# Expects one arg for the source XML, optionally followed by a number of
|
|
|
181 |
# name => value option pairs.
|
|
|
182 |
#
|
|
|
183 |
|
|
|
184 |
sub XMLin {
|
|
|
185 |
my $self = &_get_object; # note, @_ is passed implicitly
|
|
|
186 |
|
|
|
187 |
my $target = shift;
|
|
|
188 |
|
|
|
189 |
|
|
|
190 |
# Work out whether to parse a string, a file or a filehandle
|
|
|
191 |
|
|
|
192 |
if(not defined $target) {
|
|
|
193 |
return $self->parse_file(undef, @_);
|
|
|
194 |
}
|
|
|
195 |
|
|
|
196 |
elsif($target eq '-') {
|
|
|
197 |
local($/) = undef;
|
|
|
198 |
$target = <STDIN>;
|
|
|
199 |
return $self->parse_string(\$target, @_);
|
|
|
200 |
}
|
|
|
201 |
|
|
|
202 |
elsif(my $type = ref($target)) {
|
|
|
203 |
if($type eq 'SCALAR') {
|
|
|
204 |
return $self->parse_string($target, @_);
|
|
|
205 |
}
|
|
|
206 |
else {
|
|
|
207 |
return $self->parse_fh($target, @_);
|
|
|
208 |
}
|
|
|
209 |
}
|
|
|
210 |
|
|
|
211 |
elsif($target =~ m{<.*?>}s) {
|
|
|
212 |
return $self->parse_string(\$target, @_);
|
|
|
213 |
}
|
|
|
214 |
|
|
|
215 |
else {
|
|
|
216 |
return $self->parse_file($target, @_);
|
|
|
217 |
}
|
|
|
218 |
}
|
|
|
219 |
|
|
|
220 |
|
|
|
221 |
##############################################################################
|
|
|
222 |
# Sub/Method: parse_file()
|
|
|
223 |
#
|
|
|
224 |
# Same as XMLin, but only parses from a named file.
|
|
|
225 |
#
|
|
|
226 |
|
|
|
227 |
sub parse_file {
|
|
|
228 |
my $self = &_get_object; # note, @_ is passed implicitly
|
|
|
229 |
|
|
|
230 |
my $filename = shift;
|
|
|
231 |
|
|
|
232 |
$self->handle_options('in', @_);
|
|
|
233 |
|
|
|
234 |
$filename = $self->default_config_file if not defined $filename;
|
|
|
235 |
|
|
|
236 |
$filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
|
|
|
237 |
|
|
|
238 |
# Check cache for previous parse
|
|
|
239 |
|
|
|
240 |
if($self->{opt}->{cache}) {
|
|
|
241 |
foreach my $scheme (@{$self->{opt}->{cache}}) {
|
|
|
242 |
my $method = 'cache_read_' . $scheme;
|
|
|
243 |
my $opt = $self->$method($filename);
|
|
|
244 |
return($opt) if($opt);
|
|
|
245 |
}
|
|
|
246 |
}
|
|
|
247 |
|
|
|
248 |
my $ref = $self->build_simple_tree($filename, undef);
|
|
|
249 |
|
|
|
250 |
if($self->{opt}->{cache}) {
|
|
|
251 |
my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
|
|
|
252 |
$self->$method($ref, $filename);
|
|
|
253 |
}
|
|
|
254 |
|
|
|
255 |
return $ref;
|
|
|
256 |
}
|
|
|
257 |
|
|
|
258 |
|
|
|
259 |
##############################################################################
|
|
|
260 |
# Sub/Method: parse_fh()
|
|
|
261 |
#
|
|
|
262 |
# Same as XMLin, but only parses from a filehandle.
|
|
|
263 |
#
|
|
|
264 |
|
|
|
265 |
sub parse_fh {
|
|
|
266 |
my $self = &_get_object; # note, @_ is passed implicitly
|
|
|
267 |
|
|
|
268 |
my $fh = shift;
|
|
|
269 |
croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
|
|
|
270 |
" as a filehandle" unless ref $fh;
|
|
|
271 |
|
|
|
272 |
$self->handle_options('in', @_);
|
|
|
273 |
|
|
|
274 |
return $self->build_simple_tree(undef, $fh);
|
|
|
275 |
}
|
|
|
276 |
|
|
|
277 |
|
|
|
278 |
##############################################################################
|
|
|
279 |
# Sub/Method: parse_string()
|
|
|
280 |
#
|
|
|
281 |
# Same as XMLin, but only parses from a string or a reference to a string.
|
|
|
282 |
#
|
|
|
283 |
|
|
|
284 |
sub parse_string {
|
|
|
285 |
my $self = &_get_object; # note, @_ is passed implicitly
|
|
|
286 |
|
|
|
287 |
my $string = shift;
|
|
|
288 |
|
|
|
289 |
$self->handle_options('in', @_);
|
|
|
290 |
|
|
|
291 |
return $self->build_simple_tree(undef, ref $string ? $string : \$string);
|
|
|
292 |
}
|
|
|
293 |
|
|
|
294 |
|
|
|
295 |
##############################################################################
|
|
|
296 |
# Method: default_config_file()
|
|
|
297 |
#
|
|
|
298 |
# Returns the name of the XML file to parse if no filename (or XML string)
|
|
|
299 |
# was provided.
|
|
|
300 |
#
|
|
|
301 |
|
|
|
302 |
sub default_config_file {
|
|
|
303 |
my $self = shift;
|
|
|
304 |
|
|
|
305 |
require File::Basename;
|
|
|
306 |
|
|
|
307 |
my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
|
|
|
308 |
|
|
|
309 |
# Add script directory to searchpath
|
|
|
310 |
|
|
|
311 |
if($script_dir) {
|
|
|
312 |
unshift(@{$self->{opt}->{searchpath}}, $script_dir);
|
|
|
313 |
}
|
|
|
314 |
|
|
|
315 |
return $basename . '.xml';
|
|
|
316 |
}
|
|
|
317 |
|
|
|
318 |
|
|
|
319 |
##############################################################################
|
|
|
320 |
# Method: build_simple_tree()
|
|
|
321 |
#
|
|
|
322 |
# Builds a 'tree' data structure as provided by XML::Parser and then
|
|
|
323 |
# 'simplifies' it as specified by the various options in effect.
|
|
|
324 |
#
|
|
|
325 |
|
|
|
326 |
sub build_simple_tree {
|
|
|
327 |
my $self = shift;
|
|
|
328 |
|
|
|
329 |
my $tree = $self->build_tree(@_);
|
|
|
330 |
|
|
|
331 |
return $self->{opt}->{keeproot}
|
|
|
332 |
? $self->collapse({}, @$tree)
|
|
|
333 |
: $self->collapse(@{$tree->[1]});
|
|
|
334 |
}
|
|
|
335 |
|
|
|
336 |
|
|
|
337 |
##############################################################################
|
|
|
338 |
# Method: build_tree()
|
|
|
339 |
#
|
|
|
340 |
# This routine will be called if there is no suitable pre-parsed tree in a
|
|
|
341 |
# cache. It parses the XML and returns an XML::Parser 'Tree' style data
|
|
|
342 |
# structure (summarised in the comments for the collapse() routine below).
|
|
|
343 |
#
|
|
|
344 |
# XML::Simple requires the services of another module that knows how to parse
|
|
|
345 |
# XML. If XML::SAX is installed, the default SAX parser will be used,
|
|
|
346 |
# otherwise XML::Parser will be used.
|
|
|
347 |
#
|
|
|
348 |
# This routine expects to be passed a filename as argument 1 or a 'string' as
|
|
|
349 |
# argument 2. The 'string' might be a string of XML (passed by reference to
|
|
|
350 |
# save memory) or it might be a reference to an IO::Handle. (This
|
|
|
351 |
# non-intuitive mess results in part from the way XML::Parser works but that's
|
|
|
352 |
# really no excuse).
|
|
|
353 |
#
|
|
|
354 |
|
|
|
355 |
sub build_tree {
|
|
|
356 |
my $self = shift;
|
|
|
357 |
my $filename = shift;
|
|
|
358 |
my $string = shift;
|
|
|
359 |
|
|
|
360 |
|
|
|
361 |
my $preferred_parser = $PREFERRED_PARSER;
|
|
|
362 |
unless(defined($preferred_parser)) {
|
|
|
363 |
$preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
|
|
|
364 |
}
|
|
|
365 |
if($preferred_parser eq 'XML::Parser') {
|
|
|
366 |
return($self->build_tree_xml_parser($filename, $string));
|
|
|
367 |
}
|
|
|
368 |
|
|
|
369 |
eval { require XML::SAX; }; # We didn't need it until now
|
|
|
370 |
if($@) { # No XML::SAX - fall back to XML::Parser
|
|
|
371 |
if($preferred_parser) { # unless a SAX parser was expressly requested
|
|
|
372 |
croak "XMLin() could not load XML::SAX";
|
|
|
373 |
}
|
|
|
374 |
return($self->build_tree_xml_parser($filename, $string));
|
|
|
375 |
}
|
|
|
376 |
|
|
|
377 |
$XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
|
|
|
378 |
|
|
|
379 |
my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
|
|
|
380 |
|
|
|
381 |
$self->{nocollapse} = 1;
|
|
|
382 |
my($tree);
|
|
|
383 |
if($filename) {
|
|
|
384 |
$tree = $sp->parse_uri($filename);
|
|
|
385 |
}
|
|
|
386 |
else {
|
|
|
387 |
if(ref($string) && ref($string) ne 'SCALAR') {
|
|
|
388 |
$tree = $sp->parse_file($string);
|
|
|
389 |
}
|
|
|
390 |
else {
|
|
|
391 |
$tree = $sp->parse_string($$string);
|
|
|
392 |
}
|
|
|
393 |
}
|
|
|
394 |
|
|
|
395 |
return($tree);
|
|
|
396 |
}
|
|
|
397 |
|
|
|
398 |
|
|
|
399 |
##############################################################################
|
|
|
400 |
# Method: build_tree_xml_parser()
|
|
|
401 |
#
|
|
|
402 |
# This routine will be called if XML::SAX is not installed, or if XML::Parser
|
|
|
403 |
# was specifically requested. It takes the same arguments as build_tree() and
|
|
|
404 |
# returns the same data structure (XML::Parser 'Tree' style).
|
|
|
405 |
#
|
|
|
406 |
|
|
|
407 |
sub build_tree_xml_parser {
|
|
|
408 |
my $self = shift;
|
|
|
409 |
my $filename = shift;
|
|
|
410 |
my $string = shift;
|
|
|
411 |
|
|
|
412 |
|
|
|
413 |
eval {
|
|
|
414 |
local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load()
|
|
|
415 |
require XML::Parser; # We didn't need it until now
|
|
|
416 |
};
|
|
|
417 |
if($@) {
|
|
|
418 |
croak "XMLin() requires either XML::SAX or XML::Parser";
|
|
|
419 |
}
|
|
|
420 |
|
|
|
421 |
if($self->{opt}->{nsexpand}) {
|
|
|
422 |
carp "'nsexpand' option requires XML::SAX";
|
|
|
423 |
}
|
|
|
424 |
|
|
|
425 |
my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
|
|
|
426 |
my($tree);
|
|
|
427 |
if($filename) {
|
|
|
428 |
# $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl
|
|
|
429 |
open(my $xfh, '<', $filename) || croak qq($filename - $!);
|
|
|
430 |
$tree = $xp->parse($xfh);
|
|
|
431 |
}
|
|
|
432 |
else {
|
|
|
433 |
$tree = $xp->parse($$string);
|
|
|
434 |
}
|
|
|
435 |
|
|
|
436 |
return($tree);
|
|
|
437 |
}
|
|
|
438 |
|
|
|
439 |
|
|
|
440 |
##############################################################################
|
|
|
441 |
# Method: cache_write_storable()
|
|
|
442 |
#
|
|
|
443 |
# Wrapper routine for invoking Storable::nstore() to cache a parsed data
|
|
|
444 |
# structure.
|
|
|
445 |
#
|
|
|
446 |
|
|
|
447 |
sub cache_write_storable {
|
|
|
448 |
my($self, $data, $filename) = @_;
|
|
|
449 |
|
|
|
450 |
my $cachefile = $self->storable_filename($filename);
|
|
|
451 |
|
|
|
452 |
require Storable; # We didn't need it until now
|
|
|
453 |
|
|
|
454 |
if ('VMS' eq $^O) {
|
|
|
455 |
Storable::nstore($data, $cachefile);
|
|
|
456 |
}
|
|
|
457 |
else {
|
|
|
458 |
# If the following line fails for you, your Storable.pm is old - upgrade
|
|
|
459 |
Storable::lock_nstore($data, $cachefile);
|
|
|
460 |
}
|
|
|
461 |
|
|
|
462 |
}
|
|
|
463 |
|
|
|
464 |
|
|
|
465 |
##############################################################################
|
|
|
466 |
# Method: cache_read_storable()
|
|
|
467 |
#
|
|
|
468 |
# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
|
|
|
469 |
# data structure. Only returns cached data if the cache file exists and is
|
|
|
470 |
# newer than the source XML file.
|
|
|
471 |
#
|
|
|
472 |
|
|
|
473 |
sub cache_read_storable {
|
|
|
474 |
my($self, $filename) = @_;
|
|
|
475 |
|
|
|
476 |
my $cachefile = $self->storable_filename($filename);
|
|
|
477 |
|
|
|
478 |
return unless(-r $cachefile);
|
|
|
479 |
return unless((stat($cachefile))[9] > (stat($filename))[9]);
|
|
|
480 |
|
|
|
481 |
require Storable; # We didn't need it until now
|
|
|
482 |
|
|
|
483 |
if ('VMS' eq $^O) {
|
|
|
484 |
return(Storable::retrieve($cachefile));
|
|
|
485 |
}
|
|
|
486 |
else {
|
|
|
487 |
return(Storable::lock_retrieve($cachefile));
|
|
|
488 |
}
|
|
|
489 |
|
|
|
490 |
}
|
|
|
491 |
|
|
|
492 |
|
|
|
493 |
##############################################################################
|
|
|
494 |
# Method: storable_filename()
|
|
|
495 |
#
|
|
|
496 |
# Translates the supplied source XML filename into a filename for the storable
|
|
|
497 |
# cached data. A '.stor' suffix is added after stripping an optional '.xml'
|
|
|
498 |
# suffix.
|
|
|
499 |
#
|
|
|
500 |
|
|
|
501 |
sub storable_filename {
|
|
|
502 |
my($self, $cachefile) = @_;
|
|
|
503 |
|
|
|
504 |
$cachefile =~ s{(\.xml)?$}{.stor};
|
|
|
505 |
return $cachefile;
|
|
|
506 |
}
|
|
|
507 |
|
|
|
508 |
|
|
|
509 |
##############################################################################
|
|
|
510 |
# Method: cache_write_memshare()
|
|
|
511 |
#
|
|
|
512 |
# Takes the supplied data structure reference and stores it away in a global
|
|
|
513 |
# hash structure.
|
|
|
514 |
#
|
|
|
515 |
|
|
|
516 |
sub cache_write_memshare {
|
|
|
517 |
my($self, $data, $filename) = @_;
|
|
|
518 |
|
|
|
519 |
$MemShareCache{$filename} = [time(), $data];
|
|
|
520 |
}
|
|
|
521 |
|
|
|
522 |
|
|
|
523 |
##############################################################################
|
|
|
524 |
# Method: cache_read_memshare()
|
|
|
525 |
#
|
|
|
526 |
# Takes a filename and looks in a global hash for a cached parsed version.
|
|
|
527 |
#
|
|
|
528 |
|
|
|
529 |
sub cache_read_memshare {
|
|
|
530 |
my($self, $filename) = @_;
|
|
|
531 |
|
|
|
532 |
return unless($MemShareCache{$filename});
|
|
|
533 |
return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
|
|
|
534 |
|
|
|
535 |
return($MemShareCache{$filename}->[1]);
|
|
|
536 |
|
|
|
537 |
}
|
|
|
538 |
|
|
|
539 |
|
|
|
540 |
##############################################################################
|
|
|
541 |
# Method: cache_write_memcopy()
|
|
|
542 |
#
|
|
|
543 |
# Takes the supplied data structure and stores a copy of it in a global hash
|
|
|
544 |
# structure.
|
|
|
545 |
#
|
|
|
546 |
|
|
|
547 |
sub cache_write_memcopy {
|
|
|
548 |
my($self, $data, $filename) = @_;
|
|
|
549 |
|
|
|
550 |
require Storable; # We didn't need it until now
|
|
|
551 |
|
|
|
552 |
$MemCopyCache{$filename} = [time(), Storable::dclone($data)];
|
|
|
553 |
}
|
|
|
554 |
|
|
|
555 |
|
|
|
556 |
##############################################################################
|
|
|
557 |
# Method: cache_read_memcopy()
|
|
|
558 |
#
|
|
|
559 |
# Takes a filename and looks in a global hash for a cached parsed version.
|
|
|
560 |
# Returns a reference to a copy of that data structure.
|
|
|
561 |
#
|
|
|
562 |
|
|
|
563 |
sub cache_read_memcopy {
|
|
|
564 |
my($self, $filename) = @_;
|
|
|
565 |
|
|
|
566 |
return unless($MemCopyCache{$filename});
|
|
|
567 |
return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
|
|
|
568 |
|
|
|
569 |
return(Storable::dclone($MemCopyCache{$filename}->[1]));
|
|
|
570 |
|
|
|
571 |
}
|
|
|
572 |
|
|
|
573 |
|
|
|
574 |
##############################################################################
|
|
|
575 |
# Sub/Method: XMLout()
|
|
|
576 |
#
|
|
|
577 |
# Exported routine for 'unslurping' a data structure out to XML.
|
|
|
578 |
#
|
|
|
579 |
# Expects a reference to a data structure and an optional list of option
|
|
|
580 |
# name => value pairs.
|
|
|
581 |
#
|
|
|
582 |
|
|
|
583 |
sub XMLout {
|
|
|
584 |
my $self = &_get_object; # note, @_ is passed implicitly
|
|
|
585 |
|
|
|
586 |
croak "XMLout() requires at least one argument" unless(@_);
|
|
|
587 |
my $ref = shift;
|
|
|
588 |
|
|
|
589 |
$self->handle_options('out', @_);
|
|
|
590 |
|
|
|
591 |
|
|
|
592 |
# If namespace expansion is set, XML::NamespaceSupport is required
|
|
|
593 |
|
|
|
594 |
if($self->{opt}->{nsexpand}) {
|
|
|
595 |
require XML::NamespaceSupport;
|
|
|
596 |
$self->{nsup} = XML::NamespaceSupport->new();
|
|
|
597 |
$self->{ns_prefix} = 'aaa';
|
|
|
598 |
}
|
|
|
599 |
|
|
|
600 |
|
|
|
601 |
# Wrap top level arrayref in a hash
|
|
|
602 |
|
|
|
603 |
if(UNIVERSAL::isa($ref, 'ARRAY')) {
|
|
|
604 |
$ref = { anon => $ref };
|
|
|
605 |
}
|
|
|
606 |
|
|
|
607 |
|
|
|
608 |
# Extract rootname from top level hash if keeproot enabled
|
|
|
609 |
|
|
|
610 |
if($self->{opt}->{keeproot}) {
|
|
|
611 |
my(@keys) = keys(%$ref);
|
|
|
612 |
if(@keys == 1) {
|
|
|
613 |
$ref = $ref->{$keys[0]};
|
|
|
614 |
$self->{opt}->{rootname} = $keys[0];
|
|
|
615 |
}
|
|
|
616 |
}
|
|
|
617 |
|
|
|
618 |
# Ensure there are no top level attributes if we're not adding root elements
|
|
|
619 |
|
|
|
620 |
elsif($self->{opt}->{rootname} eq '') {
|
|
|
621 |
if(UNIVERSAL::isa($ref, 'HASH')) {
|
|
|
622 |
my $refsave = $ref;
|
|
|
623 |
$ref = {};
|
|
|
624 |
foreach (keys(%$refsave)) {
|
|
|
625 |
if(ref($refsave->{$_})) {
|
|
|
626 |
$ref->{$_} = $refsave->{$_};
|
|
|
627 |
}
|
|
|
628 |
else {
|
|
|
629 |
$ref->{$_} = [ $refsave->{$_} ];
|
|
|
630 |
}
|
|
|
631 |
}
|
|
|
632 |
}
|
|
|
633 |
}
|
|
|
634 |
|
|
|
635 |
|
|
|
636 |
# Encode the hashref and write to file if necessary
|
|
|
637 |
|
|
|
638 |
$self->{_ancestors} = [];
|
|
|
639 |
my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
|
|
|
640 |
delete $self->{_ancestors};
|
|
|
641 |
|
|
|
642 |
if($self->{opt}->{xmldecl}) {
|
|
|
643 |
$xml = $self->{opt}->{xmldecl} . "\n" . $xml;
|
|
|
644 |
}
|
|
|
645 |
|
|
|
646 |
if($self->{opt}->{outputfile}) {
|
|
|
647 |
if(ref($self->{opt}->{outputfile})) {
|
|
|
648 |
my $fh = $self->{opt}->{outputfile};
|
|
|
649 |
if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
|
|
|
650 |
eval { require IO::Handle; };
|
|
|
651 |
croak $@ if $@;
|
|
|
652 |
}
|
|
|
653 |
return($fh->print($xml));
|
|
|
654 |
}
|
|
|
655 |
else {
|
|
|
656 |
open(my $out, '>', "$self->{opt}->{outputfile}") ||
|
|
|
657 |
croak "open($self->{opt}->{outputfile}): $!";
|
|
|
658 |
binmode($out, ':utf8') if($] >= 5.008);
|
|
|
659 |
print $out $xml or croak "print: $!";
|
|
|
660 |
close $out or croak "close: $!";
|
|
|
661 |
}
|
|
|
662 |
}
|
|
|
663 |
elsif($self->{opt}->{handler}) {
|
|
|
664 |
require XML::SAX;
|
|
|
665 |
my $sp = XML::SAX::ParserFactory->parser(
|
|
|
666 |
Handler => $self->{opt}->{handler}
|
|
|
667 |
);
|
|
|
668 |
return($sp->parse_string($xml));
|
|
|
669 |
}
|
|
|
670 |
else {
|
|
|
671 |
return($xml);
|
|
|
672 |
}
|
|
|
673 |
}
|
|
|
674 |
|
|
|
675 |
|
|
|
676 |
##############################################################################
|
|
|
677 |
# Method: handle_options()
|
|
|
678 |
#
|
|
|
679 |
# Helper routine for both XMLin() and XMLout(). Both routines handle their
|
|
|
680 |
# first argument and assume all other args are options handled by this routine.
|
|
|
681 |
# Saves a hash of options in $self->{opt}.
|
|
|
682 |
#
|
|
|
683 |
# If default options were passed to the constructor, they will be retrieved
|
|
|
684 |
# here and merged with options supplied to the method call.
|
|
|
685 |
#
|
|
|
686 |
# First argument should be the string 'in' or the string 'out'.
|
|
|
687 |
#
|
|
|
688 |
# Remaining arguments should be name=>value pairs. Sets up default values
|
|
|
689 |
# for options not supplied. Unrecognised options are a fatal error.
|
|
|
690 |
#
|
|
|
691 |
|
|
|
692 |
sub handle_options {
|
|
|
693 |
my $self = shift;
|
|
|
694 |
my $dirn = shift;
|
|
|
695 |
|
|
|
696 |
|
|
|
697 |
# Determine valid options based on context
|
|
|
698 |
|
|
|
699 |
my %known_opt;
|
|
|
700 |
if($dirn eq 'in') {
|
|
|
701 |
@known_opt{@KnownOptIn} = @KnownOptIn;
|
|
|
702 |
}
|
|
|
703 |
else {
|
|
|
704 |
@known_opt{@KnownOptOut} = @KnownOptOut;
|
|
|
705 |
}
|
|
|
706 |
|
|
|
707 |
|
|
|
708 |
# Store supplied options in hashref and weed out invalid ones
|
|
|
709 |
|
|
|
710 |
if(@_ % 2) {
|
|
|
711 |
croak "Options must be name=>value pairs (odd number supplied)";
|
|
|
712 |
}
|
|
|
713 |
my %raw_opt = @_;
|
|
|
714 |
my $opt = {};
|
|
|
715 |
$self->{opt} = $opt;
|
|
|
716 |
|
|
|
717 |
while(my($key, $val) = each %raw_opt) {
|
|
|
718 |
my $lkey = lc($key);
|
|
|
719 |
$lkey =~ s/_//g;
|
|
|
720 |
croak "Unrecognised option: $key" unless($known_opt{$lkey});
|
|
|
721 |
$opt->{$lkey} = $val;
|
|
|
722 |
}
|
|
|
723 |
|
|
|
724 |
|
|
|
725 |
# Merge in options passed to constructor
|
|
|
726 |
|
|
|
727 |
foreach (keys(%known_opt)) {
|
|
|
728 |
unless(exists($opt->{$_})) {
|
|
|
729 |
if(exists($self->{def_opt}->{$_})) {
|
|
|
730 |
$opt->{$_} = $self->{def_opt}->{$_};
|
|
|
731 |
}
|
|
|
732 |
}
|
|
|
733 |
}
|
|
|
734 |
|
|
|
735 |
|
|
|
736 |
# Set sensible defaults if not supplied
|
|
|
737 |
|
|
|
738 |
if(exists($opt->{rootname})) {
|
|
|
739 |
unless(defined($opt->{rootname})) {
|
|
|
740 |
$opt->{rootname} = '';
|
|
|
741 |
}
|
|
|
742 |
}
|
|
|
743 |
else {
|
|
|
744 |
$opt->{rootname} = $DefRootName;
|
|
|
745 |
}
|
|
|
746 |
|
|
|
747 |
if($opt->{xmldecl} and $opt->{xmldecl} eq '1') {
|
|
|
748 |
$opt->{xmldecl} = $DefXmlDecl;
|
|
|
749 |
}
|
|
|
750 |
|
|
|
751 |
if(exists($opt->{contentkey})) {
|
|
|
752 |
if($opt->{contentkey} =~ m{^-(.*)$}) {
|
|
|
753 |
$opt->{contentkey} = $1;
|
|
|
754 |
$opt->{collapseagain} = 1;
|
|
|
755 |
}
|
|
|
756 |
}
|
|
|
757 |
else {
|
|
|
758 |
$opt->{contentkey} = $DefContentKey;
|
|
|
759 |
}
|
|
|
760 |
|
|
|
761 |
unless(exists($opt->{normalisespace})) {
|
|
|
762 |
$opt->{normalisespace} = $opt->{normalizespace};
|
|
|
763 |
}
|
|
|
764 |
$opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
|
|
|
765 |
|
|
|
766 |
# Cleanups for values assumed to be arrays later
|
|
|
767 |
|
|
|
768 |
if($opt->{searchpath}) {
|
|
|
769 |
unless(ref($opt->{searchpath})) {
|
|
|
770 |
$opt->{searchpath} = [ $opt->{searchpath} ];
|
|
|
771 |
}
|
|
|
772 |
}
|
|
|
773 |
else {
|
|
|
774 |
$opt->{searchpath} = [ ];
|
|
|
775 |
}
|
|
|
776 |
|
|
|
777 |
if($opt->{cache} and !ref($opt->{cache})) {
|
|
|
778 |
$opt->{cache} = [ $opt->{cache} ];
|
|
|
779 |
}
|
|
|
780 |
if($opt->{cache}) {
|
|
|
781 |
$_ = lc($_) foreach (@{$opt->{cache}});
|
|
|
782 |
foreach my $scheme (@{$opt->{cache}}) {
|
|
|
783 |
my $method = 'cache_read_' . $scheme;
|
|
|
784 |
croak "Unsupported caching scheme: $scheme"
|
|
|
785 |
unless($self->can($method));
|
|
|
786 |
}
|
|
|
787 |
}
|
|
|
788 |
|
|
|
789 |
if(exists($opt->{parseropts})) {
|
|
|
790 |
if($^W) {
|
|
|
791 |
carp "Warning: " .
|
|
|
792 |
"'ParserOpts' is deprecated, contact the author if you need it";
|
|
|
793 |
}
|
|
|
794 |
}
|
|
|
795 |
else {
|
|
|
796 |
$opt->{parseropts} = [ ];
|
|
|
797 |
}
|
|
|
798 |
|
|
|
799 |
|
|
|
800 |
# Special cleanup for {forcearray} which could be regex, arrayref or boolean
|
|
|
801 |
# or left to default to 0
|
|
|
802 |
|
|
|
803 |
if(exists($opt->{forcearray})) {
|
|
|
804 |
if(ref($opt->{forcearray}) eq 'Regexp') {
|
|
|
805 |
$opt->{forcearray} = [ $opt->{forcearray} ];
|
|
|
806 |
}
|
|
|
807 |
|
|
|
808 |
if(ref($opt->{forcearray}) eq 'ARRAY') {
|
|
|
809 |
my @force_list = @{$opt->{forcearray}};
|
|
|
810 |
if(@force_list) {
|
|
|
811 |
$opt->{forcearray} = {};
|
|
|
812 |
foreach my $tag (@force_list) {
|
|
|
813 |
if(ref($tag) eq 'Regexp') {
|
|
|
814 |
push @{$opt->{forcearray}->{_regex}}, $tag;
|
|
|
815 |
}
|
|
|
816 |
else {
|
|
|
817 |
$opt->{forcearray}->{$tag} = 1;
|
|
|
818 |
}
|
|
|
819 |
}
|
|
|
820 |
}
|
|
|
821 |
else {
|
|
|
822 |
$opt->{forcearray} = 0;
|
|
|
823 |
}
|
|
|
824 |
}
|
|
|
825 |
else {
|
|
|
826 |
$opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
|
|
|
827 |
}
|
|
|
828 |
}
|
|
|
829 |
else {
|
|
|
830 |
if($opt->{strictmode} and $dirn eq 'in') {
|
|
|
831 |
croak "No value specified for 'ForceArray' option in call to XML$dirn()";
|
|
|
832 |
}
|
|
|
833 |
$opt->{forcearray} = 0;
|
|
|
834 |
}
|
|
|
835 |
|
|
|
836 |
|
|
|
837 |
# Special cleanup for {keyattr} which could be arrayref or hashref or left
|
|
|
838 |
# to default to arrayref
|
|
|
839 |
|
|
|
840 |
if(exists($opt->{keyattr})) {
|
|
|
841 |
if(ref($opt->{keyattr})) {
|
|
|
842 |
if(ref($opt->{keyattr}) eq 'HASH') {
|
|
|
843 |
|
|
|
844 |
# Make a copy so we can mess with it
|
|
|
845 |
|
|
|
846 |
$opt->{keyattr} = { %{$opt->{keyattr}} };
|
|
|
847 |
|
|
|
848 |
|
|
|
849 |
# Convert keyattr => { elem => '+attr' }
|
|
|
850 |
# to keyattr => { elem => [ 'attr', '+' ] }
|
|
|
851 |
|
|
|
852 |
foreach my $el (keys(%{$opt->{keyattr}})) {
|
|
|
853 |
if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
|
|
|
854 |
$opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
|
|
|
855 |
if($opt->{strictmode} and $dirn eq 'in') {
|
|
|
856 |
next if($opt->{forcearray} == 1);
|
|
|
857 |
next if(ref($opt->{forcearray}) eq 'HASH'
|
|
|
858 |
and $opt->{forcearray}->{$el});
|
|
|
859 |
croak "<$el> set in KeyAttr but not in ForceArray";
|
|
|
860 |
}
|
|
|
861 |
}
|
|
|
862 |
else {
|
|
|
863 |
delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
|
|
|
864 |
}
|
|
|
865 |
}
|
|
|
866 |
}
|
|
|
867 |
else {
|
|
|
868 |
if(@{$opt->{keyattr}} == 0) {
|
|
|
869 |
delete($opt->{keyattr});
|
|
|
870 |
}
|
|
|
871 |
}
|
|
|
872 |
}
|
|
|
873 |
else {
|
|
|
874 |
$opt->{keyattr} = [ $opt->{keyattr} ];
|
|
|
875 |
}
|
|
|
876 |
}
|
|
|
877 |
else {
|
|
|
878 |
if($opt->{strictmode}) {
|
|
|
879 |
croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
|
|
|
880 |
}
|
|
|
881 |
$opt->{keyattr} = [ @DefKeyAttr ];
|
|
|
882 |
}
|
|
|
883 |
|
|
|
884 |
|
|
|
885 |
# Special cleanup for {valueattr} which could be arrayref or hashref
|
|
|
886 |
|
|
|
887 |
if(exists($opt->{valueattr})) {
|
|
|
888 |
if(ref($opt->{valueattr}) eq 'ARRAY') {
|
|
|
889 |
$opt->{valueattrlist} = {};
|
|
|
890 |
$opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
|
|
|
891 |
}
|
|
|
892 |
}
|
|
|
893 |
|
|
|
894 |
# make sure there's nothing weird in {grouptags}
|
|
|
895 |
|
|
|
896 |
if($opt->{grouptags}) {
|
|
|
897 |
croak "Illegal value for 'GroupTags' option - expected a hashref"
|
|
|
898 |
unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
|
|
|
899 |
|
|
|
900 |
while(my($key, $val) = each %{$opt->{grouptags}}) {
|
|
|
901 |
next if $key ne $val;
|
|
|
902 |
croak "Bad value in GroupTags: '$key' => '$val'";
|
|
|
903 |
}
|
|
|
904 |
}
|
|
|
905 |
|
|
|
906 |
|
|
|
907 |
# Check the {variables} option is valid and initialise variables hash
|
|
|
908 |
|
|
|
909 |
if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
|
|
|
910 |
croak "Illegal value for 'Variables' option - expected a hashref";
|
|
|
911 |
}
|
|
|
912 |
|
|
|
913 |
if($opt->{variables}) {
|
|
|
914 |
$self->{_var_values} = { %{$opt->{variables}} };
|
|
|
915 |
}
|
|
|
916 |
elsif($opt->{varattr}) {
|
|
|
917 |
$self->{_var_values} = {};
|
|
|
918 |
}
|
|
|
919 |
|
|
|
920 |
}
|
|
|
921 |
|
|
|
922 |
|
|
|
923 |
##############################################################################
|
|
|
924 |
# Method: find_xml_file()
|
|
|
925 |
#
|
|
|
926 |
# Helper routine for XMLin().
|
|
|
927 |
# Takes a filename, and a list of directories, attempts to locate the file in
|
|
|
928 |
# the directories listed.
|
|
|
929 |
# Returns a full pathname on success; croaks on failure.
|
|
|
930 |
#
|
|
|
931 |
|
|
|
932 |
sub find_xml_file {
|
|
|
933 |
my $self = shift;
|
|
|
934 |
my $file = shift;
|
|
|
935 |
my @search_path = @_;
|
|
|
936 |
|
|
|
937 |
|
|
|
938 |
require File::Basename;
|
|
|
939 |
require File::Spec;
|
|
|
940 |
|
|
|
941 |
my($filename, $filedir) = File::Basename::fileparse($file);
|
|
|
942 |
|
|
|
943 |
if($filename ne $file) { # Ignore searchpath if dir component
|
|
|
944 |
return($file) if(-e $file);
|
|
|
945 |
}
|
|
|
946 |
else {
|
|
|
947 |
my($path);
|
|
|
948 |
foreach $path (@search_path) {
|
|
|
949 |
my $fullpath = File::Spec->catfile($path, $file);
|
|
|
950 |
return($fullpath) if(-e $fullpath);
|
|
|
951 |
}
|
|
|
952 |
}
|
|
|
953 |
|
|
|
954 |
# If user did not supply a search path, default to current directory
|
|
|
955 |
|
|
|
956 |
if(!@search_path) {
|
|
|
957 |
return($file) if(-e $file);
|
|
|
958 |
croak "File does not exist: $file";
|
|
|
959 |
}
|
|
|
960 |
|
|
|
961 |
croak "Could not find $file in ", join(':', @search_path);
|
|
|
962 |
}
|
|
|
963 |
|
|
|
964 |
|
|
|
965 |
##############################################################################
|
|
|
966 |
# Method: collapse()
|
|
|
967 |
#
|
|
|
968 |
# Helper routine for XMLin(). This routine really comprises the 'smarts' (or
|
|
|
969 |
# value add) of this module.
|
|
|
970 |
#
|
|
|
971 |
# Takes the parse tree that XML::Parser produced from the supplied XML and
|
|
|
972 |
# recurses through it 'collapsing' unnecessary levels of indirection (nested
|
|
|
973 |
# arrays etc) to produce a data structure that is easier to work with.
|
|
|
974 |
#
|
|
|
975 |
# Elements in the original parser tree are represented as an element name
|
|
|
976 |
# followed by an arrayref. The first element of the array is a hashref
|
|
|
977 |
# containing the attributes. The rest of the array contains a list of any
|
|
|
978 |
# nested elements as name+arrayref pairs:
|
|
|
979 |
#
|
|
|
980 |
# <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
|
|
|
981 |
#
|
|
|
982 |
# The special element name '0' (zero) flags text content.
|
|
|
983 |
#
|
|
|
984 |
# This routine cuts down the noise by discarding any text content consisting of
|
|
|
985 |
# only whitespace and then moves the nested elements into the attribute hash
|
|
|
986 |
# using the name of the nested element as the hash key and the collapsed
|
|
|
987 |
# version of the nested element as the value. Multiple nested elements with
|
|
|
988 |
# the same name will initially be represented as an arrayref, but this may be
|
|
|
989 |
# 'folded' into a hashref depending on the value of the keyattr option.
|
|
|
990 |
#
|
|
|
991 |
|
|
|
992 |
sub collapse {
|
|
|
993 |
my $self = shift;
|
|
|
994 |
|
|
|
995 |
|
|
|
996 |
# Start with the hash of attributes
|
|
|
997 |
|
|
|
998 |
my $attr = shift;
|
|
|
999 |
if($self->{opt}->{noattr}) { # Discard if 'noattr' set
|
|
|
1000 |
$attr = $self->new_hashref;
|
|
|
1001 |
}
|
|
|
1002 |
elsif($self->{opt}->{normalisespace} == 2) {
|
|
|
1003 |
while(my($key, $value) = each %$attr) {
|
|
|
1004 |
$attr->{$key} = $self->normalise_space($value)
|
|
|
1005 |
}
|
|
|
1006 |
}
|
|
|
1007 |
|
|
|
1008 |
|
|
|
1009 |
# Do variable substitutions
|
|
|
1010 |
|
|
|
1011 |
if(my $var = $self->{_var_values}) {
|
|
|
1012 |
while(my($key, $val) = each(%$attr)) {
|
|
|
1013 |
$val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
|
|
|
1014 |
$attr->{$key} = $val;
|
|
|
1015 |
}
|
|
|
1016 |
}
|
|
|
1017 |
|
|
|
1018 |
|
|
|
1019 |
# Roll up 'value' attributes (but only if no nested elements)
|
|
|
1020 |
|
|
|
1021 |
if(!@_ and keys %$attr == 1) {
|
|
|
1022 |
my($k) = keys %$attr;
|
|
|
1023 |
if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) {
|
|
|
1024 |
return $attr->{$k};
|
|
|
1025 |
}
|
|
|
1026 |
}
|
|
|
1027 |
|
|
|
1028 |
|
|
|
1029 |
# Add any nested elements
|
|
|
1030 |
|
|
|
1031 |
my($key, $val);
|
|
|
1032 |
while(@_) {
|
|
|
1033 |
$key = shift;
|
|
|
1034 |
$val = shift;
|
|
|
1035 |
$val = '' if not defined $val;
|
|
|
1036 |
|
|
|
1037 |
if(ref($val)) {
|
|
|
1038 |
$val = $self->collapse(@$val);
|
|
|
1039 |
next if(!defined($val) and $self->{opt}->{suppressempty});
|
|
|
1040 |
}
|
|
|
1041 |
elsif($key eq '0') {
|
|
|
1042 |
next if($val =~ m{^\s*$}s); # Skip all whitespace content
|
|
|
1043 |
|
|
|
1044 |
$val = $self->normalise_space($val)
|
|
|
1045 |
if($self->{opt}->{normalisespace} == 2);
|
|
|
1046 |
|
|
|
1047 |
# do variable substitutions
|
|
|
1048 |
|
|
|
1049 |
if(my $var = $self->{_var_values}) {
|
|
|
1050 |
$val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
|
|
|
1051 |
}
|
|
|
1052 |
|
|
|
1053 |
|
|
|
1054 |
# look for variable definitions
|
|
|
1055 |
|
|
|
1056 |
if(my $var = $self->{opt}->{varattr}) {
|
|
|
1057 |
if(exists $attr->{$var}) {
|
|
|
1058 |
$self->set_var($attr->{$var}, $val);
|
|
|
1059 |
}
|
|
|
1060 |
}
|
|
|
1061 |
|
|
|
1062 |
|
|
|
1063 |
# Collapse text content in element with no attributes to a string
|
|
|
1064 |
|
|
|
1065 |
if(!%$attr and !@_) {
|
|
|
1066 |
return($self->{opt}->{forcecontent} ?
|
|
|
1067 |
{ $self->{opt}->{contentkey} => $val } : $val
|
|
|
1068 |
);
|
|
|
1069 |
}
|
|
|
1070 |
$key = $self->{opt}->{contentkey};
|
|
|
1071 |
}
|
|
|
1072 |
|
|
|
1073 |
|
|
|
1074 |
# Combine duplicate attributes into arrayref if required
|
|
|
1075 |
|
|
|
1076 |
if(exists($attr->{$key})) {
|
|
|
1077 |
if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
|
|
|
1078 |
push(@{$attr->{$key}}, $val);
|
|
|
1079 |
}
|
|
|
1080 |
else {
|
|
|
1081 |
$attr->{$key} = [ $attr->{$key}, $val ];
|
|
|
1082 |
}
|
|
|
1083 |
}
|
|
|
1084 |
elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
|
|
|
1085 |
$attr->{$key} = [ $val ];
|
|
|
1086 |
}
|
|
|
1087 |
else {
|
|
|
1088 |
if( $key ne $self->{opt}->{contentkey}
|
|
|
1089 |
and (
|
|
|
1090 |
($self->{opt}->{forcearray} == 1)
|
|
|
1091 |
or (
|
|
|
1092 |
(ref($self->{opt}->{forcearray}) eq 'HASH')
|
|
|
1093 |
and (
|
|
|
1094 |
$self->{opt}->{forcearray}->{$key}
|
|
|
1095 |
or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
|
|
|
1096 |
)
|
|
|
1097 |
)
|
|
|
1098 |
)
|
|
|
1099 |
) {
|
|
|
1100 |
$attr->{$key} = [ $val ];
|
|
|
1101 |
}
|
|
|
1102 |
else {
|
|
|
1103 |
$attr->{$key} = $val;
|
|
|
1104 |
}
|
|
|
1105 |
}
|
|
|
1106 |
|
|
|
1107 |
}
|
|
|
1108 |
|
|
|
1109 |
|
|
|
1110 |
# Turn arrayrefs into hashrefs if key fields present
|
|
|
1111 |
|
|
|
1112 |
if($self->{opt}->{keyattr}) {
|
|
|
1113 |
while(($key,$val) = each %$attr) {
|
|
|
1114 |
if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
|
|
|
1115 |
$attr->{$key} = $self->array_to_hash($key, $val);
|
|
|
1116 |
}
|
|
|
1117 |
}
|
|
|
1118 |
}
|
|
|
1119 |
|
|
|
1120 |
|
|
|
1121 |
# disintermediate grouped tags
|
|
|
1122 |
|
|
|
1123 |
if($self->{opt}->{grouptags}) {
|
|
|
1124 |
while(my($key, $val) = each(%$attr)) {
|
|
|
1125 |
next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
|
|
|
1126 |
next unless(exists($self->{opt}->{grouptags}->{$key}));
|
|
|
1127 |
|
|
|
1128 |
my($child_key, $child_val) = %$val;
|
|
|
1129 |
|
|
|
1130 |
if($self->{opt}->{grouptags}->{$key} eq $child_key) {
|
|
|
1131 |
$attr->{$key}= $child_val;
|
|
|
1132 |
}
|
|
|
1133 |
}
|
|
|
1134 |
}
|
|
|
1135 |
|
|
|
1136 |
|
|
|
1137 |
# Fold hashes containing a single anonymous array up into just the array
|
|
|
1138 |
|
|
|
1139 |
my $count = scalar keys %$attr;
|
|
|
1140 |
if($count == 1
|
|
|
1141 |
and exists $attr->{anon}
|
|
|
1142 |
and UNIVERSAL::isa($attr->{anon}, 'ARRAY')
|
|
|
1143 |
) {
|
|
|
1144 |
return($attr->{anon});
|
|
|
1145 |
}
|
|
|
1146 |
|
|
|
1147 |
|
|
|
1148 |
# Do the right thing if hash is empty, otherwise just return it
|
|
|
1149 |
|
|
|
1150 |
if(!%$attr and exists($self->{opt}->{suppressempty})) {
|
|
|
1151 |
if(defined($self->{opt}->{suppressempty}) and
|
|
|
1152 |
$self->{opt}->{suppressempty} eq '') {
|
|
|
1153 |
return('');
|
|
|
1154 |
}
|
|
|
1155 |
return(undef);
|
|
|
1156 |
}
|
|
|
1157 |
|
|
|
1158 |
|
|
|
1159 |
# Roll up named elements with named nested 'value' attributes
|
|
|
1160 |
|
|
|
1161 |
if($self->{opt}->{valueattr}) {
|
|
|
1162 |
while(my($key, $val) = each(%$attr)) {
|
|
|
1163 |
next unless($self->{opt}->{valueattr}->{$key});
|
|
|
1164 |
next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
|
|
|
1165 |
my($k) = keys %$val;
|
|
|
1166 |
next unless($k eq $self->{opt}->{valueattr}->{$key});
|
|
|
1167 |
$attr->{$key} = $val->{$k};
|
|
|
1168 |
}
|
|
|
1169 |
}
|
|
|
1170 |
|
|
|
1171 |
return($attr)
|
|
|
1172 |
|
|
|
1173 |
}
|
|
|
1174 |
|
|
|
1175 |
|
|
|
1176 |
##############################################################################
|
|
|
1177 |
# Method: set_var()
|
|
|
1178 |
#
|
|
|
1179 |
# Called when a variable definition is encountered in the XML. (A variable
|
|
|
1180 |
# definition looks like <element attrname="name">value</element> where attrname
|
|
|
1181 |
# matches the varattr setting).
|
|
|
1182 |
#
|
|
|
1183 |
|
|
|
1184 |
sub set_var {
|
|
|
1185 |
my($self, $name, $value) = @_;
|
|
|
1186 |
|
|
|
1187 |
$self->{_var_values}->{$name} = $value;
|
|
|
1188 |
}
|
|
|
1189 |
|
|
|
1190 |
|
|
|
1191 |
##############################################################################
|
|
|
1192 |
# Method: get_var()
|
|
|
1193 |
#
|
|
|
1194 |
# Called during variable substitution to get the value for the named variable.
|
|
|
1195 |
#
|
|
|
1196 |
|
|
|
1197 |
sub get_var {
|
|
|
1198 |
my($self, $name) = @_;
|
|
|
1199 |
|
|
|
1200 |
my $value = $self->{_var_values}->{$name};
|
|
|
1201 |
return $value if(defined($value));
|
|
|
1202 |
|
|
|
1203 |
return '${' . $name . '}';
|
|
|
1204 |
}
|
|
|
1205 |
|
|
|
1206 |
|
|
|
1207 |
##############################################################################
|
|
|
1208 |
# Method: normalise_space()
|
|
|
1209 |
#
|
|
|
1210 |
# Strips leading and trailing whitespace and collapses sequences of whitespace
|
|
|
1211 |
# characters to a single space.
|
|
|
1212 |
#
|
|
|
1213 |
|
|
|
1214 |
sub normalise_space {
|
|
|
1215 |
my($self, $text) = @_;
|
|
|
1216 |
|
|
|
1217 |
$text =~ s/^\s+//s;
|
|
|
1218 |
$text =~ s/\s+$//s;
|
|
|
1219 |
$text =~ s/\s\s+/ /sg;
|
|
|
1220 |
|
|
|
1221 |
return $text;
|
|
|
1222 |
}
|
|
|
1223 |
|
|
|
1224 |
|
|
|
1225 |
##############################################################################
|
|
|
1226 |
# Method: array_to_hash()
|
|
|
1227 |
#
|
|
|
1228 |
# Helper routine for collapse().
|
|
|
1229 |
# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a
|
|
|
1230 |
# reference to the hash on success or the original array if folding is
|
|
|
1231 |
# not possible. Behaviour is controlled by 'keyattr' option.
|
|
|
1232 |
#
|
|
|
1233 |
|
|
|
1234 |
sub array_to_hash {
|
|
|
1235 |
my $self = shift;
|
|
|
1236 |
my $name = shift;
|
|
|
1237 |
my $arrayref = shift;
|
|
|
1238 |
|
|
|
1239 |
my $hashref = $self->new_hashref;
|
|
|
1240 |
|
|
|
1241 |
my($i, $key, $val, $flag);
|
|
|
1242 |
|
|
|
1243 |
|
|
|
1244 |
# Handle keyattr => { .... }
|
|
|
1245 |
|
|
|
1246 |
if(ref($self->{opt}->{keyattr}) eq 'HASH') {
|
|
|
1247 |
return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
|
|
|
1248 |
($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
|
|
|
1249 |
for($i = 0; $i < @$arrayref; $i++) {
|
|
|
1250 |
if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
|
|
|
1251 |
exists($arrayref->[$i]->{$key})
|
|
|
1252 |
) {
|
|
|
1253 |
$val = $arrayref->[$i]->{$key};
|
|
|
1254 |
if(ref($val)) {
|
|
|
1255 |
$self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
|
|
|
1256 |
return($arrayref);
|
|
|
1257 |
}
|
|
|
1258 |
$val = $self->normalise_space($val)
|
|
|
1259 |
if($self->{opt}->{normalisespace} == 1);
|
|
|
1260 |
$self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
|
|
|
1261 |
if(exists($hashref->{$val}));
|
|
|
1262 |
$hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
|
|
|
1263 |
$hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
|
|
|
1264 |
delete $hashref->{$val}->{$key} unless($flag eq '+');
|
|
|
1265 |
}
|
|
|
1266 |
else {
|
|
|
1267 |
$self->die_or_warn("<$name> element has no '$key' key attribute");
|
|
|
1268 |
return($arrayref);
|
|
|
1269 |
}
|
|
|
1270 |
}
|
|
|
1271 |
}
|
|
|
1272 |
|
|
|
1273 |
|
|
|
1274 |
# Or assume keyattr => [ .... ]
|
|
|
1275 |
|
|
|
1276 |
else {
|
|
|
1277 |
my $default_keys =
|
|
|
1278 |
join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
|
|
|
1279 |
|
|
|
1280 |
ELEMENT: for($i = 0; $i < @$arrayref; $i++) {
|
|
|
1281 |
return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
|
|
|
1282 |
|
|
|
1283 |
foreach $key (@{$self->{opt}->{keyattr}}) {
|
|
|
1284 |
if(defined($arrayref->[$i]->{$key})) {
|
|
|
1285 |
$val = $arrayref->[$i]->{$key};
|
|
|
1286 |
if(ref($val)) {
|
|
|
1287 |
$self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
|
|
|
1288 |
if not $default_keys;
|
|
|
1289 |
return($arrayref);
|
|
|
1290 |
}
|
|
|
1291 |
$val = $self->normalise_space($val)
|
|
|
1292 |
if($self->{opt}->{normalisespace} == 1);
|
|
|
1293 |
$self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
|
|
|
1294 |
if(exists($hashref->{$val}));
|
|
|
1295 |
$hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
|
|
|
1296 |
delete $hashref->{$val}->{$key};
|
|
|
1297 |
next ELEMENT;
|
|
|
1298 |
}
|
|
|
1299 |
}
|
|
|
1300 |
|
|
|
1301 |
return($arrayref); # No keyfield matched
|
|
|
1302 |
}
|
|
|
1303 |
}
|
|
|
1304 |
|
|
|
1305 |
# collapse any hashes which now only have a 'content' key
|
|
|
1306 |
|
|
|
1307 |
if($self->{opt}->{collapseagain}) {
|
|
|
1308 |
$hashref = $self->collapse_content($hashref);
|
|
|
1309 |
}
|
|
|
1310 |
|
|
|
1311 |
return($hashref);
|
|
|
1312 |
}
|
|
|
1313 |
|
|
|
1314 |
|
|
|
1315 |
##############################################################################
|
|
|
1316 |
# Method: die_or_warn()
|
|
|
1317 |
#
|
|
|
1318 |
# Takes a diagnostic message and does one of three things:
|
|
|
1319 |
# 1. dies if strict mode is enabled
|
|
|
1320 |
# 2. warns if warnings are enabled but strict mode is not
|
|
|
1321 |
# 3. ignores message and returns silently if neither strict mode nor warnings
|
|
|
1322 |
# are enabled
|
|
|
1323 |
#
|
|
|
1324 |
# Option 2 looks at the global warnings variable $^W - which is not really
|
|
|
1325 |
# appropriate in the modern world of lexical warnings - TODO: Fix
|
|
|
1326 |
|
|
|
1327 |
sub die_or_warn {
|
|
|
1328 |
my $self = shift;
|
|
|
1329 |
my $msg = shift;
|
|
|
1330 |
|
|
|
1331 |
croak $msg if($self->{opt}->{strictmode});
|
|
|
1332 |
carp "Warning: $msg" if($^W);
|
|
|
1333 |
}
|
|
|
1334 |
|
|
|
1335 |
|
|
|
1336 |
##############################################################################
|
|
|
1337 |
# Method: new_hashref()
|
|
|
1338 |
#
|
|
|
1339 |
# This is a hook routine for overriding in a sub-class. Some people believe
|
|
|
1340 |
# that using Tie::IxHash here will solve order-loss problems.
|
|
|
1341 |
#
|
|
|
1342 |
|
|
|
1343 |
sub new_hashref {
|
|
|
1344 |
my $self = shift;
|
|
|
1345 |
|
|
|
1346 |
return { @_ };
|
|
|
1347 |
}
|
|
|
1348 |
|
|
|
1349 |
|
|
|
1350 |
##############################################################################
|
|
|
1351 |
# Method: collapse_content()
|
|
|
1352 |
#
|
|
|
1353 |
# Helper routine for array_to_hash
|
|
|
1354 |
#
|
|
|
1355 |
# Arguments expected are:
|
|
|
1356 |
# - an XML::Simple object
|
|
|
1357 |
# - a hasref
|
|
|
1358 |
# the hashref is a former array, turned into a hash by array_to_hash because
|
|
|
1359 |
# of the presence of key attributes
|
|
|
1360 |
# at this point collapse_content avoids over-complicated structures like
|
|
|
1361 |
# dir => { libexecdir => { content => '$exec_prefix/libexec' },
|
|
|
1362 |
# localstatedir => { content => '$prefix' },
|
|
|
1363 |
# }
|
|
|
1364 |
# into
|
|
|
1365 |
# dir => { libexecdir => '$exec_prefix/libexec',
|
|
|
1366 |
# localstatedir => '$prefix',
|
|
|
1367 |
# }
|
|
|
1368 |
|
|
|
1369 |
sub collapse_content {
|
|
|
1370 |
my $self = shift;
|
|
|
1371 |
my $hashref = shift;
|
|
|
1372 |
|
|
|
1373 |
my $contentkey = $self->{opt}->{contentkey};
|
|
|
1374 |
|
|
|
1375 |
# first go through the values,checking that they are fit to collapse
|
|
|
1376 |
foreach my $val (values %$hashref) {
|
|
|
1377 |
return $hashref unless ( (ref($val) eq 'HASH')
|
|
|
1378 |
and (keys %$val == 1)
|
|
|
1379 |
and (exists $val->{$contentkey})
|
|
|
1380 |
);
|
|
|
1381 |
}
|
|
|
1382 |
|
|
|
1383 |
# now collapse them
|
|
|
1384 |
foreach my $key (keys %$hashref) {
|
|
|
1385 |
$hashref->{$key}= $hashref->{$key}->{$contentkey};
|
|
|
1386 |
}
|
|
|
1387 |
|
|
|
1388 |
return $hashref;
|
|
|
1389 |
}
|
|
|
1390 |
|
|
|
1391 |
|
|
|
1392 |
##############################################################################
|
|
|
1393 |
# Method: value_to_xml()
|
|
|
1394 |
#
|
|
|
1395 |
# Helper routine for XMLout() - recurses through a data structure building up
|
|
|
1396 |
# and returning an XML representation of that structure as a string.
|
|
|
1397 |
#
|
|
|
1398 |
# Arguments expected are:
|
|
|
1399 |
# - the data structure to be encoded (usually a reference)
|
|
|
1400 |
# - the XML tag name to use for this item
|
|
|
1401 |
# - a string of spaces for use as the current indent level
|
|
|
1402 |
#
|
|
|
1403 |
|
|
|
1404 |
sub value_to_xml {
|
|
|
1405 |
my $self = shift;;
|
|
|
1406 |
|
|
|
1407 |
|
|
|
1408 |
# Grab the other arguments
|
|
|
1409 |
|
|
|
1410 |
my($ref, $name, $indent) = @_;
|
|
|
1411 |
|
|
|
1412 |
my $named = (defined($name) and $name ne '' ? 1 : 0);
|
|
|
1413 |
|
|
|
1414 |
my $nl = "\n";
|
|
|
1415 |
|
|
|
1416 |
my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
|
|
|
1417 |
if($self->{opt}->{noindent}) {
|
|
|
1418 |
$indent = '';
|
|
|
1419 |
$nl = '';
|
|
|
1420 |
}
|
|
|
1421 |
|
|
|
1422 |
|
|
|
1423 |
# Convert to XML
|
|
|
1424 |
|
|
|
1425 |
if(ref($ref)) {
|
|
|
1426 |
croak "circular data structures not supported"
|
|
|
1427 |
if(grep($_ == $ref, @{$self->{_ancestors}}));
|
|
|
1428 |
push @{$self->{_ancestors}}, $ref;
|
|
|
1429 |
}
|
|
|
1430 |
else {
|
|
|
1431 |
if($named) {
|
|
|
1432 |
return(join('',
|
|
|
1433 |
$indent, '<', $name, '>',
|
|
|
1434 |
($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
|
|
|
1435 |
'</', $name, ">", $nl
|
|
|
1436 |
));
|
|
|
1437 |
}
|
|
|
1438 |
else {
|
|
|
1439 |
return("$ref$nl");
|
|
|
1440 |
}
|
|
|
1441 |
}
|
|
|
1442 |
|
|
|
1443 |
|
|
|
1444 |
# Unfold hash to array if possible
|
|
|
1445 |
|
|
|
1446 |
if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
|
|
|
1447 |
and keys %$ref # and it's not empty
|
|
|
1448 |
and $self->{opt}->{keyattr} # and folding is enabled
|
|
|
1449 |
and !$is_root # and its not the root element
|
|
|
1450 |
) {
|
|
|
1451 |
$ref = $self->hash_to_array($name, $ref);
|
|
|
1452 |
}
|
|
|
1453 |
|
|
|
1454 |
|
|
|
1455 |
my @result = ();
|
|
|
1456 |
my($key, $value);
|
|
|
1457 |
|
|
|
1458 |
|
|
|
1459 |
# Handle hashrefs
|
|
|
1460 |
|
|
|
1461 |
if(UNIVERSAL::isa($ref, 'HASH')) {
|
|
|
1462 |
|
|
|
1463 |
# Reintermediate grouped values if applicable
|
|
|
1464 |
|
|
|
1465 |
if($self->{opt}->{grouptags}) {
|
|
|
1466 |
$ref = $self->copy_hash($ref);
|
|
|
1467 |
while(my($key, $val) = each %$ref) {
|
|
|
1468 |
if($self->{opt}->{grouptags}->{$key}) {
|
|
|
1469 |
$ref->{$key} = $self->new_hashref(
|
|
|
1470 |
$self->{opt}->{grouptags}->{$key} => $val
|
|
|
1471 |
);
|
|
|
1472 |
}
|
|
|
1473 |
}
|
|
|
1474 |
}
|
|
|
1475 |
|
|
|
1476 |
|
|
|
1477 |
# Scan for namespace declaration attributes
|
|
|
1478 |
|
|
|
1479 |
my $nsdecls = '';
|
|
|
1480 |
my $default_ns_uri;
|
|
|
1481 |
if($self->{nsup}) {
|
|
|
1482 |
$ref = $self->copy_hash($ref);
|
|
|
1483 |
$self->{nsup}->push_context();
|
|
|
1484 |
|
|
|
1485 |
# Look for default namespace declaration first
|
|
|
1486 |
|
|
|
1487 |
if(exists($ref->{xmlns})) {
|
|
|
1488 |
$self->{nsup}->declare_prefix('', $ref->{xmlns});
|
|
|
1489 |
$nsdecls .= qq( xmlns="$ref->{xmlns}");
|
|
|
1490 |
delete($ref->{xmlns});
|
|
|
1491 |
}
|
|
|
1492 |
$default_ns_uri = $self->{nsup}->get_uri('');
|
|
|
1493 |
|
|
|
1494 |
|
|
|
1495 |
# Then check all the other keys
|
|
|
1496 |
|
|
|
1497 |
foreach my $qname (keys(%$ref)) {
|
|
|
1498 |
my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
|
|
|
1499 |
if($uri) {
|
|
|
1500 |
if($uri eq $xmlns_ns) {
|
|
|
1501 |
$self->{nsup}->declare_prefix($lname, $ref->{$qname});
|
|
|
1502 |
$nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
|
|
|
1503 |
delete($ref->{$qname});
|
|
|
1504 |
}
|
|
|
1505 |
}
|
|
|
1506 |
}
|
|
|
1507 |
|
|
|
1508 |
# Translate any remaining Clarkian names
|
|
|
1509 |
|
|
|
1510 |
foreach my $qname (keys(%$ref)) {
|
|
|
1511 |
my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
|
|
|
1512 |
if($uri) {
|
|
|
1513 |
if($default_ns_uri and $uri eq $default_ns_uri) {
|
|
|
1514 |
$ref->{$lname} = $ref->{$qname};
|
|
|
1515 |
delete($ref->{$qname});
|
|
|
1516 |
}
|
|
|
1517 |
else {
|
|
|
1518 |
my $prefix = $self->{nsup}->get_prefix($uri);
|
|
|
1519 |
unless($prefix) {
|
|
|
1520 |
# $self->{nsup}->declare_prefix(undef, $uri);
|
|
|
1521 |
# $prefix = $self->{nsup}->get_prefix($uri);
|
|
|
1522 |
$prefix = $self->{ns_prefix}++;
|
|
|
1523 |
$self->{nsup}->declare_prefix($prefix, $uri);
|
|
|
1524 |
$nsdecls .= qq( xmlns:$prefix="$uri");
|
|
|
1525 |
}
|
|
|
1526 |
$ref->{"$prefix:$lname"} = $ref->{$qname};
|
|
|
1527 |
delete($ref->{$qname});
|
|
|
1528 |
}
|
|
|
1529 |
}
|
|
|
1530 |
}
|
|
|
1531 |
}
|
|
|
1532 |
|
|
|
1533 |
|
|
|
1534 |
my @nested = ();
|
|
|
1535 |
my $text_content = undef;
|
|
|
1536 |
if($named) {
|
|
|
1537 |
push @result, $indent, '<', $name, $nsdecls;
|
|
|
1538 |
}
|
|
|
1539 |
|
|
|
1540 |
if(keys %$ref) {
|
|
|
1541 |
my $first_arg = 1;
|
|
|
1542 |
foreach my $key ($self->sorted_keys($name, $ref)) {
|
|
|
1543 |
my $value = $ref->{$key};
|
|
|
1544 |
next if(substr($key, 0, 1) eq '-');
|
|
|
1545 |
if(!defined($value)) {
|
|
|
1546 |
next if $self->{opt}->{suppressempty};
|
|
|
1547 |
unless(exists($self->{opt}->{suppressempty})
|
|
|
1548 |
and !defined($self->{opt}->{suppressempty})
|
|
|
1549 |
) {
|
|
|
1550 |
carp 'Use of uninitialized value' if($^W);
|
|
|
1551 |
}
|
|
|
1552 |
if($key eq $self->{opt}->{contentkey}) {
|
|
|
1553 |
$text_content = '';
|
|
|
1554 |
}
|
|
|
1555 |
else {
|
|
|
1556 |
$value = exists($self->{opt}->{suppressempty}) ? {} : '';
|
|
|
1557 |
}
|
|
|
1558 |
}
|
|
|
1559 |
|
|
|
1560 |
if(!ref($value)
|
|
|
1561 |
and $self->{opt}->{valueattr}
|
|
|
1562 |
and $self->{opt}->{valueattr}->{$key}
|
|
|
1563 |
) {
|
|
|
1564 |
$value = $self->new_hashref(
|
|
|
1565 |
$self->{opt}->{valueattr}->{$key} => $value
|
|
|
1566 |
);
|
|
|
1567 |
}
|
|
|
1568 |
|
|
|
1569 |
if(ref($value) or $self->{opt}->{noattr}) {
|
|
|
1570 |
push @nested,
|
|
|
1571 |
$self->value_to_xml($value, $key, "$indent ");
|
|
|
1572 |
}
|
|
|
1573 |
else {
|
|
|
1574 |
$value = $self->escape_value($value) unless($self->{opt}->{noescape});
|
|
|
1575 |
if($key eq $self->{opt}->{contentkey}) {
|
|
|
1576 |
$text_content = $value;
|
|
|
1577 |
}
|
|
|
1578 |
else {
|
|
|
1579 |
push @result, "\n$indent " . ' ' x length($name)
|
|
|
1580 |
if($self->{opt}->{attrindent} and !$first_arg);
|
|
|
1581 |
push @result, ' ', $key, '="', $value , '"';
|
|
|
1582 |
$first_arg = 0;
|
|
|
1583 |
}
|
|
|
1584 |
}
|
|
|
1585 |
}
|
|
|
1586 |
}
|
|
|
1587 |
else {
|
|
|
1588 |
$text_content = '';
|
|
|
1589 |
}
|
|
|
1590 |
|
|
|
1591 |
if(@nested or defined($text_content)) {
|
|
|
1592 |
if($named) {
|
|
|
1593 |
push @result, ">";
|
|
|
1594 |
if(defined($text_content)) {
|
|
|
1595 |
push @result, $text_content;
|
|
|
1596 |
$nested[0] =~ s/^\s+// if(@nested);
|
|
|
1597 |
}
|
|
|
1598 |
else {
|
|
|
1599 |
push @result, $nl;
|
|
|
1600 |
}
|
|
|
1601 |
if(@nested) {
|
|
|
1602 |
push @result, @nested, $indent;
|
|
|
1603 |
}
|
|
|
1604 |
push @result, '</', $name, ">", $nl;
|
|
|
1605 |
}
|
|
|
1606 |
else {
|
|
|
1607 |
push @result, @nested; # Special case if no root elements
|
|
|
1608 |
}
|
|
|
1609 |
}
|
|
|
1610 |
else {
|
|
|
1611 |
push @result, " />", $nl;
|
|
|
1612 |
}
|
|
|
1613 |
$self->{nsup}->pop_context() if($self->{nsup});
|
|
|
1614 |
}
|
|
|
1615 |
|
|
|
1616 |
|
|
|
1617 |
# Handle arrayrefs
|
|
|
1618 |
|
|
|
1619 |
elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
|
|
|
1620 |
foreach $value (@$ref) {
|
|
|
1621 |
next if !defined($value) and $self->{opt}->{suppressempty};
|
|
|
1622 |
if(!ref($value)) {
|
|
|
1623 |
push @result,
|
|
|
1624 |
$indent, '<', $name, '>',
|
|
|
1625 |
($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
|
|
|
1626 |
'</', $name, ">$nl";
|
|
|
1627 |
}
|
|
|
1628 |
elsif(UNIVERSAL::isa($value, 'HASH')) {
|
|
|
1629 |
push @result, $self->value_to_xml($value, $name, $indent);
|
|
|
1630 |
}
|
|
|
1631 |
else {
|
|
|
1632 |
push @result,
|
|
|
1633 |
$indent, '<', $name, ">$nl",
|
|
|
1634 |
$self->value_to_xml($value, 'anon', "$indent "),
|
|
|
1635 |
$indent, '</', $name, ">$nl";
|
|
|
1636 |
}
|
|
|
1637 |
}
|
|
|
1638 |
}
|
|
|
1639 |
|
|
|
1640 |
else {
|
|
|
1641 |
croak "Can't encode a value of type: " . ref($ref);
|
|
|
1642 |
}
|
|
|
1643 |
|
|
|
1644 |
|
|
|
1645 |
pop @{$self->{_ancestors}} if(ref($ref));
|
|
|
1646 |
|
|
|
1647 |
return(join('', @result));
|
|
|
1648 |
}
|
|
|
1649 |
|
|
|
1650 |
|
|
|
1651 |
##############################################################################
|
|
|
1652 |
# Method: sorted_keys()
|
|
|
1653 |
#
|
|
|
1654 |
# Returns the keys of the referenced hash sorted into alphabetical order, but
|
|
|
1655 |
# with the 'key' key (as in KeyAttr) first, if there is one.
|
|
|
1656 |
#
|
|
|
1657 |
|
|
|
1658 |
sub sorted_keys {
|
|
|
1659 |
my($self, $name, $ref) = @_;
|
|
|
1660 |
|
|
|
1661 |
return keys %$ref if $self->{opt}->{nosort};
|
|
|
1662 |
|
|
|
1663 |
my %hash = %$ref;
|
|
|
1664 |
my $keyattr = $self->{opt}->{keyattr};
|
|
|
1665 |
|
|
|
1666 |
my @key;
|
|
|
1667 |
|
|
|
1668 |
if(ref $keyattr eq 'HASH') {
|
|
|
1669 |
if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
|
|
|
1670 |
push @key, $keyattr->{$name}->[0];
|
|
|
1671 |
delete $hash{$keyattr->{$name}->[0]};
|
|
|
1672 |
}
|
|
|
1673 |
}
|
|
|
1674 |
elsif(ref $keyattr eq 'ARRAY') {
|
|
|
1675 |
foreach (@{$keyattr}) {
|
|
|
1676 |
if(exists $hash{$_}) {
|
|
|
1677 |
push @key, $_;
|
|
|
1678 |
delete $hash{$_};
|
|
|
1679 |
last;
|
|
|
1680 |
}
|
|
|
1681 |
}
|
|
|
1682 |
}
|
|
|
1683 |
|
|
|
1684 |
return(@key, sort keys %hash);
|
|
|
1685 |
}
|
|
|
1686 |
|
|
|
1687 |
##############################################################################
|
|
|
1688 |
# Method: escape_value()
|
|
|
1689 |
#
|
|
|
1690 |
# Helper routine for automatically escaping values for XMLout().
|
|
|
1691 |
# Expects a scalar data value. Returns escaped version.
|
|
|
1692 |
#
|
|
|
1693 |
|
|
|
1694 |
sub escape_value {
|
|
|
1695 |
my($self, $data) = @_;
|
|
|
1696 |
|
|
|
1697 |
return '' unless(defined($data));
|
|
|
1698 |
|
|
|
1699 |
$data =~ s/&/&/sg;
|
|
|
1700 |
$data =~ s/</</sg;
|
|
|
1701 |
$data =~ s/>/>/sg;
|
|
|
1702 |
$data =~ s/"/"/sg;
|
|
|
1703 |
|
|
|
1704 |
my $level = $self->{opt}->{numericescape} or return $data;
|
|
|
1705 |
|
|
|
1706 |
return $self->numeric_escape($data, $level);
|
|
|
1707 |
}
|
|
|
1708 |
|
|
|
1709 |
sub numeric_escape {
|
|
|
1710 |
my($self, $data, $level) = @_;
|
|
|
1711 |
|
|
|
1712 |
use utf8; # required for 5.6
|
|
|
1713 |
|
|
|
1714 |
if($self->{opt}->{numericescape} eq '2') {
|
|
|
1715 |
$data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
|
|
|
1716 |
}
|
|
|
1717 |
else {
|
|
|
1718 |
$data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
|
|
|
1719 |
}
|
|
|
1720 |
|
|
|
1721 |
return $data;
|
|
|
1722 |
}
|
|
|
1723 |
|
|
|
1724 |
|
|
|
1725 |
##############################################################################
|
|
|
1726 |
# Method: hash_to_array()
|
|
|
1727 |
#
|
|
|
1728 |
# Helper routine for value_to_xml().
|
|
|
1729 |
# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a
|
|
|
1730 |
# reference to the array on success or the original hash if unfolding is
|
|
|
1731 |
# not possible.
|
|
|
1732 |
#
|
|
|
1733 |
|
|
|
1734 |
sub hash_to_array {
|
|
|
1735 |
my $self = shift;
|
|
|
1736 |
my $parent = shift;
|
|
|
1737 |
my $hashref = shift;
|
|
|
1738 |
|
|
|
1739 |
my $arrayref = [];
|
|
|
1740 |
|
|
|
1741 |
my($key, $value);
|
|
|
1742 |
|
|
|
1743 |
my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
|
|
|
1744 |
foreach $key (@keys) {
|
|
|
1745 |
$value = $hashref->{$key};
|
|
|
1746 |
return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
|
|
|
1747 |
|
|
|
1748 |
if(ref($self->{opt}->{keyattr}) eq 'HASH') {
|
|
|
1749 |
return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
|
|
|
1750 |
push @$arrayref, $self->copy_hash(
|
|
|
1751 |
$value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
|
|
|
1752 |
);
|
|
|
1753 |
}
|
|
|
1754 |
else {
|
|
|
1755 |
push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
|
|
|
1756 |
}
|
|
|
1757 |
}
|
|
|
1758 |
|
|
|
1759 |
return($arrayref);
|
|
|
1760 |
}
|
|
|
1761 |
|
|
|
1762 |
|
|
|
1763 |
##############################################################################
|
|
|
1764 |
# Method: copy_hash()
|
|
|
1765 |
#
|
|
|
1766 |
# Helper routine for hash_to_array(). When unfolding a hash of hashes into
|
|
|
1767 |
# an array of hashes, we need to copy the key from the outer hash into the
|
|
|
1768 |
# inner hash. This routine makes a copy of the original hash so we don't
|
|
|
1769 |
# destroy the original data structure. You might wish to override this
|
|
|
1770 |
# method if you're using tied hashes and don't want them to get untied.
|
|
|
1771 |
#
|
|
|
1772 |
|
|
|
1773 |
sub copy_hash {
|
|
|
1774 |
my($self, $orig, @extra) = @_;
|
|
|
1775 |
|
|
|
1776 |
return { @extra, %$orig };
|
|
|
1777 |
}
|
|
|
1778 |
|
|
|
1779 |
##############################################################################
|
|
|
1780 |
# Methods required for building trees from SAX events
|
|
|
1781 |
##############################################################################
|
|
|
1782 |
|
|
|
1783 |
sub start_document {
|
|
|
1784 |
my $self = shift;
|
|
|
1785 |
|
|
|
1786 |
$self->handle_options('in') unless($self->{opt});
|
|
|
1787 |
|
|
|
1788 |
$self->{lists} = [];
|
|
|
1789 |
$self->{curlist} = $self->{tree} = [];
|
|
|
1790 |
}
|
|
|
1791 |
|
|
|
1792 |
|
|
|
1793 |
sub start_element {
|
|
|
1794 |
my $self = shift;
|
|
|
1795 |
my $element = shift;
|
|
|
1796 |
|
|
|
1797 |
my $name = $element->{Name};
|
|
|
1798 |
if($self->{opt}->{nsexpand}) {
|
|
|
1799 |
$name = $element->{LocalName} || '';
|
|
|
1800 |
if($element->{NamespaceURI}) {
|
|
|
1801 |
$name = '{' . $element->{NamespaceURI} . '}' . $name;
|
|
|
1802 |
}
|
|
|
1803 |
}
|
|
|
1804 |
my $attributes = {};
|
|
|
1805 |
if($element->{Attributes}) { # Might be undef
|
|
|
1806 |
foreach my $attr (values %{$element->{Attributes}}) {
|
|
|
1807 |
if($self->{opt}->{nsexpand}) {
|
|
|
1808 |
my $name = $attr->{LocalName} || '';
|
|
|
1809 |
if($attr->{NamespaceURI}) {
|
|
|
1810 |
$name = '{' . $attr->{NamespaceURI} . '}' . $name
|
|
|
1811 |
}
|
|
|
1812 |
$name = 'xmlns' if($name eq $bad_def_ns_jcn);
|
|
|
1813 |
$attributes->{$name} = $attr->{Value};
|
|
|
1814 |
}
|
|
|
1815 |
else {
|
|
|
1816 |
$attributes->{$attr->{Name}} = $attr->{Value};
|
|
|
1817 |
}
|
|
|
1818 |
}
|
|
|
1819 |
}
|
|
|
1820 |
my $newlist = [ $attributes ];
|
|
|
1821 |
push @{ $self->{lists} }, $self->{curlist};
|
|
|
1822 |
push @{ $self->{curlist} }, $name => $newlist;
|
|
|
1823 |
$self->{curlist} = $newlist;
|
|
|
1824 |
}
|
|
|
1825 |
|
|
|
1826 |
|
|
|
1827 |
sub characters {
|
|
|
1828 |
my $self = shift;
|
|
|
1829 |
my $chars = shift;
|
|
|
1830 |
|
|
|
1831 |
my $text = $chars->{Data};
|
|
|
1832 |
my $clist = $self->{curlist};
|
|
|
1833 |
my $pos = $#$clist;
|
|
|
1834 |
|
|
|
1835 |
if ($pos > 0 and $clist->[$pos - 1] eq '0') {
|
|
|
1836 |
$clist->[$pos] .= $text;
|
|
|
1837 |
}
|
|
|
1838 |
else {
|
|
|
1839 |
push @$clist, 0 => $text;
|
|
|
1840 |
}
|
|
|
1841 |
}
|
|
|
1842 |
|
|
|
1843 |
|
|
|
1844 |
sub end_element {
|
|
|
1845 |
my $self = shift;
|
|
|
1846 |
|
|
|
1847 |
$self->{curlist} = pop @{ $self->{lists} };
|
|
|
1848 |
}
|
|
|
1849 |
|
|
|
1850 |
|
|
|
1851 |
sub end_document {
|
|
|
1852 |
my $self = shift;
|
|
|
1853 |
|
|
|
1854 |
delete($self->{curlist});
|
|
|
1855 |
delete($self->{lists});
|
|
|
1856 |
|
|
|
1857 |
my $tree = $self->{tree};
|
|
|
1858 |
delete($self->{tree});
|
|
|
1859 |
|
|
|
1860 |
|
|
|
1861 |
# Return tree as-is to XMLin()
|
|
|
1862 |
|
|
|
1863 |
return($tree) if($self->{nocollapse});
|
|
|
1864 |
|
|
|
1865 |
|
|
|
1866 |
# Or collapse it before returning it to SAX parser class
|
|
|
1867 |
|
|
|
1868 |
if($self->{opt}->{keeproot}) {
|
|
|
1869 |
$tree = $self->collapse({}, @$tree);
|
|
|
1870 |
}
|
|
|
1871 |
else {
|
|
|
1872 |
$tree = $self->collapse(@{$tree->[1]});
|
|
|
1873 |
}
|
|
|
1874 |
|
|
|
1875 |
if($self->{opt}->{datahandler}) {
|
|
|
1876 |
return($self->{opt}->{datahandler}->($self, $tree));
|
|
|
1877 |
}
|
|
|
1878 |
|
|
|
1879 |
return($tree);
|
|
|
1880 |
}
|
|
|
1881 |
|
|
|
1882 |
*xml_in = \&XMLin;
|
|
|
1883 |
*xml_out = \&XMLout;
|
|
|
1884 |
|
|
|
1885 |
1;
|
|
|
1886 |
|
|
|
1887 |
__END__
|
|
|
1888 |
|
|
|
1889 |
=head1 STATUS OF THIS MODULE
|
|
|
1890 |
|
|
|
1891 |
The use of this module in new code is discouraged. Other modules are available
|
|
|
1892 |
which provide more straightforward and consistent interfaces. In particular,
|
|
|
1893 |
L<XML::LibXML> is highly recommended.
|
|
|
1894 |
|
|
|
1895 |
The major problems with this module are the large number of options and the
|
|
|
1896 |
arbitrary ways in which these options interact - often with unexpected results.
|
|
|
1897 |
|
|
|
1898 |
Patches with bug fixes and documentation fixes are welcome, but new features
|
|
|
1899 |
are unlikely to be added.
|
|
|
1900 |
|
|
|
1901 |
=head1 QUICK START
|
|
|
1902 |
|
|
|
1903 |
Say you have a script called B<foo> and a file of configuration options
|
|
|
1904 |
called B<foo.xml> containing the following:
|
|
|
1905 |
|
|
|
1906 |
<config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
|
|
|
1907 |
<server name="sahara" osname="solaris" osversion="2.6">
|
|
|
1908 |
<address>10.0.0.101</address>
|
|
|
1909 |
<address>10.0.1.101</address>
|
|
|
1910 |
</server>
|
|
|
1911 |
<server name="gobi" osname="irix" osversion="6.5">
|
|
|
1912 |
<address>10.0.0.102</address>
|
|
|
1913 |
</server>
|
|
|
1914 |
<server name="kalahari" osname="linux" osversion="2.0.34">
|
|
|
1915 |
<address>10.0.0.103</address>
|
|
|
1916 |
<address>10.0.1.103</address>
|
|
|
1917 |
</server>
|
|
|
1918 |
</config>
|
|
|
1919 |
|
|
|
1920 |
The following lines of code in B<foo>:
|
|
|
1921 |
|
|
|
1922 |
use XML::Simple qw(:strict);
|
|
|
1923 |
|
|
|
1924 |
my $config = XMLin(undef, KeyAttr => { server => 'name' }, ForceArray => [ 'server', 'address' ]);
|
|
|
1925 |
|
|
|
1926 |
will 'slurp' the configuration options into the hashref $config (because no
|
|
|
1927 |
filename or XML string was passed as the first argument to C<XMLin()> the name
|
|
|
1928 |
and location of the XML file will be inferred from name and location of the
|
|
|
1929 |
script). You can dump out the contents of the hashref using Data::Dumper:
|
|
|
1930 |
|
|
|
1931 |
use Data::Dumper;
|
|
|
1932 |
|
|
|
1933 |
print Dumper($config);
|
|
|
1934 |
|
|
|
1935 |
which will produce something like this (formatting has been adjusted for
|
|
|
1936 |
brevity):
|
|
|
1937 |
|
|
|
1938 |
{
|
|
|
1939 |
'logdir' => '/var/log/foo/',
|
|
|
1940 |
'debugfile' => '/tmp/foo.debug',
|
|
|
1941 |
'server' => {
|
|
|
1942 |
'sahara' => {
|
|
|
1943 |
'osversion' => '2.6',
|
|
|
1944 |
'osname' => 'solaris',
|
|
|
1945 |
'address' => [ '10.0.0.101', '10.0.1.101' ]
|
|
|
1946 |
},
|
|
|
1947 |
'gobi' => {
|
|
|
1948 |
'osversion' => '6.5',
|
|
|
1949 |
'osname' => 'irix',
|
|
|
1950 |
'address' => [ '10.0.0.102' ]
|
|
|
1951 |
},
|
|
|
1952 |
'kalahari' => {
|
|
|
1953 |
'osversion' => '2.0.34',
|
|
|
1954 |
'osname' => 'linux',
|
|
|
1955 |
'address' => [ '10.0.0.103', '10.0.1.103' ]
|
|
|
1956 |
}
|
|
|
1957 |
}
|
|
|
1958 |
}
|
|
|
1959 |
|
|
|
1960 |
Your script could then access the name of the log directory like this:
|
|
|
1961 |
|
|
|
1962 |
print $config->{logdir};
|
|
|
1963 |
|
|
|
1964 |
similarly, the second address on the server 'kalahari' could be referenced as:
|
|
|
1965 |
|
|
|
1966 |
print $config->{server}->{kalahari}->{address}->[1];
|
|
|
1967 |
|
|
|
1968 |
Note: If the mapping between the output of Data::Dumper and the print
|
|
|
1969 |
statements above is not obvious to you, then please refer to the 'references'
|
|
|
1970 |
tutorial (AKA: "Mark's very short tutorial about references") at L<perlreftut>.
|
|
|
1971 |
|
|
|
1972 |
In this example, the C<< ForceArray >> option was used to list elements that
|
|
|
1973 |
might occur multiple times and should therefore be represented as arrayrefs
|
|
|
1974 |
(even when only one element is present).
|
|
|
1975 |
|
|
|
1976 |
The C<< KeyAttr >> option was used to indicate that each C<< <server> >>
|
|
|
1977 |
element has a unique identifier in the C<< name >> attribute. This allows you
|
|
|
1978 |
to index directly to a particular server record using the name as a hash key
|
|
|
1979 |
(as shown above).
|
|
|
1980 |
|
|
|
1981 |
For simple requirements, that's really all there is to it. If you want to
|
|
|
1982 |
store your XML in a different directory or file, or pass it in as a string or
|
|
|
1983 |
even pass it in via some derivative of an IO::Handle, you'll need to check out
|
|
|
1984 |
L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that
|
|
|
1985 |
neat little transformation that produced $config->{server}) you'll find options
|
|
|
1986 |
for that as well.
|
|
|
1987 |
|
|
|
1988 |
If you want to generate XML (for example to write a modified version of
|
|
|
1989 |
$config back out as XML), check out C<XMLout()>.
|
|
|
1990 |
|
|
|
1991 |
If your needs are not so simple, this may not be the module for you. In that
|
|
|
1992 |
case, you might want to read L<"WHERE TO FROM HERE?">.
|
|
|
1993 |
|
|
|
1994 |
=head1 DESCRIPTION
|
|
|
1995 |
|
|
|
1996 |
The XML::Simple module provides a simple API layer on top of an underlying XML
|
|
|
1997 |
parsing module (either XML::Parser or one of the SAX2 parser modules). Two
|
|
|
1998 |
functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity
|
|
|
1999 |
request the lower case versions of the function names: C<xml_in()> and
|
|
|
2000 |
C<xml_out()>.
|
|
|
2001 |
|
|
|
2002 |
The simplest approach is to call these two functions directly, but an
|
|
|
2003 |
optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
|
|
|
2004 |
allows them to be called as methods of an B<XML::Simple> object. The object
|
|
|
2005 |
interface can also be used at either end of a SAX pipeline.
|
|
|
2006 |
|
|
|
2007 |
=head2 XMLin()
|
|
|
2008 |
|
|
|
2009 |
Parses XML formatted data and returns a reference to a data structure which
|
|
|
2010 |
contains the same information in a more readily accessible form. (Skip
|
|
|
2011 |
down to L<"EXAMPLES"> below, for more sample code).
|
|
|
2012 |
|
|
|
2013 |
C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
|
|
|
2014 |
value' option pairs. The XML specifier can be one of the following:
|
|
|
2015 |
|
|
|
2016 |
=over 4
|
|
|
2017 |
|
|
|
2018 |
=item A filename
|
|
|
2019 |
|
|
|
2020 |
If the filename contains no directory components C<XMLin()> will look for the
|
|
|
2021 |
file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
|
|
|
2022 |
current directory if the SearchPath option is not defined. eg:
|
|
|
2023 |
|
|
|
2024 |
$ref = XMLin('/etc/params.xml');
|
|
|
2025 |
|
|
|
2026 |
Note, the filename '-' can be used to parse from STDIN.
|
|
|
2027 |
|
|
|
2028 |
=item undef
|
|
|
2029 |
|
|
|
2030 |
If there is no XML specifier, C<XMLin()> will check the script directory and
|
|
|
2031 |
each of the SearchPath directories for a file with the same name as the script
|
|
|
2032 |
but with the extension '.xml'. Note: if you wish to specify options, you
|
|
|
2033 |
must specify the value 'undef'. eg:
|
|
|
2034 |
|
|
|
2035 |
$ref = XMLin(undef, ForceArray => 1);
|
|
|
2036 |
|
|
|
2037 |
=item A string of XML
|
|
|
2038 |
|
|
|
2039 |
A string containing XML (recognised by the presence of '<' and '>' characters)
|
|
|
2040 |
will be parsed directly. eg:
|
|
|
2041 |
|
|
|
2042 |
$ref = XMLin('<opt username="bob" password="flurp" />');
|
|
|
2043 |
|
|
|
2044 |
=item An IO::Handle object
|
|
|
2045 |
|
|
|
2046 |
An IO::Handle object will be read to EOF and its contents parsed. eg:
|
|
|
2047 |
|
|
|
2048 |
$fh = IO::File->new('/etc/params.xml');
|
|
|
2049 |
$ref = XMLin($fh);
|
|
|
2050 |
|
|
|
2051 |
=back
|
|
|
2052 |
|
|
|
2053 |
=head2 XMLout()
|
|
|
2054 |
|
|
|
2055 |
Takes a data structure (generally a hashref) and returns an XML encoding of
|
|
|
2056 |
that structure. If the resulting XML is parsed using C<XMLin()>, it should
|
|
|
2057 |
return a data structure equivalent to the original (see caveats below).
|
|
|
2058 |
|
|
|
2059 |
The C<XMLout()> function can also be used to output the XML as SAX events
|
|
|
2060 |
see the C<Handler> option and L<"SAX SUPPORT"> for more details).
|
|
|
2061 |
|
|
|
2062 |
When translating hashes to XML, hash keys which have a leading '-' will be
|
|
|
2063 |
silently skipped. This is the approved method for marking elements of a
|
|
|
2064 |
data structure which should be ignored by C<XMLout>. (Note: If these items
|
|
|
2065 |
were not skipped the key names would be emitted as element or attribute names
|
|
|
2066 |
with a leading '-' which would not be valid XML).
|
|
|
2067 |
|
|
|
2068 |
=head2 Caveats
|
|
|
2069 |
|
|
|
2070 |
Some care is required in creating data structures which will be passed to
|
|
|
2071 |
C<XMLout()>. Hash keys from the data structure will be encoded as either XML
|
|
|
2072 |
element names or attribute names. Therefore, you should use hash key names
|
|
|
2073 |
which conform to the relatively strict XML naming rules:
|
|
|
2074 |
|
|
|
2075 |
Names in XML must begin with a letter. The remaining characters may be
|
|
|
2076 |
letters, digits, hyphens (-), underscores (_) or full stops (.). It is also
|
|
|
2077 |
allowable to include one colon (:) in an element name but this should only be
|
|
|
2078 |
used when working with namespaces (B<XML::Simple> can only usefully work with
|
|
|
2079 |
namespaces when teamed with a SAX Parser).
|
|
|
2080 |
|
|
|
2081 |
You can use other punctuation characters in hash values (just not in hash
|
|
|
2082 |
keys) however B<XML::Simple> does not support dumping binary data.
|
|
|
2083 |
|
|
|
2084 |
If you break these rules, the current implementation of C<XMLout()> will
|
|
|
2085 |
simply emit non-compliant XML which will be rejected if you try to read it
|
|
|
2086 |
back in. (A later version of B<XML::Simple> might take a more proactive
|
|
|
2087 |
approach).
|
|
|
2088 |
|
|
|
2089 |
Note also that although you can nest hashes and arrays to arbitrary levels,
|
|
|
2090 |
circular data structures are not supported and will cause C<XMLout()> to die.
|
|
|
2091 |
|
|
|
2092 |
If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
|
|
|
2093 |
to Perl, then you should probably disable array folding (using the KeyAttr
|
|
|
2094 |
option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the
|
|
|
2095 |
expected results, you may prefer to use L<XML::Dumper> which is designed for
|
|
|
2096 |
exactly that purpose.
|
|
|
2097 |
|
|
|
2098 |
Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
|
|
|
2099 |
|
|
|
2100 |
|
|
|
2101 |
=head1 OPTIONS
|
|
|
2102 |
|
|
|
2103 |
B<XML::Simple> supports a number of options (in fact as each release of
|
|
|
2104 |
B<XML::Simple> adds more options, the module's claim to the name 'Simple'
|
|
|
2105 |
becomes increasingly tenuous). If you find yourself repeatedly having to
|
|
|
2106 |
specify the same options, you might like to investigate L<"OPTIONAL OO
|
|
|
2107 |
INTERFACE"> below.
|
|
|
2108 |
|
|
|
2109 |
If you can't be bothered reading the documentation, refer to
|
|
|
2110 |
L<"STRICT MODE"> to automatically catch common mistakes.
|
|
|
2111 |
|
|
|
2112 |
Because there are so many options, it's hard for new users to know which ones
|
|
|
2113 |
are important, so here are the two you really need to know about:
|
|
|
2114 |
|
|
|
2115 |
=over 4
|
|
|
2116 |
|
|
|
2117 |
=item *
|
|
|
2118 |
|
|
|
2119 |
check out C<ForceArray> because you'll almost certainly want to turn it on
|
|
|
2120 |
|
|
|
2121 |
=item *
|
|
|
2122 |
|
|
|
2123 |
make sure you know what the C<KeyAttr> option does and what its default value is
|
|
|
2124 |
because it may surprise you otherwise (note in particular that 'KeyAttr'
|
|
|
2125 |
affects both C<XMLin> and C<XMLout>)
|
|
|
2126 |
|
|
|
2127 |
=back
|
|
|
2128 |
|
|
|
2129 |
The option name headings below have a trailing 'comment' - a hash followed by
|
|
|
2130 |
two pieces of metadata:
|
|
|
2131 |
|
|
|
2132 |
=over 4
|
|
|
2133 |
|
|
|
2134 |
=item *
|
|
|
2135 |
|
|
|
2136 |
Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
|
|
|
2137 |
'I<out>' if they are recognised by C<XMLout()>.
|
|
|
2138 |
|
|
|
2139 |
=item *
|
|
|
2140 |
|
|
|
2141 |
Each option is also flagged to indicate whether it is:
|
|
|
2142 |
|
|
|
2143 |
'important' - don't use the module until you understand this one
|
|
|
2144 |
'handy' - you can skip this on the first time through
|
|
|
2145 |
'advanced' - you can skip this on the second time through
|
|
|
2146 |
'SAX only' - don't worry about this unless you're using SAX (or
|
|
|
2147 |
alternatively if you need this, you also need SAX)
|
|
|
2148 |
'seldom used' - you'll probably never use this unless you were the
|
|
|
2149 |
person that requested the feature
|
|
|
2150 |
|
|
|
2151 |
=back
|
|
|
2152 |
|
|
|
2153 |
The options are listed alphabetically:
|
|
|
2154 |
|
|
|
2155 |
Note: option names are no longer case sensitive so you can use the mixed case
|
|
|
2156 |
versions shown here; all lower case as required by versions 2.03 and earlier;
|
|
|
2157 |
or you can add underscores between the words (eg: key_attr).
|
|
|
2158 |
|
|
|
2159 |
|
|
|
2160 |
=head2 AttrIndent => 1 I<# out - handy>
|
|
|
2161 |
|
|
|
2162 |
When you are using C<XMLout()>, enable this option to have attributes printed
|
|
|
2163 |
one-per-line with sensible indentation rather than all on one line.
|
|
|
2164 |
|
|
|
2165 |
=head2 Cache => [ cache schemes ] I<# in - advanced>
|
|
|
2166 |
|
|
|
2167 |
Because loading the B<XML::Parser> module and parsing an XML file can consume a
|
|
|
2168 |
significant number of CPU cycles, it is often desirable to cache the output of
|
|
|
2169 |
C<XMLin()> for later reuse.
|
|
|
2170 |
|
|
|
2171 |
When parsing from a named file, B<XML::Simple> supports a number of caching
|
|
|
2172 |
schemes. The 'Cache' option may be used to specify one or more schemes (using
|
|
|
2173 |
an anonymous array). Each scheme will be tried in turn in the hope of finding
|
|
|
2174 |
a cached pre-parsed representation of the XML file. If no cached copy is
|
|
|
2175 |
found, the file will be parsed and the first cache scheme in the list will be
|
|
|
2176 |
used to save a copy of the results. The following cache schemes have been
|
|
|
2177 |
implemented:
|
|
|
2178 |
|
|
|
2179 |
=over 4
|
|
|
2180 |
|
|
|
2181 |
=item storable
|
|
|
2182 |
|
|
|
2183 |
Utilises B<Storable.pm> to read/write a cache file with the same name as the
|
|
|
2184 |
XML file but with the extension .stor
|
|
|
2185 |
|
|
|
2186 |
=item memshare
|
|
|
2187 |
|
|
|
2188 |
When a file is first parsed, a copy of the resulting data structure is retained
|
|
|
2189 |
in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse
|
|
|
2190 |
the same file will return a reference to this structure. This cached version
|
|
|
2191 |
will persist only for the life of the Perl interpreter (which in the case of
|
|
|
2192 |
mod_perl for example, may be some significant time).
|
|
|
2193 |
|
|
|
2194 |
Because each caller receives a reference to the same data structure, a change
|
|
|
2195 |
made by one caller will be visible to all. For this reason, the reference
|
|
|
2196 |
returned should be treated as read-only.
|
|
|
2197 |
|
|
|
2198 |
=item memcopy
|
|
|
2199 |
|
|
|
2200 |
This scheme works identically to 'memshare' (above) except that each caller
|
|
|
2201 |
receives a reference to a new data structure which is a copy of the cached
|
|
|
2202 |
version. Copying the data structure will add a little processing overhead,
|
|
|
2203 |
therefore this scheme should only be used where the caller intends to modify
|
|
|
2204 |
the data structure (or wishes to protect itself from others who might). This
|
|
|
2205 |
scheme uses B<Storable.pm> to perform the copy.
|
|
|
2206 |
|
|
|
2207 |
=back
|
|
|
2208 |
|
|
|
2209 |
Warning! The memory-based caching schemes compare the timestamp on the file to
|
|
|
2210 |
the time when it was last parsed. If the file is stored on an NFS filesystem
|
|
|
2211 |
(or other network share) and the clock on the file server is not exactly
|
|
|
2212 |
synchronised with the clock where your script is run, updates to the source XML
|
|
|
2213 |
file may appear to be ignored.
|
|
|
2214 |
|
|
|
2215 |
=head2 ContentKey => 'keyname' I<# in+out - seldom used>
|
|
|
2216 |
|
|
|
2217 |
When text content is parsed to a hash value, this option let's you specify a
|
|
|
2218 |
name for the hash key to override the default 'content'. So for example:
|
|
|
2219 |
|
|
|
2220 |
XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
|
|
|
2221 |
|
|
|
2222 |
will parse to:
|
|
|
2223 |
|
|
|
2224 |
{ 'one' => 1, 'text' => 'Text' }
|
|
|
2225 |
|
|
|
2226 |
instead of:
|
|
|
2227 |
|
|
|
2228 |
{ 'one' => 1, 'content' => 'Text' }
|
|
|
2229 |
|
|
|
2230 |
C<XMLout()> will also honour the value of this option when converting a hashref
|
|
|
2231 |
to XML.
|
|
|
2232 |
|
|
|
2233 |
You can also prefix your selected key name with a '-' character to have
|
|
|
2234 |
C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
|
|
|
2235 |
array folding. For example:
|
|
|
2236 |
|
|
|
2237 |
XMLin(
|
|
|
2238 |
'<opt><item name="one">First</item><item name="two">Second</item></opt>',
|
|
|
2239 |
KeyAttr => {item => 'name'},
|
|
|
2240 |
ForceArray => [ 'item' ],
|
|
|
2241 |
ContentKey => '-content'
|
|
|
2242 |
)
|
|
|
2243 |
|
|
|
2244 |
will parse to:
|
|
|
2245 |
|
|
|
2246 |
{
|
|
|
2247 |
'item' => {
|
|
|
2248 |
'one' => 'First'
|
|
|
2249 |
'two' => 'Second'
|
|
|
2250 |
}
|
|
|
2251 |
}
|
|
|
2252 |
|
|
|
2253 |
rather than this (without the '-'):
|
|
|
2254 |
|
|
|
2255 |
{
|
|
|
2256 |
'item' => {
|
|
|
2257 |
'one' => { 'content' => 'First' }
|
|
|
2258 |
'two' => { 'content' => 'Second' }
|
|
|
2259 |
}
|
|
|
2260 |
}
|
|
|
2261 |
|
|
|
2262 |
=head2 DataHandler => code_ref I<# in - SAX only>
|
|
|
2263 |
|
|
|
2264 |
When you use an B<XML::Simple> object as a SAX handler, it will return a
|
|
|
2265 |
'simple tree' data structure in the same format as C<XMLin()> would return. If
|
|
|
2266 |
this option is set (to a subroutine reference), then when the tree is built the
|
|
|
2267 |
subroutine will be called and passed two arguments: a reference to the
|
|
|
2268 |
B<XML::Simple> object and a reference to the data tree. The return value from
|
|
|
2269 |
the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for
|
|
|
2270 |
more details).
|
|
|
2271 |
|
|
|
2272 |
=head2 ForceArray => 1 I<# in - important>
|
|
|
2273 |
|
|
|
2274 |
This option should be set to '1' to force nested elements to be represented
|
|
|
2275 |
as arrays even when there is only one. Eg, with ForceArray enabled, this
|
|
|
2276 |
XML:
|
|
|
2277 |
|
|
|
2278 |
<opt>
|
|
|
2279 |
<name>value</name>
|
|
|
2280 |
</opt>
|
|
|
2281 |
|
|
|
2282 |
would parse to this:
|
|
|
2283 |
|
|
|
2284 |
{
|
|
|
2285 |
'name' => [
|
|
|
2286 |
'value'
|
|
|
2287 |
]
|
|
|
2288 |
}
|
|
|
2289 |
|
|
|
2290 |
instead of this (the default):
|
|
|
2291 |
|
|
|
2292 |
{
|
|
|
2293 |
'name' => 'value'
|
|
|
2294 |
}
|
|
|
2295 |
|
|
|
2296 |
This option is especially useful if the data structure is likely to be written
|
|
|
2297 |
back out as XML and the default behaviour of rolling single nested elements up
|
|
|
2298 |
into attributes is not desirable.
|
|
|
2299 |
|
|
|
2300 |
If you are using the array folding feature, you should almost certainly enable
|
|
|
2301 |
this option. If you do not, single nested elements will not be parsed to
|
|
|
2302 |
arrays and therefore will not be candidates for folding to a hash. (Given that
|
|
|
2303 |
the default value of 'KeyAttr' enables array folding, the default value of this
|
|
|
2304 |
option should probably also have been enabled too - sorry).
|
|
|
2305 |
|
|
|
2306 |
=head2 ForceArray => [ names ] I<# in - important>
|
|
|
2307 |
|
|
|
2308 |
This alternative (and preferred) form of the 'ForceArray' option allows you to
|
|
|
2309 |
specify a list of element names which should always be forced into an array
|
|
|
2310 |
representation, rather than the 'all or nothing' approach above.
|
|
|
2311 |
|
|
|
2312 |
It is also possible (since version 2.05) to include compiled regular
|
|
|
2313 |
expressions in the list - any element names which match the pattern will be
|
|
|
2314 |
forced to arrays. If the list contains only a single regex, then it is not
|
|
|
2315 |
necessary to enclose it in an arrayref. Eg:
|
|
|
2316 |
|
|
|
2317 |
ForceArray => qr/_list$/
|
|
|
2318 |
|
|
|
2319 |
=head2 ForceContent => 1 I<# in - seldom used>
|
|
|
2320 |
|
|
|
2321 |
When C<XMLin()> parses elements which have text content as well as attributes,
|
|
|
2322 |
the text content must be represented as a hash value rather than a simple
|
|
|
2323 |
scalar. This option allows you to force text content to always parse to
|
|
|
2324 |
a hash value even when there are no attributes. So for example:
|
|
|
2325 |
|
|
|
2326 |
XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
|
|
|
2327 |
|
|
|
2328 |
will parse to:
|
|
|
2329 |
|
|
|
2330 |
{
|
|
|
2331 |
'x' => { 'content' => 'text1' },
|
|
|
2332 |
'y' => { 'a' => 2, 'content' => 'text2' }
|
|
|
2333 |
}
|
|
|
2334 |
|
|
|
2335 |
instead of:
|
|
|
2336 |
|
|
|
2337 |
{
|
|
|
2338 |
'x' => 'text1',
|
|
|
2339 |
'y' => { 'a' => 2, 'content' => 'text2' }
|
|
|
2340 |
}
|
|
|
2341 |
|
|
|
2342 |
=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
|
|
|
2343 |
|
|
|
2344 |
You can use this option to eliminate extra levels of indirection in your Perl
|
|
|
2345 |
data structure. For example this XML:
|
|
|
2346 |
|
|
|
2347 |
<opt>
|
|
|
2348 |
<searchpath>
|
|
|
2349 |
<dir>/usr/bin</dir>
|
|
|
2350 |
<dir>/usr/local/bin</dir>
|
|
|
2351 |
<dir>/usr/X11/bin</dir>
|
|
|
2352 |
</searchpath>
|
|
|
2353 |
</opt>
|
|
|
2354 |
|
|
|
2355 |
Would normally be read into a structure like this:
|
|
|
2356 |
|
|
|
2357 |
{
|
|
|
2358 |
searchpath => {
|
|
|
2359 |
dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
|
|
|
2360 |
}
|
|
|
2361 |
}
|
|
|
2362 |
|
|
|
2363 |
But when read in with the appropriate value for 'GroupTags':
|
|
|
2364 |
|
|
|
2365 |
my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
|
|
|
2366 |
|
|
|
2367 |
It will return this simpler structure:
|
|
|
2368 |
|
|
|
2369 |
{
|
|
|
2370 |
searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
|
|
|
2371 |
}
|
|
|
2372 |
|
|
|
2373 |
The grouping element (C<< <searchpath> >> in the example) must not contain any
|
|
|
2374 |
attributes or elements other than the grouped element.
|
|
|
2375 |
|
|
|
2376 |
You can specify multiple 'grouping element' to 'grouped element' mappings in
|
|
|
2377 |
the same hashref. If this option is combined with C<KeyAttr>, the array
|
|
|
2378 |
folding will occur first and then the grouped element names will be eliminated.
|
|
|
2379 |
|
|
|
2380 |
C<XMLout> will also use the grouptag mappings to re-introduce the tags around
|
|
|
2381 |
the grouped elements. Beware though that this will occur in all places that
|
|
|
2382 |
the 'grouping tag' name occurs - you probably don't want to use the same name
|
|
|
2383 |
for elements as well as attributes.
|
|
|
2384 |
|
|
|
2385 |
=head2 Handler => object_ref I<# out - SAX only>
|
|
|
2386 |
|
|
|
2387 |
Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
|
|
|
2388 |
returning a string of XML. For more details see L<"SAX SUPPORT"> below.
|
|
|
2389 |
|
|
|
2390 |
Note: the current implementation of this option generates a string of XML
|
|
|
2391 |
and uses a SAX parser to translate it into SAX events. The normal encoding
|
|
|
2392 |
rules apply here - your data must be UTF8 encoded unless you specify an
|
|
|
2393 |
alternative encoding via the 'XMLDecl' option; and by the time the data reaches
|
|
|
2394 |
the handler object, it will be in UTF8 form regardless of the encoding you
|
|
|
2395 |
supply. A future implementation of this option may generate the events
|
|
|
2396 |
directly.
|
|
|
2397 |
|
|
|
2398 |
=head2 KeepRoot => 1 I<# in+out - handy>
|
|
|
2399 |
|
|
|
2400 |
In its attempt to return a data structure free of superfluous detail and
|
|
|
2401 |
unnecessary levels of indirection, C<XMLin()> normally discards the root
|
|
|
2402 |
element name. Setting the 'KeepRoot' option to '1' will cause the root element
|
|
|
2403 |
name to be retained. So after executing this code:
|
|
|
2404 |
|
|
|
2405 |
$config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
|
|
|
2406 |
|
|
|
2407 |
You'll be able to reference the tempdir as
|
|
|
2408 |
C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
|
|
|
2409 |
C<$config-E<gt>{tempdir}>.
|
|
|
2410 |
|
|
|
2411 |
Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
|
|
|
2412 |
data structure already contains a root element name and it is not necessary to
|
|
|
2413 |
add another.
|
|
|
2414 |
|
|
|
2415 |
=head2 KeyAttr => [ list ] I<# in+out - important>
|
|
|
2416 |
|
|
|
2417 |
This option controls the 'array folding' feature which translates nested
|
|
|
2418 |
elements from an array to a hash. It also controls the 'unfolding' of hashes
|
|
|
2419 |
to arrays.
|
|
|
2420 |
|
|
|
2421 |
For example, this XML:
|
|
|
2422 |
|
|
|
2423 |
<opt>
|
|
|
2424 |
<user login="grep" fullname="Gary R Epstein" />
|
|
|
2425 |
<user login="stty" fullname="Simon T Tyson" />
|
|
|
2426 |
</opt>
|
|
|
2427 |
|
|
|
2428 |
would, by default, parse to this:
|
|
|
2429 |
|
|
|
2430 |
{
|
|
|
2431 |
'user' => [
|
|
|
2432 |
{
|
|
|
2433 |
'login' => 'grep',
|
|
|
2434 |
'fullname' => 'Gary R Epstein'
|
|
|
2435 |
},
|
|
|
2436 |
{
|
|
|
2437 |
'login' => 'stty',
|
|
|
2438 |
'fullname' => 'Simon T Tyson'
|
|
|
2439 |
}
|
|
|
2440 |
]
|
|
|
2441 |
}
|
|
|
2442 |
|
|
|
2443 |
If the option 'KeyAttr => "login"' were used to specify that the 'login'
|
|
|
2444 |
attribute is a key, the same XML would parse to:
|
|
|
2445 |
|
|
|
2446 |
{
|
|
|
2447 |
'user' => {
|
|
|
2448 |
'stty' => {
|
|
|
2449 |
'fullname' => 'Simon T Tyson'
|
|
|
2450 |
},
|
|
|
2451 |
'grep' => {
|
|
|
2452 |
'fullname' => 'Gary R Epstein'
|
|
|
2453 |
}
|
|
|
2454 |
}
|
|
|
2455 |
}
|
|
|
2456 |
|
|
|
2457 |
The key attribute names should be supplied in an arrayref if there is more
|
|
|
2458 |
than one. C<XMLin()> will attempt to match attribute names in the order
|
|
|
2459 |
supplied. C<XMLout()> will use the first attribute name supplied when
|
|
|
2460 |
'unfolding' a hash into an array.
|
|
|
2461 |
|
|
|
2462 |
Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
|
|
|
2463 |
not want folding on input or unfolding on output you must set this option
|
|
|
2464 |
to an empty list to disable the feature.
|
|
|
2465 |
|
|
|
2466 |
Note 2: If you wish to use this option, you should also enable the
|
|
|
2467 |
C<ForceArray> option. Without 'ForceArray', a single nested element will be
|
|
|
2468 |
rolled up into a scalar rather than an array and therefore will not be folded
|
|
|
2469 |
(since only arrays get folded).
|
|
|
2470 |
|
|
|
2471 |
=head2 KeyAttr => { list } I<# in+out - important>
|
|
|
2472 |
|
|
|
2473 |
This alternative (and preferred) method of specifiying the key attributes
|
|
|
2474 |
allows more fine grained control over which elements are folded and on which
|
|
|
2475 |
attributes. For example the option 'KeyAttr => { package => 'id' } will cause
|
|
|
2476 |
any package elements to be folded on the 'id' attribute. No other elements
|
|
|
2477 |
which have an 'id' attribute will be folded at all.
|
|
|
2478 |
|
|
|
2479 |
Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
|
|
|
2480 |
if this syntax is used and an element which does not have the specified key
|
|
|
2481 |
attribute is encountered (eg: a 'package' element without an 'id' attribute, to
|
|
|
2482 |
use the example above). Warnings will only be generated if B<-w> is in force.
|
|
|
2483 |
|
|
|
2484 |
Two further variations are made possible by prefixing a '+' or a '-' character
|
|
|
2485 |
to the attribute name:
|
|
|
2486 |
|
|
|
2487 |
The option 'KeyAttr => { user => "+login" }' will cause this XML:
|
|
|
2488 |
|
|
|
2489 |
<opt>
|
|
|
2490 |
<user login="grep" fullname="Gary R Epstein" />
|
|
|
2491 |
<user login="stty" fullname="Simon T Tyson" />
|
|
|
2492 |
</opt>
|
|
|
2493 |
|
|
|
2494 |
to parse to this data structure:
|
|
|
2495 |
|
|
|
2496 |
{
|
|
|
2497 |
'user' => {
|
|
|
2498 |
'stty' => {
|
|
|
2499 |
'fullname' => 'Simon T Tyson',
|
|
|
2500 |
'login' => 'stty'
|
|
|
2501 |
},
|
|
|
2502 |
'grep' => {
|
|
|
2503 |
'fullname' => 'Gary R Epstein',
|
|
|
2504 |
'login' => 'grep'
|
|
|
2505 |
}
|
|
|
2506 |
}
|
|
|
2507 |
}
|
|
|
2508 |
|
|
|
2509 |
The '+' indicates that the value of the key attribute should be copied rather
|
|
|
2510 |
than moved to the folded hash key.
|
|
|
2511 |
|
|
|
2512 |
A '-' prefix would produce this result:
|
|
|
2513 |
|
|
|
2514 |
{
|
|
|
2515 |
'user' => {
|
|
|
2516 |
'stty' => {
|
|
|
2517 |
'fullname' => 'Simon T Tyson',
|
|
|
2518 |
'-login' => 'stty'
|
|
|
2519 |
},
|
|
|
2520 |
'grep' => {
|
|
|
2521 |
'fullname' => 'Gary R Epstein',
|
|
|
2522 |
'-login' => 'grep'
|
|
|
2523 |
}
|
|
|
2524 |
}
|
|
|
2525 |
}
|
|
|
2526 |
|
|
|
2527 |
As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
|
|
|
2528 |
|
|
|
2529 |
=head2 NoAttr => 1 I<# in+out - handy>
|
|
|
2530 |
|
|
|
2531 |
When used with C<XMLout()>, the generated XML will contain no attributes.
|
|
|
2532 |
All hash key/values will be represented as nested elements instead.
|
|
|
2533 |
|
|
|
2534 |
When used with C<XMLin()>, any attributes in the XML will be ignored.
|
|
|
2535 |
|
|
|
2536 |
=head2 NoEscape => 1 I<# out - seldom used>
|
|
|
2537 |
|
|
|
2538 |
By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
|
|
|
2539 |
'"' to '<', '>', '&' and '"' respectively. Use this option to
|
|
|
2540 |
suppress escaping (presumably because you've already escaped the data in some
|
|
|
2541 |
more sophisticated manner).
|
|
|
2542 |
|
|
|
2543 |
=head2 NoIndent => 1 I<# out - seldom used>
|
|
|
2544 |
|
|
|
2545 |
Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
|
|
|
2546 |
With this option enabled, the XML output will all be on one line (unless there
|
|
|
2547 |
are newlines in the data) - this may be easier for downstream processing.
|
|
|
2548 |
|
|
|
2549 |
=head2 NoSort => 1 I<# out - seldom used>
|
|
|
2550 |
|
|
|
2551 |
Newer versions of XML::Simple sort elements and attributes alphabetically (*),
|
|
|
2552 |
by default. Enable this option to suppress the sorting - possibly for
|
|
|
2553 |
backwards compatibility.
|
|
|
2554 |
|
|
|
2555 |
* Actually, sorting is alphabetical but 'key' attribute or element names (as in
|
|
|
2556 |
'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
|
|
|
2557 |
are sorted alphabetically by the value of the key field.
|
|
|
2558 |
|
|
|
2559 |
=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
|
|
|
2560 |
|
|
|
2561 |
This option controls how whitespace in text content is handled. Recognised
|
|
|
2562 |
values for the option are:
|
|
|
2563 |
|
|
|
2564 |
=over 4
|
|
|
2565 |
|
|
|
2566 |
=item *
|
|
|
2567 |
|
|
|
2568 |
|
|
|
2569 |
normalisation of whitespace in attribute values which is mandated by the XML
|
|
|
2570 |
recommendation)
|
|
|
2571 |
|
|
|
2572 |
=item *
|
|
|
2573 |
|
|
|
2574 |
1 = whitespace is normalised in any value used as a hash key (normalising means
|
|
|
2575 |
removing leading and trailing whitespace and collapsing sequences of whitespace
|
|
|
2576 |
characters to a single space)
|
|
|
2577 |
|
|
|
2578 |
=item *
|
|
|
2579 |
|
|
|
2580 |
2 = whitespace is normalised in all text content
|
|
|
2581 |
|
|
|
2582 |
=back
|
|
|
2583 |
|
|
|
2584 |
Note: you can spell this option with a 'z' if that is more natural for you.
|
|
|
2585 |
|
|
|
2586 |
=head2 NSExpand => 1 I<# in+out handy - SAX only>
|
|
|
2587 |
|
|
|
2588 |
This option controls namespace expansion - the translation of element and
|
|
|
2589 |
attribute names of the form 'prefix:name' to '{uri}name'. For example the
|
|
|
2590 |
element name 'xsl:template' might be expanded to:
|
|
|
2591 |
'{http://www.w3.org/1999/XSL/Transform}template'.
|
|
|
2592 |
|
|
|
2593 |
By default, C<XMLin()> will return element names and attribute names exactly as
|
|
|
2594 |
they appear in the XML. Setting this option to 1 will cause all element and
|
|
|
2595 |
attribute names to be expanded to include their namespace prefix.
|
|
|
2596 |
|
|
|
2597 |
I<Note: You must be using a SAX parser for this option to work (ie: it does not
|
|
|
2598 |
work with XML::Parser)>.
|
|
|
2599 |
|
|
|
2600 |
This option also controls whether C<XMLout()> performs the reverse translation
|
|
|
2601 |
from '{uri}name' back to 'prefix:name'. The default is no translation. If
|
|
|
2602 |
your data contains expanded names, you should set this option to 1 otherwise
|
|
|
2603 |
C<XMLout> will emit XML which is not well formed.
|
|
|
2604 |
|
|
|
2605 |
I<Note: You must have the XML::NamespaceSupport module installed if you want
|
|
|
2606 |
C<XMLout()> to translate URIs back to prefixes>.
|
|
|
2607 |
|
|
|
2608 |
=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
|
|
|
2609 |
|
|
|
2610 |
Use this option to have 'high' (non-ASCII) characters in your Perl data
|
|
|
2611 |
structure converted to numeric entities (eg: €) in the XML output. Three
|
|
|
2612 |
levels are possible:
|
|
|
2613 |
|
|
|
2614 |
|
|
|
2615 |
|
|
|
2616 |
1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
|
|
|
2617 |
|
|
|
2618 |
2 - all characters above 0x7F are escaped (good for plain ASCII output)
|
|
|
2619 |
|
|
|
2620 |
=head2 OutputFile => <file specifier> I<# out - handy>
|
|
|
2621 |
|
|
|
2622 |
The default behaviour of C<XMLout()> is to return the XML as a string. If you
|
|
|
2623 |
wish to write the XML to a file, simply supply the filename using the
|
|
|
2624 |
'OutputFile' option.
|
|
|
2625 |
|
|
|
2626 |
This option also accepts an IO handle object - especially useful in Perl 5.8.0
|
|
|
2627 |
and later for output using an encoding other than UTF-8, eg:
|
|
|
2628 |
|
|
|
2629 |
open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
|
|
|
2630 |
XMLout($ref, OutputFile => $fh);
|
|
|
2631 |
|
|
|
2632 |
Note, XML::Simple does not require that the object you pass in to the
|
|
|
2633 |
OutputFile option inherits from L<IO::Handle> - it simply assumes the object
|
|
|
2634 |
supports a C<print> method.
|
|
|
2635 |
|
|
|
2636 |
=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
|
|
|
2637 |
|
|
|
2638 |
I<Note: This option is now officially deprecated. If you find it useful, email
|
|
|
2639 |
the author with an example of what you use it for. Do not use this option to
|
|
|
2640 |
set the ProtocolEncoding, that's just plain wrong - fix the XML>.
|
|
|
2641 |
|
|
|
2642 |
This option allows you to pass parameters to the constructor of the underlying
|
|
|
2643 |
XML::Parser object (which of course assumes you're not using SAX).
|
|
|
2644 |
|
|
|
2645 |
=head2 RootName => 'string' I<# out - handy>
|
|
|
2646 |
|
|
|
2647 |
By default, when C<XMLout()> generates XML, the root element will be named
|
|
|
2648 |
'opt'. This option allows you to specify an alternative name.
|
|
|
2649 |
|
|
|
2650 |
Specifying either undef or the empty string for the RootName option will
|
|
|
2651 |
produce XML with no root elements. In most cases the resulting XML fragment
|
|
|
2652 |
will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
|
|
|
2653 |
Nevertheless, the option has been found to be useful in certain circumstances.
|
|
|
2654 |
|
|
|
2655 |
=head2 SearchPath => [ list ] I<# in - handy>
|
|
|
2656 |
|
|
|
2657 |
If you pass C<XMLin()> a filename, but the filename include no directory
|
|
|
2658 |
component, you can use this option to specify which directories should be
|
|
|
2659 |
searched to locate the file. You might use this option to search first in the
|
|
|
2660 |
user's home directory, then in a global directory such as /etc.
|
|
|
2661 |
|
|
|
2662 |
If a filename is provided to C<XMLin()> but SearchPath is not defined, the
|
|
|
2663 |
file is assumed to be in the current directory.
|
|
|
2664 |
|
|
|
2665 |
If the first parameter to C<XMLin()> is undefined, the default SearchPath
|
|
|
2666 |
will contain only the directory in which the script itself is located.
|
|
|
2667 |
Otherwise the default SearchPath will be empty.
|
|
|
2668 |
|
|
|
2669 |
=head2 StrictMode => 1 | 0 I<# in+out seldom used>
|
|
|
2670 |
|
|
|
2671 |
This option allows you to turn L<STRICT MODE> on or off for a particular call,
|
|
|
2672 |
regardless of whether it was enabled at the time XML::Simple was loaded.
|
|
|
2673 |
|
|
|
2674 |
=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
|
|
|
2675 |
|
|
|
2676 |
This option controls what C<XMLin()> should do with empty elements (no
|
|
|
2677 |
attributes and no content). The default behaviour is to represent them as
|
|
|
2678 |
empty hashes. Setting this option to a true value (eg: 1) will cause empty
|
|
|
2679 |
elements to be skipped altogether. Setting the option to 'undef' or the empty
|
|
|
2680 |
string will cause empty elements to be represented as the undefined value or
|
|
|
2681 |
the empty string respectively. The latter two alternatives are a little
|
|
|
2682 |
easier to test for in your code than a hash with no keys.
|
|
|
2683 |
|
|
|
2684 |
The option also controls what C<XMLout()> does with undefined values. Setting
|
|
|
2685 |
the option to undef causes undefined values to be output as empty elements
|
|
|
2686 |
(rather than empty attributes), it also suppresses the generation of warnings
|
|
|
2687 |
about undefined values. Setting the option to a true value (eg: 1) causes
|
|
|
2688 |
undefined values to be skipped altogether on output.
|
|
|
2689 |
|
|
|
2690 |
=head2 ValueAttr => [ names ] I<# in - handy>
|
|
|
2691 |
|
|
|
2692 |
Use this option to deal elements which always have a single attribute and no
|
|
|
2693 |
content. Eg:
|
|
|
2694 |
|
|
|
2695 |
<opt>
|
|
|
2696 |
<colour value="red" />
|
|
|
2697 |
<size value="XXL" />
|
|
|
2698 |
</opt>
|
|
|
2699 |
|
|
|
2700 |
Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
|
|
|
2701 |
|
|
|
2702 |
{
|
|
|
2703 |
colour => 'red',
|
|
|
2704 |
size => 'XXL'
|
|
|
2705 |
}
|
|
|
2706 |
|
|
|
2707 |
instead of this (the default):
|
|
|
2708 |
|
|
|
2709 |
{
|
|
|
2710 |
colour => { value => 'red' },
|
|
|
2711 |
size => { value => 'XXL' }
|
|
|
2712 |
}
|
|
|
2713 |
|
|
|
2714 |
Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
|
|
|
2715 |
since the attribute name is discarded at parse time, the original XML cannot be
|
|
|
2716 |
reconstructed.
|
|
|
2717 |
|
|
|
2718 |
=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
|
|
|
2719 |
|
|
|
2720 |
This (preferred) form of the ValueAttr option requires you to specify both
|
|
|
2721 |
the element and the attribute names. This is not only safer, it also allows
|
|
|
2722 |
the original XML to be reconstructed by C<XMLout()>.
|
|
|
2723 |
|
|
|
2724 |
Note: You probably don't want to use this option and the NoAttr option at the
|
|
|
2725 |
same time.
|
|
|
2726 |
|
|
|
2727 |
=head2 Variables => { name => value } I<# in - handy>
|
|
|
2728 |
|
|
|
2729 |
This option allows variables in the XML to be expanded when the file is read.
|
|
|
2730 |
(there is no facility for putting the variable names back if you regenerate
|
|
|
2731 |
XML using C<XMLout>).
|
|
|
2732 |
|
|
|
2733 |
A 'variable' is any text of the form C<${name}> which occurs in an attribute
|
|
|
2734 |
value or in the text content of an element. If 'name' matches a key in the
|
|
|
2735 |
supplied hashref, C<${name}> will be replaced with the corresponding value from
|
|
|
2736 |
the hashref. If no matching key is found, the variable will not be replaced.
|
|
|
2737 |
Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
|
|
|
2738 |
allowed).
|
|
|
2739 |
|
|
|
2740 |
=head2 VarAttr => 'attr_name' I<# in - handy>
|
|
|
2741 |
|
|
|
2742 |
In addition to the variables defined using C<Variables>, this option allows
|
|
|
2743 |
variables to be defined in the XML. A variable definition consists of an
|
|
|
2744 |
element with an attribute called 'attr_name' (the value of the C<VarAttr>
|
|
|
2745 |
option). The value of the attribute will be used as the variable name and the
|
|
|
2746 |
text content of the element will be used as the value. A variable defined in
|
|
|
2747 |
this way will override a variable defined using the C<Variables> option. For
|
|
|
2748 |
example:
|
|
|
2749 |
|
|
|
2750 |
XMLin( '<opt>
|
|
|
2751 |
<dir name="prefix">/usr/local/apache</dir>
|
|
|
2752 |
<dir name="exec_prefix">${prefix}</dir>
|
|
|
2753 |
<dir name="bindir">${exec_prefix}/bin</dir>
|
|
|
2754 |
</opt>',
|
|
|
2755 |
VarAttr => 'name', ContentKey => '-content'
|
|
|
2756 |
);
|
|
|
2757 |
|
|
|
2758 |
produces the following data structure:
|
|
|
2759 |
|
|
|
2760 |
{
|
|
|
2761 |
dir => {
|
|
|
2762 |
prefix => '/usr/local/apache',
|
|
|
2763 |
exec_prefix => '/usr/local/apache',
|
|
|
2764 |
bindir => '/usr/local/apache/bin',
|
|
|
2765 |
}
|
|
|
2766 |
}
|
|
|
2767 |
|
|
|
2768 |
=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy>
|
|
|
2769 |
|
|
|
2770 |
If you want the output from C<XMLout()> to start with the optional XML
|
|
|
2771 |
declaration, simply set the option to '1'. The default XML declaration is:
|
|
|
2772 |
|
|
|
2773 |
<?xml version='1.0' standalone='yes'?>
|
|
|
2774 |
|
|
|
2775 |
If you want some other string (for example to declare an encoding value), set
|
|
|
2776 |
the value of this option to the complete string you require.
|
|
|
2777 |
|
|
|
2778 |
|
|
|
2779 |
=head1 OPTIONAL OO INTERFACE
|
|
|
2780 |
|
|
|
2781 |
The procedural interface is both simple and convenient however there are a
|
|
|
2782 |
couple of reasons why you might prefer to use the object oriented (OO)
|
|
|
2783 |
interface:
|
|
|
2784 |
|
|
|
2785 |
=over 4
|
|
|
2786 |
|
|
|
2787 |
=item *
|
|
|
2788 |
|
|
|
2789 |
to define a set of default values which should be used on all subsequent calls
|
|
|
2790 |
to C<XMLin()> or C<XMLout()>
|
|
|
2791 |
|
|
|
2792 |
=item *
|
|
|
2793 |
|
|
|
2794 |
to override methods in B<XML::Simple> to provide customised behaviour
|
|
|
2795 |
|
|
|
2796 |
=back
|
|
|
2797 |
|
|
|
2798 |
The default values for the options described above are unlikely to suit
|
|
|
2799 |
everyone. The OO interface allows you to effectively override B<XML::Simple>'s
|
|
|
2800 |
defaults with your preferred values. It works like this:
|
|
|
2801 |
|
|
|
2802 |
First create an XML::Simple parser object with your preferred defaults:
|
|
|
2803 |
|
|
|
2804 |
my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
|
|
|
2805 |
|
|
|
2806 |
then call C<XMLin()> or C<XMLout()> as a method of that object:
|
|
|
2807 |
|
|
|
2808 |
my $ref = $xs->XMLin($xml);
|
|
|
2809 |
my $xml = $xs->XMLout($ref);
|
|
|
2810 |
|
|
|
2811 |
You can also specify options when you make the method calls and these values
|
|
|
2812 |
will be merged with the values specified when the object was created. Values
|
|
|
2813 |
specified in a method call take precedence.
|
|
|
2814 |
|
|
|
2815 |
Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
|
|
|
2816 |
called as C<xml_in()> or C<xml_out()>. The method names are aliased so the
|
|
|
2817 |
only difference is the aesthetics.
|
|
|
2818 |
|
|
|
2819 |
=head2 Parsing Methods
|
|
|
2820 |
|
|
|
2821 |
You can explicitly call one of the following methods rather than rely on the
|
|
|
2822 |
C<xml_in()> method automatically determining whether the target to be parsed is
|
|
|
2823 |
a string, a file or a filehandle:
|
|
|
2824 |
|
|
|
2825 |
=over 4
|
|
|
2826 |
|
|
|
2827 |
=item parse_string(text)
|
|
|
2828 |
|
|
|
2829 |
Works exactly like the C<xml_in()> method but assumes the first argument is
|
|
|
2830 |
a string of XML (or a reference to a scalar containing a string of XML).
|
|
|
2831 |
|
|
|
2832 |
=item parse_file(filename)
|
|
|
2833 |
|
|
|
2834 |
Works exactly like the C<xml_in()> method but assumes the first argument is
|
|
|
2835 |
the name of a file containing XML.
|
|
|
2836 |
|
|
|
2837 |
=item parse_fh(file_handle)
|
|
|
2838 |
|
|
|
2839 |
Works exactly like the C<xml_in()> method but assumes the first argument is
|
|
|
2840 |
a filehandle which can be read to get XML.
|
|
|
2841 |
|
|
|
2842 |
=back
|
|
|
2843 |
|
|
|
2844 |
=head2 Hook Methods
|
|
|
2845 |
|
|
|
2846 |
You can make your own class which inherits from XML::Simple and overrides
|
|
|
2847 |
certain behaviours. The following methods may provide useful 'hooks' upon
|
|
|
2848 |
which to hang your modified behaviour. You may find other undocumented methods
|
|
|
2849 |
by examining the source, but those may be subject to change in future releases.
|
|
|
2850 |
|
|
|
2851 |
=over 4
|
|
|
2852 |
|
|
|
2853 |
=item handle_options(direction, name => value ...)
|
|
|
2854 |
|
|
|
2855 |
This method will be called when one of the parsing methods or the C<XMLout()>
|
|
|
2856 |
method is called. The initial argument will be a string (either 'in' or 'out')
|
|
|
2857 |
and the remaining arguments will be name value pairs.
|
|
|
2858 |
|
|
|
2859 |
=item default_config_file()
|
|
|
2860 |
|
|
|
2861 |
Calculates and returns the name of the file which should be parsed if no
|
|
|
2862 |
filename is passed to C<XMLin()> (default: C<$0.xml>).
|
|
|
2863 |
|
|
|
2864 |
=item build_simple_tree(filename, string)
|
|
|
2865 |
|
|
|
2866 |
Called from C<XMLin()> or any of the parsing methods. Takes either a file name
|
|
|
2867 |
as the first argument or C<undef> followed by a 'string' as the second
|
|
|
2868 |
argument. Returns a simple tree data structure. You could override this
|
|
|
2869 |
method to apply your own transformations before the data structure is returned
|
|
|
2870 |
to the caller.
|
|
|
2871 |
|
|
|
2872 |
=item new_hashref()
|
|
|
2873 |
|
|
|
2874 |
When the 'simple tree' data structure is being built, this method will be
|
|
|
2875 |
called to create any required anonymous hashrefs.
|
|
|
2876 |
|
|
|
2877 |
=item sorted_keys(name, hashref)
|
|
|
2878 |
|
|
|
2879 |
Called when C<XMLout()> is translating a hashref to XML. This routine returns
|
|
|
2880 |
a list of hash keys in the order that the corresponding attributes/elements
|
|
|
2881 |
should appear in the output.
|
|
|
2882 |
|
|
|
2883 |
=item escape_value(string)
|
|
|
2884 |
|
|
|
2885 |
Called from C<XMLout()>, takes a string and returns a copy of the string with
|
|
|
2886 |
XML character escaping rules applied.
|
|
|
2887 |
|
|
|
2888 |
=item numeric_escape(string)
|
|
|
2889 |
|
|
|
2890 |
Called from C<escape_value()>, to handle non-ASCII characters (depending on the
|
|
|
2891 |
value of the NumericEscape option).
|
|
|
2892 |
|
|
|
2893 |
=item copy_hash(hashref, extra_key => value, ...)
|
|
|
2894 |
|
|
|
2895 |
Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
|
|
|
2896 |
hashes. You might wish to override this method if you're using tied hashes and
|
|
|
2897 |
don't want them to get untied.
|
|
|
2898 |
|
|
|
2899 |
=back
|
|
|
2900 |
|
|
|
2901 |
=head2 Cache Methods
|
|
|
2902 |
|
|
|
2903 |
XML::Simple implements three caching schemes ('storable', 'memshare' and
|
|
|
2904 |
'memcopy'). You can implement a custom caching scheme by implementing
|
|
|
2905 |
two methods - one for reading from the cache and one for writing to it.
|
|
|
2906 |
|
|
|
2907 |
For example, you might implement a new 'dbm' scheme that stores cached data
|
|
|
2908 |
structures using the L<MLDBM> module. First, you would add a
|
|
|
2909 |
C<cache_read_dbm()> method which accepted a filename for use as a lookup key
|
|
|
2910 |
and returned a data structure on success, or undef on failure. Then, you would
|
|
|
2911 |
implement a C<cache_read_dbm()> method which accepted a data structure and a
|
|
|
2912 |
filename.
|
|
|
2913 |
|
|
|
2914 |
You would use this caching scheme by specifying the option:
|
|
|
2915 |
|
|
|
2916 |
Cache => [ 'dbm' ]
|
|
|
2917 |
|
|
|
2918 |
=head1 STRICT MODE
|
|
|
2919 |
|
|
|
2920 |
If you import the B<XML::Simple> routines like this:
|
|
|
2921 |
|
|
|
2922 |
use XML::Simple qw(:strict);
|
|
|
2923 |
|
|
|
2924 |
the following common mistakes will be detected and treated as fatal errors
|
|
|
2925 |
|
|
|
2926 |
=over 4
|
|
|
2927 |
|
|
|
2928 |
=item *
|
|
|
2929 |
|
|
|
2930 |
Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
|
|
|
2931 |
reading about this option, turn it off with: KeyAttr => [ ]
|
|
|
2932 |
|
|
|
2933 |
=item *
|
|
|
2934 |
|
|
|
2935 |
Failing to explicitly set the C<ForceArray> option - if you can't be bothered
|
|
|
2936 |
reading about this option, set it to the safest mode with: ForceArray => 1
|
|
|
2937 |
|
|
|
2938 |
=item *
|
|
|
2939 |
|
|
|
2940 |
Setting ForceArray to an array, but failing to list all the elements from the
|
|
|
2941 |
KeyAttr hash.
|
|
|
2942 |
|
|
|
2943 |
=item *
|
|
|
2944 |
|
|
|
2945 |
Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
|
|
|
2946 |
one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
|
|
|
2947 |
element). Note: if strict mode is not set but -w is, this condition triggers a
|
|
|
2948 |
warning.
|
|
|
2949 |
|
|
|
2950 |
=item *
|
|
|
2951 |
|
|
|
2952 |
Data error - as above, but non-unique values are present in the key attribute
|
|
|
2953 |
(eg: more than one E<lt>partE<gt> element with the same partnum). This will
|
|
|
2954 |
also trigger a warning if strict mode is not enabled.
|
|
|
2955 |
|
|
|
2956 |
=item *
|
|
|
2957 |
|
|
|
2958 |
Data error - as above, but value of key attribute (eg: partnum) is not a
|
|
|
2959 |
scalar string (due to nested elements etc). This will also trigger a warning
|
|
|
2960 |
if strict mode is not enabled.
|
|
|
2961 |
|
|
|
2962 |
=back
|
|
|
2963 |
|
|
|
2964 |
=head1 SAX SUPPORT
|
|
|
2965 |
|
|
|
2966 |
From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
|
|
|
2967 |
for XML) - specifically SAX2.
|
|
|
2968 |
|
|
|
2969 |
In a typical SAX application, an XML parser (or SAX 'driver') module generates
|
|
|
2970 |
SAX events (start of element, character data, end of element, etc) as it parses
|
|
|
2971 |
an XML document and a 'handler' module processes the events to extract the
|
|
|
2972 |
required data. This simple model allows for some interesting and powerful
|
|
|
2973 |
possibilities:
|
|
|
2974 |
|
|
|
2975 |
=over 4
|
|
|
2976 |
|
|
|
2977 |
=item *
|
|
|
2978 |
|
|
|
2979 |
Applications written to the SAX API can extract data from huge XML documents
|
|
|
2980 |
without the memory overheads of a DOM or tree API.
|
|
|
2981 |
|
|
|
2982 |
=item *
|
|
|
2983 |
|
|
|
2984 |
The SAX API allows for plug and play interchange of parser modules without
|
|
|
2985 |
having to change your code to fit a new module's API. A number of SAX parsers
|
|
|
2986 |
are available with capabilities ranging from extreme portability to blazing
|
|
|
2987 |
performance.
|
|
|
2988 |
|
|
|
2989 |
=item *
|
|
|
2990 |
|
|
|
2991 |
A SAX 'filter' module can implement both a handler interface for receiving
|
|
|
2992 |
data and a generator interface for passing modified data on to a downstream
|
|
|
2993 |
handler. Filters can be chained together in 'pipelines'.
|
|
|
2994 |
|
|
|
2995 |
=item *
|
|
|
2996 |
|
|
|
2997 |
One filter module might split a data stream to direct data to two or more
|
|
|
2998 |
downstream handlers.
|
|
|
2999 |
|
|
|
3000 |
=item *
|
|
|
3001 |
|
|
|
3002 |
Generating SAX events is not the exclusive preserve of XML parsing modules.
|
|
|
3003 |
For example, a module might extract data from a relational database using DBI
|
|
|
3004 |
and pass it on to a SAX pipeline for filtering and formatting.
|
|
|
3005 |
|
|
|
3006 |
=back
|
|
|
3007 |
|
|
|
3008 |
B<XML::Simple> can operate at either end of a SAX pipeline. For example,
|
|
|
3009 |
you can take a data structure in the form of a hashref and pass it into a
|
|
|
3010 |
SAX pipeline using the 'Handler' option on C<XMLout()>:
|
|
|
3011 |
|
|
|
3012 |
use XML::Simple;
|
|
|
3013 |
use Some::SAX::Filter;
|
|
|
3014 |
use XML::SAX::Writer;
|
|
|
3015 |
|
|
|
3016 |
my $ref = {
|
|
|
3017 |
.... # your data here
|
|
|
3018 |
};
|
|
|
3019 |
|
|
|
3020 |
my $writer = XML::SAX::Writer->new();
|
|
|
3021 |
my $filter = Some::SAX::Filter->new(Handler => $writer);
|
|
|
3022 |
my $simple = XML::Simple->new(Handler => $filter);
|
|
|
3023 |
$simple->XMLout($ref);
|
|
|
3024 |
|
|
|
3025 |
You can also put B<XML::Simple> at the opposite end of the pipeline to take
|
|
|
3026 |
advantage of the simple 'tree' data structure once the relevant data has been
|
|
|
3027 |
isolated through filtering:
|
|
|
3028 |
|
|
|
3029 |
use XML::SAX;
|
|
|
3030 |
use Some::SAX::Filter;
|
|
|
3031 |
use XML::Simple;
|
|
|
3032 |
|
|
|
3033 |
my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
|
|
|
3034 |
my $filter = Some::SAX::Filter->new(Handler => $simple);
|
|
|
3035 |
my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
|
|
|
3036 |
|
|
|
3037 |
my $ref = $parser->parse_uri('some_huge_file.xml');
|
|
|
3038 |
|
|
|
3039 |
print $ref->{part}->{'555-1234'};
|
|
|
3040 |
|
|
|
3041 |
You can build a filter by using an XML::Simple object as a handler and setting
|
|
|
3042 |
its DataHandler option to point to a routine which takes the resulting tree,
|
|
|
3043 |
modifies it and sends it off as SAX events to a downstream handler:
|
|
|
3044 |
|
|
|
3045 |
my $writer = XML::SAX::Writer->new();
|
|
|
3046 |
my $filter = XML::Simple->new(
|
|
|
3047 |
DataHandler => sub {
|
|
|
3048 |
my $simple = shift;
|
|
|
3049 |
my $data = shift;
|
|
|
3050 |
|
|
|
3051 |
# Modify $data here
|
|
|
3052 |
|
|
|
3053 |
$simple->XMLout($data, Handler => $writer);
|
|
|
3054 |
}
|
|
|
3055 |
);
|
|
|
3056 |
my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
|
|
|
3057 |
|
|
|
3058 |
$parser->parse_uri($filename);
|
|
|
3059 |
|
|
|
3060 |
I<Note: In this last example, the 'Handler' option was specified in the call to
|
|
|
3061 |
C<XMLout()> but it could also have been specified in the constructor>.
|
|
|
3062 |
|
|
|
3063 |
=head1 ENVIRONMENT
|
|
|
3064 |
|
|
|
3065 |
If you don't care which parser module B<XML::Simple> uses then skip this
|
|
|
3066 |
section entirely (it looks more complicated than it really is).
|
|
|
3067 |
|
|
|
3068 |
B<XML::Simple> will default to using a B<SAX> parser if one is available or
|
|
|
3069 |
B<XML::Parser> if SAX is not available.
|
|
|
3070 |
|
|
|
3071 |
You can dictate which parser module is used by setting either the environment
|
|
|
3072 |
variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
|
|
|
3073 |
$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules
|
|
|
3074 |
are used:
|
|
|
3075 |
|
|
|
3076 |
=over 4
|
|
|
3077 |
|
|
|
3078 |
=item *
|
|
|
3079 |
|
|
|
3080 |
The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use
|
|
|
3081 |
its default rules, you can set the package variable to an empty string.
|
|
|
3082 |
|
|
|
3083 |
=item *
|
|
|
3084 |
|
|
|
3085 |
If the 'preferred parser' is set to the string 'XML::Parser', then
|
|
|
3086 |
L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
|
|
|
3087 |
installed).
|
|
|
3088 |
|
|
|
3089 |
=item *
|
|
|
3090 |
|
|
|
3091 |
If the 'preferred parser' is set to some other value, then it is assumed to be
|
|
|
3092 |
the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
|
|
|
3093 |
If L<XML::SAX> is not installed, or the requested parser module is not
|
|
|
3094 |
installed, then C<XMLin()> will die.
|
|
|
3095 |
|
|
|
3096 |
=item *
|
|
|
3097 |
|
|
|
3098 |
If the 'preferred parser' is not defined at all (the normal default
|
|
|
3099 |
state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is
|
|
|
3100 |
installed, then a parser module will be selected according to
|
|
|
3101 |
L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
|
|
|
3102 |
parser installed).
|
|
|
3103 |
|
|
|
3104 |
=item *
|
|
|
3105 |
|
|
|
3106 |
if the 'preferred parser' is not defined and B<XML::SAX> is not
|
|
|
3107 |
installed, then B<XML::Parser> will be used. C<XMLin()> will die if
|
|
|
3108 |
L<XML::Parser> is not installed.
|
|
|
3109 |
|
|
|
3110 |
=back
|
|
|
3111 |
|
|
|
3112 |
Note: The B<XML::SAX> distribution includes an XML parser written entirely in
|
|
|
3113 |
Perl. It is very portable but it is not very fast. You should consider
|
|
|
3114 |
installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
|
|
|
3115 |
platform.
|
|
|
3116 |
|
|
|
3117 |
=head1 ERROR HANDLING
|
|
|
3118 |
|
|
|
3119 |
The XML standard is very clear on the issue of non-compliant documents. An
|
|
|
3120 |
error in parsing any single element (for example a missing end tag) must cause
|
|
|
3121 |
the whole document to be rejected. B<XML::Simple> will die with an appropriate
|
|
|
3122 |
message if it encounters a parsing error.
|
|
|
3123 |
|
|
|
3124 |
If dying is not appropriate for your application, you should arrange to call
|
|
|
3125 |
C<XMLin()> in an eval block and look for errors in $@. eg:
|
|
|
3126 |
|
|
|
3127 |
my $config = eval { XMLin() };
|
|
|
3128 |
PopUpMessage($@) if($@);
|
|
|
3129 |
|
|
|
3130 |
Note, there is a common misconception that use of B<eval> will significantly
|
|
|
3131 |
slow down a script. While that may be true when the code being eval'd is in a
|
|
|
3132 |
string, it is not true of code like the sample above.
|
|
|
3133 |
|
|
|
3134 |
=head1 EXAMPLES
|
|
|
3135 |
|
|
|
3136 |
When C<XMLin()> reads the following very simple piece of XML:
|
|
|
3137 |
|
|
|
3138 |
<opt username="testuser" password="frodo"></opt>
|
|
|
3139 |
|
|
|
3140 |
it returns the following data structure:
|
|
|
3141 |
|
|
|
3142 |
{
|
|
|
3143 |
'username' => 'testuser',
|
|
|
3144 |
'password' => 'frodo'
|
|
|
3145 |
}
|
|
|
3146 |
|
|
|
3147 |
The identical result could have been produced with this alternative XML:
|
|
|
3148 |
|
|
|
3149 |
<opt username="testuser" password="frodo" />
|
|
|
3150 |
|
|
|
3151 |
Or this (although see 'ForceArray' option for variations):
|
|
|
3152 |
|
|
|
3153 |
<opt>
|
|
|
3154 |
<username>testuser</username>
|
|
|
3155 |
<password>frodo</password>
|
|
|
3156 |
</opt>
|
|
|
3157 |
|
|
|
3158 |
Repeated nested elements are represented as anonymous arrays:
|
|
|
3159 |
|
|
|
3160 |
<opt>
|
|
|
3161 |
<person firstname="Joe" lastname="Smith">
|
|
|
3162 |
<email>joe@smith.com</email>
|
|
|
3163 |
<email>jsmith@yahoo.com</email>
|
|
|
3164 |
</person>
|
|
|
3165 |
<person firstname="Bob" lastname="Smith">
|
|
|
3166 |
<email>bob@smith.com</email>
|
|
|
3167 |
</person>
|
|
|
3168 |
</opt>
|
|
|
3169 |
|
|
|
3170 |
{
|
|
|
3171 |
'person' => [
|
|
|
3172 |
{
|
|
|
3173 |
'email' => [
|
|
|
3174 |
'joe@smith.com',
|
|
|
3175 |
'jsmith@yahoo.com'
|
|
|
3176 |
],
|
|
|
3177 |
'firstname' => 'Joe',
|
|
|
3178 |
'lastname' => 'Smith'
|
|
|
3179 |
},
|
|
|
3180 |
{
|
|
|
3181 |
'email' => 'bob@smith.com',
|
|
|
3182 |
'firstname' => 'Bob',
|
|
|
3183 |
'lastname' => 'Smith'
|
|
|
3184 |
}
|
|
|
3185 |
]
|
|
|
3186 |
}
|
|
|
3187 |
|
|
|
3188 |
Nested elements with a recognised key attribute are transformed (folded) from
|
|
|
3189 |
an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
|
|
|
3190 |
option):
|
|
|
3191 |
|
|
|
3192 |
<opt>
|
|
|
3193 |
<person key="jsmith" firstname="Joe" lastname="Smith" />
|
|
|
3194 |
<person key="tsmith" firstname="Tom" lastname="Smith" />
|
|
|
3195 |
<person key="jbloggs" firstname="Joe" lastname="Bloggs" />
|
|
|
3196 |
</opt>
|
|
|
3197 |
|
|
|
3198 |
{
|
|
|
3199 |
'person' => {
|
|
|
3200 |
'jbloggs' => {
|
|
|
3201 |
'firstname' => 'Joe',
|
|
|
3202 |
'lastname' => 'Bloggs'
|
|
|
3203 |
},
|
|
|
3204 |
'tsmith' => {
|
|
|
3205 |
'firstname' => 'Tom',
|
|
|
3206 |
'lastname' => 'Smith'
|
|
|
3207 |
},
|
|
|
3208 |
'jsmith' => {
|
|
|
3209 |
'firstname' => 'Joe',
|
|
|
3210 |
'lastname' => 'Smith'
|
|
|
3211 |
}
|
|
|
3212 |
}
|
|
|
3213 |
}
|
|
|
3214 |
|
|
|
3215 |
|
|
|
3216 |
The <anon> tag can be used to form anonymous arrays:
|
|
|
3217 |
|
|
|
3218 |
<opt>
|
|
|
3219 |
<head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
|
|
|
3220 |
<data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
|
|
|
3221 |
<data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
|
|
|
3222 |
<data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
|
|
|
3223 |
</opt>
|
|
|
3224 |
|
|
|
3225 |
{
|
|
|
3226 |
'head' => [
|
|
|
3227 |
[ 'Col 1', 'Col 2', 'Col 3' ]
|
|
|
3228 |
],
|
|
|
3229 |
'data' => [
|
|
|
3230 |
[ 'R1C1', 'R1C2', 'R1C3' ],
|
|
|
3231 |
[ 'R2C1', 'R2C2', 'R2C3' ],
|
|
|
3232 |
[ 'R3C1', 'R3C2', 'R3C3' ]
|
|
|
3233 |
]
|
|
|
3234 |
}
|
|
|
3235 |
|
|
|
3236 |
Anonymous arrays can be nested to arbirtrary levels and as a special case, if
|
|
|
3237 |
the surrounding tags for an XML document contain only an anonymous array the
|
|
|
3238 |
arrayref will be returned directly rather than the usual hashref:
|
|
|
3239 |
|
|
|
3240 |
<opt>
|
|
|
3241 |
<anon><anon>Col 1</anon><anon>Col 2</anon></anon>
|
|
|
3242 |
<anon><anon>R1C1</anon><anon>R1C2</anon></anon>
|
|
|
3243 |
<anon><anon>R2C1</anon><anon>R2C2</anon></anon>
|
|
|
3244 |
</opt>
|
|
|
3245 |
|
|
|
3246 |
[
|
|
|
3247 |
[ 'Col 1', 'Col 2' ],
|
|
|
3248 |
[ 'R1C1', 'R1C2' ],
|
|
|
3249 |
[ 'R2C1', 'R2C2' ]
|
|
|
3250 |
]
|
|
|
3251 |
|
|
|
3252 |
Elements which only contain text content will simply be represented as a
|
|
|
3253 |
scalar. Where an element has both attributes and text content, the element
|
|
|
3254 |
will be represented as a hashref with the text content in the 'content' key
|
|
|
3255 |
(see the C<ContentKey> option):
|
|
|
3256 |
|
|
|
3257 |
<opt>
|
|
|
3258 |
<one>first</one>
|
|
|
3259 |
<two attr="value">second</two>
|
|
|
3260 |
</opt>
|
|
|
3261 |
|
|
|
3262 |
{
|
|
|
3263 |
'one' => 'first',
|
|
|
3264 |
'two' => { 'attr' => 'value', 'content' => 'second' }
|
|
|
3265 |
}
|
|
|
3266 |
|
|
|
3267 |
Mixed content (elements which contain both text content and nested elements)
|
|
|
3268 |
will be not be represented in a useful way - element order and significant
|
|
|
3269 |
whitespace will be lost. If you need to work with mixed content, then
|
|
|
3270 |
XML::Simple is not the right tool for your job - check out the next section.
|
|
|
3271 |
|
|
|
3272 |
=head1 WHERE TO FROM HERE?
|
|
|
3273 |
|
|
|
3274 |
B<XML::Simple> is able to present a simple API because it makes some
|
|
|
3275 |
assumptions on your behalf. These include:
|
|
|
3276 |
|
|
|
3277 |
=over 4
|
|
|
3278 |
|
|
|
3279 |
=item *
|
|
|
3280 |
|
|
|
3281 |
You're not interested in text content consisting only of whitespace
|
|
|
3282 |
|
|
|
3283 |
=item *
|
|
|
3284 |
|
|
|
3285 |
You don't mind that when things get slurped into a hash the order is lost
|
|
|
3286 |
|
|
|
3287 |
=item *
|
|
|
3288 |
|
|
|
3289 |
You don't want fine-grained control of the formatting of generated XML
|
|
|
3290 |
|
|
|
3291 |
=item *
|
|
|
3292 |
|
|
|
3293 |
You would never use a hash key that was not a legal XML element name
|
|
|
3294 |
|
|
|
3295 |
=item *
|
|
|
3296 |
|
|
|
3297 |
You don't need help converting between different encodings
|
|
|
3298 |
|
|
|
3299 |
=back
|
|
|
3300 |
|
|
|
3301 |
In a serious XML project, you'll probably outgrow these assumptions fairly
|
|
|
3302 |
quickly. This section of the document used to offer some advice on chosing a
|
|
|
3303 |
more powerful option. That advice has now grown into the 'Perl-XML FAQ'
|
|
|
3304 |
document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
|
|
|
3305 |
|
|
|
3306 |
The advice in the FAQ boils down to a quick explanation of tree versus
|
|
|
3307 |
event based parsers and then recommends:
|
|
|
3308 |
|
|
|
3309 |
For event based parsing, use SAX (do not set out to write any new code for
|
|
|
3310 |
XML::Parser's handler API - it is obselete).
|
|
|
3311 |
|
|
|
3312 |
For tree-based parsing, you could choose between the 'Perlish' approach of
|
|
|
3313 |
L<XML::Twig> and more standards based DOM implementations - preferably one with
|
|
|
3314 |
XPath support such as L<XML::LibXML>.
|
|
|
3315 |
|
|
|
3316 |
|
|
|
3317 |
=head1 SEE ALSO
|
|
|
3318 |
|
|
|
3319 |
B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
|
|
|
3320 |
|
|
|
3321 |
To generate documents with namespaces, L<XML::NamespaceSupport> is required.
|
|
|
3322 |
|
|
|
3323 |
The optional caching functions require L<Storable>.
|
|
|
3324 |
|
|
|
3325 |
Answers to Frequently Asked Questions about XML::Simple are bundled with this
|
|
|
3326 |
distribution as: L<XML::Simple::FAQ>
|
|
|
3327 |
|
|
|
3328 |
=head1 COPYRIGHT
|
|
|
3329 |
|
|
|
3330 |
Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
|
|
|
3331 |
|
|
|
3332 |
This library is free software; you can redistribute it and/or modify it
|
|
|
3333 |
under the same terms as Perl itself.
|
|
|
3334 |
|
|
|
3335 |
=cut
|
|
|
3336 |
|
|
|
3337 |
|