Subversion Repositories DevTools

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
311 dpurdie 1
#############################################################################
2
# Pod/Checker.pm -- check pod documents for syntax errors
3
#
4
# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
5
# This file is part of "PodParser". PodParser is free software;
6
# you can redistribute it and/or modify it under the same terms
7
# as Perl itself.
8
#############################################################################
9
 
10
package Pod::Checker;
11
use strict;
12
 
13
use vars qw($VERSION @ISA @EXPORT %VALID_COMMANDS %VALID_SEQUENCES);
14
$VERSION = '1.45';  ## Current version of this package
15
require  5.005;    ## requires this Perl version or later
16
 
17
use Pod::ParseUtils; ## for hyperlinks and lists
18
 
19
=head1 NAME
20
 
21
Pod::Checker, podchecker() - check pod documents for syntax errors
22
 
23
=head1 SYNOPSIS
24
 
25
  use Pod::Checker;
26
 
27
  $syntax_okay = podchecker($filepath, $outputpath, %options);
28
 
29
  my $checker = new Pod::Checker %options;
30
  $checker->parse_from_file($filepath, \*STDERR);
31
 
32
=head1 OPTIONS/ARGUMENTS
33
 
34
C<$filepath> is the input POD to read and C<$outputpath> is
35
where to write POD syntax error messages. Either argument may be a scalar
36
indicating a file-path, or else a reference to an open filehandle.
37
If unspecified, the input-file it defaults to C<\*STDIN>, and
38
the output-file defaults to C<\*STDERR>.
39
 
40
=head2 podchecker()
41
 
42
This function can take a hash of options:
43
 
44
=over 4
45
 
46
=item B<-warnings> =E<gt> I<val>
47
 
48
Turn warnings on/off. I<val> is usually 1 for on, but higher values
49
trigger additional warnings. See L<"Warnings">.
50
 
51
=back
52
 
53
=head1 DESCRIPTION
54
 
55
B<podchecker> will perform syntax checking of Perl5 POD format documentation.
56
 
57
Curious/ambitious users are welcome to propose additional features they wish
58
to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
59
consistent with L<perlpod>.
60
 
61
The following checks are currently performed:
62
 
63
=over 4
64
 
65
=item *
66
 
67
Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
68
and unterminated interior sequences.
69
 
70
=item *
71
 
72
Check for proper balancing of C<=begin> and C<=end>. The contents of such
73
a block are generally ignored, i.e. no syntax checks are performed.
74
 
75
=item *
76
 
77
Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
78
 
79
=item *
80
 
81
Check for same nested interior-sequences (e.g.
82
C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
83
 
84
=item *
85
 
86
Check for malformed or non-existing entities C<EE<lt>...E<gt>>.
87
 
88
=item *
89
 
90
Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
91
for details.
92
 
93
=item *
94
 
95
Check for unresolved document-internal links. This check may also reveal
96
misspelled links that seem to be internal links but should be links
97
to something else.
98
 
99
=back
100
 
101
=head1 DIAGNOSTICS
102
 
103
=head2 Errors
104
 
105
=over 4
106
 
107
=item * empty =headn
108
 
109
A heading (C<=head1> or C<=head2>) without any text? That ain't no
110
heading!
111
 
112
=item * =over on line I<N> without closing =back
113
 
114
The C<=over> command does not have a corresponding C<=back> before the
115
next heading (C<=head1> or C<=head2>) or the end of the file.
116
 
117
=item * =item without previous =over
118
 
119
=item * =back without previous =over
120
 
121
An C<=item> or C<=back> command has been found outside a
122
C<=over>/C<=back> block.
123
 
124
=item * No argument for =begin
125
 
126
A C<=begin> command was found that is not followed by the formatter
127
specification.
128
 
129
=item * =end without =begin
130
 
131
A standalone C<=end> command was found.
132
 
133
=item * Nested =begin's
134
 
135
There were at least two consecutive C<=begin> commands without
136
the corresponding C<=end>. Only one C<=begin> may be active at
137
a time.
138
 
139
=item * =for without formatter specification
140
 
141
There is no specification of the formatter after the C<=for> command.
142
 
143
=item * unresolved internal link I<NAME>
144
 
145
The given link to I<NAME> does not have a matching node in the current
146
POD. This also happened when a single word node name is not enclosed in
147
C<"">.
148
 
149
=item * Unknown command "I<CMD>"
150
 
151
An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
152
C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
153
C<=for>, C<=pod>, C<=cut>
154
 
155
=item * Unknown interior-sequence "I<SEQ>"
156
 
157
An invalid markup command has been encountered. Valid are:
158
C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
159
C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
160
C<ZE<lt>E<gt>>
161
 
162
=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
163
 
164
Two nested identical markup commands have been found. Generally this
165
does not make sense.
166
 
167
=item * garbled entity I<STRING>
168
 
169
The I<STRING> found cannot be interpreted as a character entity.
170
 
171
=item * Entity number out of range
172
 
173
An entity specified by number (dec, hex, oct) is out of range (1-255).
174
 
175
=item * malformed link LE<lt>E<gt>
176
 
177
The link found cannot be parsed because it does not conform to the
178
syntax described in L<perlpod>.
179
 
180
=item * nonempty ZE<lt>E<gt>
181
 
182
The C<ZE<lt>E<gt>> sequence is supposed to be empty.
183
 
184
=item * empty XE<lt>E<gt>
185
 
186
The index entry specified contains nothing but whitespace.
187
 
188
=item * Spurious text after =pod / =cut
189
 
190
The commands C<=pod> and C<=cut> do not take any arguments.
191
 
192
=item * Spurious character(s) after =back
193
 
194
The C<=back> command does not take any arguments.
195
 
196
=back
197
 
198
=head2 Warnings
199
 
200
These may not necessarily cause trouble, but indicate mediocre style.
201
 
202
=over 4
203
 
204
=item * multiple occurrence of link target I<name>
205
 
206
The POD file has some C<=item> and/or C<=head> commands that have
207
the same text. Potential hyperlinks to such a text cannot be unique then.
208
This warning is printed only with warning level greater than one.
209
 
210
=item * line containing nothing but whitespace in paragraph
211
 
212
There is some whitespace on a seemingly empty line. POD is very sensitive
213
to such things, so this is flagged. B<vi> users switch on the B<list>
214
option to avoid this problem.
215
 
216
=begin _disabled_
217
 
218
=item * file does not start with =head
219
 
220
The file starts with a different POD directive than head.
221
This is most probably something you do not want.
222
 
223
=end _disabled_
224
 
225
=item * previous =item has no contents
226
 
227
There is a list C<=item> right above the flagged line that has no
228
text contents. You probably want to delete empty items.
229
 
230
=item * preceding non-item paragraph(s)
231
 
232
A list introduced by C<=over> starts with a text or verbatim paragraph,
233
but continues with C<=item>s. Move the non-item paragraph out of the
234
C<=over>/C<=back> block.
235
 
236
=item * =item type mismatch (I<one> vs. I<two>)
237
 
238
A list started with e.g. a bullet-like C<=item> and continued with a
239
numbered one. This is obviously inconsistent. For most translators the
240
type of the I<first> C<=item> determines the type of the list.
241
 
242
=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
 
244
Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245
can potentially cause errors as they could be misinterpreted as
246
markup commands. This is only printed when the -warnings level is
247
greater than 1.
248
 
249
=item * Unknown entity
250
 
251
A character entity was found that does not belong to the standard
252
ISO set or the POD specials C<verbar> and C<sol>.
253
 
254
=item * No items in =over
255
 
256
The list opened with C<=over> does not contain any items.
257
 
258
=item * No argument for =item
259
 
260
C<=item> without any parameters is deprecated. It should either be followed
261
by C<*> to indicate an unordered list, by a number (optionally followed
262
by a dot) to indicate an ordered (numbered) list or simple text for a
263
definition list.
264
 
265
=item * empty section in previous paragraph
266
 
267
The previous section (introduced by a C<=head> command) does not contain
268
any text. This usually indicates that something is missing. Note: A
269
C<=head1> followed immediately by C<=head2> does not trigger this warning.
270
 
271
=item * Verbatim paragraph in NAME section
272
 
273
The NAME section (C<=head1 NAME>) should consist of a single paragraph
274
with the script/module name, followed by a dash `-' and a very short
275
description of what the thing is good for.
276
 
277
=item * =headI<n> without preceding higher level
278
 
279
For example if there is a C<=head2> in the POD file prior to a
280
C<=head1>.
281
 
282
=back
283
 
284
=head2 Hyperlinks
285
 
286
There are some warnings with respect to malformed hyperlinks:
287
 
288
=over 4
289
 
290
=item * ignoring leading/trailing whitespace in link
291
 
292
There is whitespace at the beginning or the end of the contents of
293
LE<lt>...E<gt>.
294
 
295
=item * (section) in '$page' deprecated
296
 
297
There is a section detected in the page name of LE<lt>...E<gt>, e.g.
298
C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
299
Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
300
to expand this to appropriate code. For links to (builtin) functions,
301
please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
302
 
303
=item * alternative text/node '%s' contains non-escaped | or /
304
 
305
The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
306
Although the hyperlink parser does its best to determine which "/" is
307
text and which is a delimiter in case of doubt, one ought to escape
308
these literal characters like this:
309
 
310
  /     E<sol>
311
  |     E<verbar>
312
 
313
=back
314
 
315
=head1 RETURN VALUE
316
 
317
B<podchecker> returns the number of POD syntax errors found or -1 if
318
there were no POD commands at all found in the file.
319
 
320
=head1 EXAMPLES
321
 
322
See L</SYNOPSIS>
323
 
324
=head1 INTERFACE
325
 
326
While checking, this module collects document properties, e.g. the nodes
327
for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
328
POD translators can use this feature to syntax-check and get the nodes in
329
a first pass before actually starting to convert. This is expensive in terms
330
of execution time, but allows for very robust conversions.
331
 
332
Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
333
method to print errors and warnings. The summary output (e.g.
334
"Pod syntax OK") has been dropped from the module and has been included in
335
B<podchecker> (the script). This allows users of B<Pod::Checker> to
336
control completely the output behavior. Users of B<podchecker> (the script)
337
get the well-known behavior.
338
 
339
=cut
340
 
341
#############################################################################
342
 
343
#use diagnostics;
344
use Carp qw(croak);
345
use Exporter;
346
use Pod::Parser;
347
 
348
@ISA = qw(Pod::Parser);
349
@EXPORT = qw(&podchecker);
350
 
351
my %VALID_COMMANDS = (
352
    'pod'    =>  1,
353
    'cut'    =>  1,
354
    'head1'  =>  1,
355
    'head2'  =>  1,
356
    'head3'  =>  1,
357
    'head4'  =>  1,
358
    'over'   =>  1,
359
    'back'   =>  1,
360
    'item'   =>  1,
361
    'for'    =>  1,
362
    'begin'  =>  1,
363
    'end'    =>  1,
364
    'encoding' =>  1,
365
);
366
 
367
my %VALID_SEQUENCES = (
368
    'I'  =>  1,
369
    'B'  =>  1,
370
    'S'  =>  1,
371
    'C'  =>  1,
372
    'L'  =>  1,
373
    'F'  =>  1,
374
    'X'  =>  1,
375
    'Z'  =>  1,
376
    'E'  =>  1,
377
);
378
 
379
# stolen from HTML::Entities
380
my %ENTITIES = (
381
 # Some normal chars that have special meaning in SGML context
382
 amp    => '&',  # ampersand
383
'gt'    => '>',  # greater than
384
'lt'    => '<',  # less than
385
 quot   => '"',  # double quote
386
 
387
 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
388
 AElig  => 'Æ',  # capital AE diphthong (ligature)
389
 Aacute => 'Á',  # capital A, acute accent
390
 Acirc  => 'Â',  # capital A, circumflex accent
391
 Agrave => 'À',  # capital A, grave accent
392
 Aring  => 'Å',  # capital A, ring
393
 Atilde => 'Ã',  # capital A, tilde
394
 Auml   => 'Ä',  # capital A, dieresis or umlaut mark
395
 Ccedil => 'Ç',  # capital C, cedilla
396
 ETH    => 'Ð',  # capital Eth, Icelandic
397
 Eacute => 'É',  # capital E, acute accent
398
 Ecirc  => 'Ê',  # capital E, circumflex accent
399
 Egrave => 'È',  # capital E, grave accent
400
 Euml   => 'Ë',  # capital E, dieresis or umlaut mark
401
 Iacute => 'Í',  # capital I, acute accent
402
 Icirc  => 'Î',  # capital I, circumflex accent
403
 Igrave => 'Ì',  # capital I, grave accent
404
 Iuml   => 'Ï',  # capital I, dieresis or umlaut mark
405
 Ntilde => 'Ñ',  # capital N, tilde
406
 Oacute => 'Ó',  # capital O, acute accent
407
 Ocirc  => 'Ô',  # capital O, circumflex accent
408
 Ograve => 'Ò',  # capital O, grave accent
409
 Oslash => 'Ø',  # capital O, slash
410
 Otilde => 'Õ',  # capital O, tilde
411
 Ouml   => 'Ö',  # capital O, dieresis or umlaut mark
412
 THORN  => 'Þ',  # capital THORN, Icelandic
413
 Uacute => 'Ú',  # capital U, acute accent
414
 Ucirc  => 'Û',  # capital U, circumflex accent
415
 Ugrave => 'Ù',  # capital U, grave accent
416
 Uuml   => 'Ü',  # capital U, dieresis or umlaut mark
417
 Yacute => 'Ý',  # capital Y, acute accent
418
 aacute => 'á',  # small a, acute accent
419
 acirc  => 'â',  # small a, circumflex accent
420
 aelig  => 'æ',  # small ae diphthong (ligature)
421
 agrave => 'à',  # small a, grave accent
422
 aring  => 'å',  # small a, ring
423
 atilde => 'ã',  # small a, tilde
424
 auml   => 'ä',  # small a, dieresis or umlaut mark
425
 ccedil => 'ç',  # small c, cedilla
426
 eacute => 'é',  # small e, acute accent
427
 ecirc  => 'ê',  # small e, circumflex accent
428
 egrave => 'è',  # small e, grave accent
429
 eth    => 'ð',  # small eth, Icelandic
430
 euml   => 'ë',  # small e, dieresis or umlaut mark
431
 iacute => 'í',  # small i, acute accent
432
 icirc  => 'î',  # small i, circumflex accent
433
 igrave => 'ì',  # small i, grave accent
434
 iuml   => 'ï',  # small i, dieresis or umlaut mark
435
 ntilde => 'ñ',  # small n, tilde
436
 oacute => 'ó',  # small o, acute accent
437
 ocirc  => 'ô',  # small o, circumflex accent
438
 ograve => 'ò',  # small o, grave accent
439
 oslash => 'ø',  # small o, slash
440
 otilde => 'õ',  # small o, tilde
441
 ouml   => 'ö',  # small o, dieresis or umlaut mark
442
 szlig  => 'ß',  # small sharp s, German (sz ligature)
443
 thorn  => 'þ',  # small thorn, Icelandic
444
 uacute => 'ú',  # small u, acute accent
445
 ucirc  => 'û',  # small u, circumflex accent
446
 ugrave => 'ù',  # small u, grave accent
447
 uuml   => 'ü',  # small u, dieresis or umlaut mark
448
 yacute => 'ý',  # small y, acute accent
449
 yuml   => 'ÿ',  # small y, dieresis or umlaut mark
450
 
451
 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
452
 copy   => '©',  # copyright sign
453
 reg    => '®',  # registered sign
454
 nbsp   => "\240", # non breaking space
455
 
456
 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
457
 iexcl  => '¡',
458
 cent   => '¢',
459
 pound  => '£',
460
 curren => '¤',
461
 yen    => '¥',
462
 brvbar => '¦',
463
 sect   => '§',
464
 uml    => '¨',
465
 ordf   => 'ª',
466
 laquo  => '«',
467
'not'   => '¬',    # not is a keyword in perl
468
 shy    => '­',
469
 macr   => '¯',
470
 deg    => '°',
471
 plusmn => '±',
472
 sup1   => '¹',
473
 sup2   => '²',
474
 sup3   => '³',
475
 acute  => '´',
476
 micro  => 'µ',
477
 para   => '¶',
478
 middot => '·',
479
 cedil  => '¸',
480
 ordm   => 'º',
481
 raquo  => '»',
482
 frac14 => '¼',
483
 frac12 => '½',
484
 frac34 => '¾',
485
 iquest => '¿',
486
'times' => '×',    # times is a keyword in perl
487
 divide => '÷',
488
 
489
# some POD special entities
490
 verbar => '|',
491
 sol => '/'
492
);
493
 
494
##---------------------------------------------------------------------------
495
 
496
##---------------------------------
497
## Function definitions begin here
498
##---------------------------------
499
 
500
sub podchecker {
501
    my ($infile, $outfile, %options) = @_;
502
    local $_;
503
 
504
    ## Set defaults
505
    $infile  ||= \*STDIN;
506
    $outfile ||= \*STDERR;
507
 
508
    ## Now create a pod checker
509
    my $checker = new Pod::Checker(%options);
510
 
511
    ## Now check the pod document for errors
512
    $checker->parse_from_file($infile, $outfile);
513
 
514
    ## Return the number of errors found
515
    return $checker->num_errors();
516
}
517
 
518
##---------------------------------------------------------------------------
519
 
520
##-------------------------------
521
## Method definitions begin here
522
##-------------------------------
523
 
524
##################################
525
 
526
=over 4
527
 
528
=item C<Pod::Checker-E<gt>new( %options )>
529
 
530
Return a reference to a new Pod::Checker object that inherits from
531
Pod::Parser and is used for calling the required methods later. The
532
following options are recognized:
533
 
534
C<-warnings =E<gt> num>
535
  Print warnings if C<num> is true. The higher the value of C<num>,
536
the more warnings are printed. Currently there are only levels 1 and 2.
537
 
538
C<-quiet =E<gt> num>
539
  If C<num> is true, do not print any errors/warnings. This is useful
540
when Pod::Checker is used to munge POD code into plain text from within
541
POD formatters.
542
 
543
=cut
544
 
545
## sub new {
546
##     my $this = shift;
547
##     my $class = ref($this) || $this;
548
##     my %params = @_;
549
##     my $self = {%params};
550
##     bless $self, $class;
551
##     $self->initialize();
552
##     return $self;
553
## }
554
 
555
sub initialize {
556
    my $self = shift;
557
    ## Initialize number of errors, and setup an error function to
558
    ## increment this number and then print to the designated output.
559
    $self->{_NUM_ERRORS} = 0;
560
    $self->{_NUM_WARNINGS} = 0;
561
    $self->{-quiet} ||= 0;
562
    # set the error handling subroutine
563
    $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
564
    $self->{_commands} = 0; # total number of POD commands encountered
565
    $self->{_list_stack} = []; # stack for nested lists
566
    $self->{_have_begin} = ''; # stores =begin
567
    $self->{_links} = []; # stack for internal hyperlinks
568
    $self->{_nodes} = []; # stack for =head/=item nodes
569
    $self->{_index} = []; # text in X<>
570
    # print warnings?
571
    $self->{-warnings} = 1 unless(defined $self->{-warnings});
572
    $self->{_current_head1} = ''; # the current =head1 block
573
    $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
574
}
575
 
576
##################################
577
 
578
=item C<$checker-E<gt>poderror( @args )>
579
 
580
=item C<$checker-E<gt>poderror( {%opts}, @args )>
581
 
582
Internal method for printing errors and warnings. If no options are
583
given, simply prints "@_". The following options are recognized and used
584
to form the output:
585
 
586
  -msg
587
 
588
A message to print prior to C<@args>.
589
 
590
  -line
591
 
592
The line number the error occurred in.
593
 
594
  -file
595
 
596
The file (name) the error occurred in.
597
 
598
  -severity
599
 
600
The error level, should be 'WARNING' or 'ERROR'.
601
 
602
=cut
603
 
604
# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
605
sub poderror {
606
    my $self = shift;
607
    my %opts = (ref $_[0]) ? %{shift()} : ();
608
 
609
    ## Retrieve options
610
    chomp( my $msg  = ($opts{-msg} || '')."@_" );
611
    my $line = (exists $opts{-line}) ? " at line $opts{-line}" : '';
612
    my $file = (exists $opts{-file}) ? " in file $opts{-file}" : '';
613
    unless (exists $opts{-severity}) {
614
       ## See if can find severity in message prefix
615
       $opts{-severity} = $1  if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
616
    }
617
    my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : '';
618
 
619
    ## Increment error count and print message "
620
    ++($self->{_NUM_ERRORS})
621
        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
622
    ++($self->{_NUM_WARNINGS})
623
        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
624
    unless($self->{-quiet}) {
625
      my $out_fh = $self->output_handle() || \*STDERR;
626
      print $out_fh ($severity, $msg, $line, $file, "\n")
627
        if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
628
    }
629
}
630
 
631
##################################
632
 
633
=item C<$checker-E<gt>num_errors()>
634
 
635
Set (if argument specified) and retrieve the number of errors found.
636
 
637
=cut
638
 
639
sub num_errors {
640
   return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
641
}
642
 
643
##################################
644
 
645
=item C<$checker-E<gt>num_warnings()>
646
 
647
Set (if argument specified) and retrieve the number of warnings found.
648
 
649
=cut
650
 
651
sub num_warnings {
652
   return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
653
}
654
 
655
##################################
656
 
657
=item C<$checker-E<gt>name()>
658
 
659
Set (if argument specified) and retrieve the canonical name of POD as
660
found in the C<=head1 NAME> section.
661
 
662
=cut
663
 
664
sub name {
665
    return (@_ > 1 && $_[1]) ?
666
        ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
667
}
668
 
669
##################################
670
 
671
=item C<$checker-E<gt>node()>
672
 
673
Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
674
and C<=item>) of the current POD. The nodes are returned in the order of
675
their occurrence. They consist of plain text, each piece of whitespace is
676
collapsed to a single blank.
677
 
678
=cut
679
 
680
sub node {
681
    my ($self,$text) = @_;
682
    if(defined $text) {
683
        $text =~ s/\s+$//s; # strip trailing whitespace
684
        $text =~ s/\s+/ /gs; # collapse whitespace
685
        # add node, order important!
686
        push(@{$self->{_nodes}}, $text);
687
        # keep also a uniqueness counter
688
        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
689
        return $text;
690
    }
691
    @{$self->{_nodes}};
692
}
693
 
694
##################################
695
 
696
=item C<$checker-E<gt>idx()>
697
 
698
Add (if argument specified) and retrieve the index entries (as defined by
699
C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
700
of whitespace is collapsed to a single blank.
701
 
702
=cut
703
 
704
# set/return index entries of current POD
705
sub idx {
706
    my ($self,$text) = @_;
707
    if(defined $text) {
708
        $text =~ s/\s+$//s; # strip trailing whitespace
709
        $text =~ s/\s+/ /gs; # collapse whitespace
710
        # add node, order important!
711
        push(@{$self->{_index}}, $text);
712
        # keep also a uniqueness counter
713
        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
714
        return $text;
715
    }
716
    @{$self->{_index}};
717
}
718
 
719
##################################
720
 
721
=item C<$checker-E<gt>hyperlink()>
722
 
723
Add (if argument specified) and retrieve the hyperlinks (as defined by
724
C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
725
number and C<Pod::Hyperlink> object.
726
 
727
=back
728
 
729
=cut
730
 
731
# set/return hyperlinks of the current POD
732
sub hyperlink {
733
    my $self = shift;
734
    if($_[0]) {
735
        push(@{$self->{_links}}, $_[0]);
736
        return $_[0];
737
    }
738
    @{$self->{_links}};
739
}
740
 
741
## overrides for Pod::Parser
742
 
743
sub end_pod {
744
    ## Do some final checks and
745
    ## print the number of errors found
746
    my $self   = shift;
747
    my $infile = $self->input_file();
748
 
749
    if(@{$self->{_list_stack}}) {
750
        my $list;
751
        while(($list = $self->_close_list('EOF',$infile)) &&
752
          $list->indent() ne 'auto') {
753
            $self->poderror({ -line => 'EOF', -file => $infile,
754
                -severity => 'ERROR', -msg => '=over on line ' .
755
                $list->start() . ' without closing =back' });
756
        }
757
    }
758
 
759
    # check validity of document internal hyperlinks
760
    # first build the node names from the paragraph text
761
    my %nodes;
762
    foreach($self->node()) {
763
        $nodes{$_} = 1;
764
        if(/^(\S+)\s+\S/) {
765
            # we have more than one word. Use the first as a node, too.
766
            # This is used heavily in perlfunc.pod
767
            $nodes{$1} ||= 2; # derived node
768
        }
769
    }
770
    foreach($self->idx()) {
771
        $nodes{$_} = 3; # index node
772
    }
773
    foreach($self->hyperlink()) {
774
        my ($line,$link) = @$_;
775
        # _TODO_ what if there is a link to the page itself by the name,
776
        # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
777
        if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
778
            my $node = $self->_check_ptree($self->parse_text($link->node(),
779
                $line), $line, $infile, 'L');
780
            if($node && !$nodes{$node}) {
781
                $self->poderror({ -line => $line || '', -file => $infile,
782
                    -severity => 'ERROR',
783
                    -msg => "unresolved internal link '$node'"});
784
            }
785
        }
786
    }
787
 
788
    # check the internal nodes for uniqueness. This pertains to
789
    # =headX, =item and X<...>
790
    if($self->{-warnings} && $self->{-warnings}>1) {
791
      foreach(grep($self->{_unique_nodes}->{$_} > 1,
792
        keys %{$self->{_unique_nodes}})) {
793
          $self->poderror({ -line => '-', -file => $infile,
794
            -severity => 'WARNING',
795
            -msg => "multiple occurrence of link target '$_'"});
796
      }
797
    }
798
 
799
    # no POD found here
800
    $self->num_errors(-1) if($self->{_commands} == 0);
801
}
802
 
803
# check a POD command directive
804
sub command {
805
    my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
806
    my ($file, $line) = $pod_para->file_line;
807
    ## Check the command syntax
808
    my $arg; # this will hold the command argument
809
    if (! $VALID_COMMANDS{$cmd}) {
810
       $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
811
                         -msg => "Unknown command '$cmd'" });
812
    }
813
    else { # found a valid command
814
        $self->{_commands}++; # delete this line if below is enabled again
815
 
816
        ##### following check disabled due to strong request
817
        #if(!$self->{_commands}++ && $cmd !~ /^head/) {
818
        #    $self->poderror({ -line => $line, -file => $file,
819
        #         -severity => 'WARNING',
820
        #         -msg => "file does not start with =head" });
821
        #}
822
 
823
        # check syntax of particular command
824
        if($cmd eq 'over') {
825
            # check for argument
826
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
827
            my $indent = 4; # default
828
            if($arg && $arg =~ /^\s*(\d+)\s*$/) {
829
                $indent = $1;
830
            }
831
            # start a new list
832
            $self->_open_list($indent,$line,$file);
833
        }
834
        elsif($cmd eq 'item') {
835
            # are we in a list?
836
            unless(@{$self->{_list_stack}}) {
837
                $self->poderror({ -line => $line, -file => $file,
838
                     -severity => 'ERROR',
839
                     -msg => '=item without previous =over' });
840
                # auto-open in case we encounter many more
841
                $self->_open_list('auto',$line,$file);
842
            }
843
            my $list = $self->{_list_stack}->[0];
844
            # check whether the previous item had some contents
845
            if(defined $self->{_list_item_contents} &&
846
              $self->{_list_item_contents} == 0) {
847
                $self->poderror({ -line => $line, -file => $file,
848
                     -severity => 'WARNING',
849
                     -msg => 'previous =item has no contents' });
850
            }
851
            if($list->{_has_par}) {
852
                $self->poderror({ -line => $line, -file => $file,
853
                     -severity => 'WARNING',
854
                     -msg => 'preceding non-item paragraph(s)' });
855
                delete $list->{_has_par};
856
            }
857
            # check for argument
858
            $arg = $self->interpolate_and_check($paragraph, $line, $file);
859
            if($arg && $arg =~ /(\S+)/) {
860
                $arg =~ s/[\s\n]+$//;
861
                my $type;
862
                if($arg =~ /^[*]\s*(\S*.*)/) {
863
                  $type = 'bullet';
864
                  $self->{_list_item_contents} = $1 ? 1 : 0;
865
                  $arg = $1;
866
                }
867
                elsif($arg =~ /^\d+\.?\s+(\S*)/) {
868
                  $type = 'number';
869
                  $self->{_list_item_contents} = $1 ? 1 : 0;
870
                  $arg = $1;
871
                }
872
                else {
873
                  $type = 'definition';
874
                  $self->{_list_item_contents} = 1;
875
                }
876
                my $first = $list->type();
877
                if($first && $first ne $type) {
878
                    $self->poderror({ -line => $line, -file => $file,
879
                       -severity => 'WARNING',
880
                       -msg => "=item type mismatch ('$first' vs. '$type')"});
881
                }
882
                else { # first item
883
                    $list->type($type);
884
                }
885
            }
886
            else {
887
                $self->poderror({ -line => $line, -file => $file,
888
                     -severity => 'WARNING',
889
                     -msg => 'No argument for =item' });
890
                $arg = ' '; # empty
891
                $self->{_list_item_contents} = 0;
892
            }
893
            # add this item
894
            $list->item($arg);
895
            # remember this node
896
            $self->node($arg);
897
        }
898
        elsif($cmd eq 'back') {
899
            # check if we have an open list
900
            unless(@{$self->{_list_stack}}) {
901
                $self->poderror({ -line => $line, -file => $file,
902
                         -severity => 'ERROR',
903
                         -msg => '=back without previous =over' });
904
            }
905
            else {
906
                # check for spurious characters
907
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
908
                if($arg && $arg =~ /\S/) {
909
                    $self->poderror({ -line => $line, -file => $file,
910
                         -severity => 'ERROR',
911
                         -msg => 'Spurious character(s) after =back' });
912
                }
913
                # close list
914
                my $list = $self->_close_list($line,$file);
915
                # check for empty lists
916
                if(!$list->item() && $self->{-warnings}) {
917
                    $self->poderror({ -line => $line, -file => $file,
918
                         -severity => 'WARNING',
919
                         -msg => 'No items in =over (at line ' .
920
                         $list->start() . ') / =back list'});
921
                }
922
            }
923
        }
924
        elsif($cmd =~ /^head(\d+)/) {
925
            my $hnum = $1;
926
            $self->{"_have_head_$hnum"}++; # count head types
927
            if($hnum > 1 && !$self->{'_have_head_'.($hnum -1)}) {
928
              $self->poderror({ -line => $line, -file => $file,
929
                   -severity => 'WARNING',
930
                   -msg => "=head$hnum without preceding higher level"});
931
            }
932
            # check whether the previous =head section had some contents
933
            if(defined $self->{_commands_in_head} &&
934
              $self->{_commands_in_head} == 0 &&
935
              defined $self->{_last_head} &&
936
              $self->{_last_head} >= $hnum) {
937
                $self->poderror({ -line => $line, -file => $file,
938
                     -severity => 'WARNING',
939
                     -msg => 'empty section in previous paragraph'});
940
            }
941
            $self->{_commands_in_head} = -1;
942
            $self->{_last_head} = $hnum;
943
            # check if there is an open list
944
            if(@{$self->{_list_stack}}) {
945
                my $list;
946
                while(($list = $self->_close_list($line,$file)) &&
947
                  $list->indent() ne 'auto') {
948
                    $self->poderror({ -line => $line, -file => $file,
949
                         -severity => 'ERROR',
950
                         -msg => '=over on line '. $list->start() .
951
                         " without closing =back (at $cmd)" });
952
                }
953
            }
954
            # remember this node
955
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
956
            $arg =~ s/[\s\n]+$//s;
957
            $self->node($arg);
958
            unless(length($arg)) {
959
                $self->poderror({ -line => $line, -file => $file,
960
                     -severity => 'ERROR',
961
                     -msg => "empty =$cmd"});
962
            }
963
            if($cmd eq 'head1') {
964
                $self->{_current_head1} = $arg;
965
            } else {
966
                $self->{_current_head1} = '';
967
            }
968
        }
969
        elsif($cmd eq 'begin') {
970
            if($self->{_have_begin}) {
971
                # already have a begin
972
                $self->poderror({ -line => $line, -file => $file,
973
                     -severity => 'ERROR',
974
                     -msg => q{Nested =begin's (first at line } .
975
                     $self->{_have_begin} . ')'});
976
            }
977
            else {
978
                # check for argument
979
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
980
                unless($arg && $arg =~ /(\S+)/) {
981
                    $self->poderror({ -line => $line, -file => $file,
982
                         -severity => 'ERROR',
983
                         -msg => 'No argument for =begin'});
984
                }
985
                # remember the =begin
986
                $self->{_have_begin} = "$line:$1";
987
            }
988
        }
989
        elsif($cmd eq 'end') {
990
            if($self->{_have_begin}) {
991
                # close the existing =begin
992
                $self->{_have_begin} = '';
993
                # check for spurious characters
994
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
995
                # the closing argument is optional
996
                #if($arg && $arg =~ /\S/) {
997
                #    $self->poderror({ -line => $line, -file => $file,
998
                #         -severity => 'WARNING',
999
                #         -msg => "Spurious character(s) after =end" });
1000
                #}
1001
            }
1002
            else {
1003
                # don't have a matching =begin
1004
                $self->poderror({ -line => $line, -file => $file,
1005
                     -severity => 'ERROR',
1006
                     -msg => '=end without =begin' });
1007
            }
1008
        }
1009
        elsif($cmd eq 'for') {
1010
            unless($paragraph =~ /\s*(\S+)\s*/) {
1011
                $self->poderror({ -line => $line, -file => $file,
1012
                     -severity => 'ERROR',
1013
                     -msg => '=for without formatter specification' });
1014
            }
1015
            $arg = ''; # do not expand paragraph below
1016
        }
1017
        elsif($cmd =~ /^(pod|cut)$/) {
1018
            # check for argument
1019
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
1020
            if($arg && $arg =~ /(\S+)/) {
1021
                $self->poderror({ -line => $line, -file => $file,
1022
                      -severity => 'ERROR',
1023
                      -msg => "Spurious text after =$cmd"});
1024
            }
1025
        }
1026
    $self->{_commands_in_head}++;
1027
    ## Check the interior sequences in the command-text
1028
    $self->interpolate_and_check($paragraph, $line,$file)
1029
        unless(defined $arg);
1030
    }
1031
}
1032
 
1033
sub _open_list
1034
{
1035
    my ($self,$indent,$line,$file) = @_;
1036
    my $list = Pod::List->new(
1037
           -indent => $indent,
1038
           -start => $line,
1039
           -file => $file);
1040
    unshift(@{$self->{_list_stack}}, $list);
1041
    undef $self->{_list_item_contents};
1042
    $list;
1043
}
1044
 
1045
sub _close_list
1046
{
1047
    my ($self,$line,$file) = @_;
1048
    my $list = shift(@{$self->{_list_stack}});
1049
    if(defined $self->{_list_item_contents} &&
1050
      $self->{_list_item_contents} == 0) {
1051
        $self->poderror({ -line => $line, -file => $file,
1052
            -severity => 'WARNING',
1053
            -msg => 'previous =item has no contents' });
1054
    }
1055
    undef $self->{_list_item_contents};
1056
    $list;
1057
}
1058
 
1059
# process a block of some text
1060
sub interpolate_and_check {
1061
    my ($self, $paragraph, $line, $file) = @_;
1062
    ## Check the interior sequences in the command-text
1063
    # and return the text
1064
    $self->_check_ptree(
1065
        $self->parse_text($paragraph,$line), $line, $file, '');
1066
}
1067
 
1068
sub _check_ptree {
1069
    my ($self,$ptree,$line,$file,$nestlist) = @_;
1070
    local($_);
1071
    my $text = '';
1072
    # process each node in the parse tree
1073
    foreach(@$ptree) {
1074
        # regular text chunk
1075
        unless(ref) {
1076
            # count the unescaped angle brackets
1077
            # complain only when warning level is greater than 1
1078
            if($self->{-warnings} && $self->{-warnings}>1) {
1079
              my $count;
1080
              if($count = tr/<>/<>/) {
1081
                $self->poderror({ -line => $line, -file => $file,
1082
                     -severity => 'WARNING',
1083
                     -msg => "$count unescaped <> in paragraph" });
1084
                }
1085
            }
1086
            $text .= $_;
1087
            next;
1088
        }
1089
        # have an interior sequence
1090
        my $cmd = $_->cmd_name();
1091
        my $contents = $_->parse_tree();
1092
        ($file,$line) = $_->file_line();
1093
        # check for valid tag
1094
        if (! $VALID_SEQUENCES{$cmd}) {
1095
            $self->poderror({ -line => $line, -file => $file,
1096
                 -severity => 'ERROR',
1097
                 -msg => qq(Unknown interior-sequence '$cmd')});
1098
            # expand it anyway
1099
            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1100
            next;
1101
        }
1102
        if($nestlist =~ /$cmd/) {
1103
            $self->poderror({ -line => $line, -file => $file,
1104
                 -severity => 'WARNING',
1105
                 -msg => "nested commands $cmd<...$cmd<...>...>"});
1106
            # _TODO_ should we add the contents anyway?
1107
            # expand it anyway, see below
1108
        }
1109
        if($cmd eq 'E') {
1110
            # preserve entities
1111
            if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1112
                $self->poderror({ -line => $line, -file => $file,
1113
                    -severity => 'ERROR',
1114
                    -msg => 'garbled entity ' . $_->raw_text()});
1115
                next;
1116
            }
1117
            my $ent = $$contents[0];
1118
            my $val;
1119
            if($ent =~ /^0x[0-9a-f]+$/i) {
1120
                # hexadec entity
1121
                $val = hex($ent);
1122
            }
1123
            elsif($ent =~ /^0\d+$/) {
1124
                # octal
1125
                $val = oct($ent);
1126
            }
1127
            elsif($ent =~ /^\d+$/) {
1128
                # numeric entity
1129
                $val = $ent;
1130
            }
1131
            if(defined $val) {
1132
                if($val>0 && $val<256) {
1133
                    $text .= chr($val);
1134
                }
1135
                else {
1136
                    $self->poderror({ -line => $line, -file => $file,
1137
                        -severity => 'ERROR',
1138
                        -msg => 'Entity number out of range ' . $_->raw_text()});
1139
                }
1140
            }
1141
            elsif($ENTITIES{$ent}) {
1142
                # known ISO entity
1143
                $text .= $ENTITIES{$ent};
1144
            }
1145
            else {
1146
                $self->poderror({ -line => $line, -file => $file,
1147
                    -severity => 'WARNING',
1148
                    -msg => 'Unknown entity ' . $_->raw_text()});
1149
                $text .= "E<$ent>";
1150
            }
1151
        }
1152
        elsif($cmd eq 'L') {
1153
            # try to parse the hyperlink
1154
            my $link = Pod::Hyperlink->new($contents->raw_text());
1155
            unless(defined $link) {
1156
                $self->poderror({ -line => $line, -file => $file,
1157
                    -severity => 'ERROR',
1158
                    -msg => 'malformed link ' . $_->raw_text() ." : $@"});
1159
                next;
1160
            }
1161
            $link->line($line); # remember line
1162
            if($self->{-warnings}) {
1163
                foreach my $w ($link->warning()) {
1164
                    $self->poderror({ -line => $line, -file => $file,
1165
                        -severity => 'WARNING',
1166
                        -msg => $w });
1167
                }
1168
            }
1169
            # check the link text
1170
            $text .= $self->_check_ptree($self->parse_text($link->text(),
1171
                $line), $line, $file, "$nestlist$cmd");
1172
            # remember link
1173
            $self->hyperlink([$line,$link]);
1174
        }
1175
        elsif($cmd =~ /[BCFIS]/) {
1176
            # add the guts
1177
            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1178
        }
1179
        elsif($cmd eq 'Z') {
1180
            if(length($contents->raw_text())) {
1181
                $self->poderror({ -line => $line, -file => $file,
1182
                    -severity => 'ERROR',
1183
                    -msg => 'Nonempty Z<>'});
1184
            }
1185
        }
1186
        elsif($cmd eq 'X') {
1187
            my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1188
            if($idx =~ /^\s*$/s) {
1189
                $self->poderror({ -line => $line, -file => $file,
1190
                    -severity => 'ERROR',
1191
                    -msg => 'Empty X<>'});
1192
            }
1193
            else {
1194
                # remember this node
1195
                $self->idx($idx);
1196
            }
1197
        }
1198
        else {
1199
            # not reached
1200
            croak 'internal error';
1201
        }
1202
    }
1203
    $text;
1204
}
1205
 
1206
# process a block of verbatim text
1207
sub verbatim {
1208
    ## Nothing particular to check
1209
    my ($self, $paragraph, $line_num, $pod_para) = @_;
1210
 
1211
    $self->_preproc_par($paragraph);
1212
 
1213
    if($self->{_current_head1} eq 'NAME') {
1214
        my ($file, $line) = $pod_para->file_line;
1215
        $self->poderror({ -line => $line, -file => $file,
1216
            -severity => 'WARNING',
1217
            -msg => 'Verbatim paragraph in NAME section' });
1218
    }
1219
}
1220
 
1221
# process a block of regular text
1222
sub textblock {
1223
    my ($self, $paragraph, $line_num, $pod_para) = @_;
1224
    my ($file, $line) = $pod_para->file_line;
1225
 
1226
    $self->_preproc_par($paragraph);
1227
 
1228
    # skip this paragraph if in a =begin block
1229
    unless($self->{_have_begin}) {
1230
        my $block = $self->interpolate_and_check($paragraph, $line,$file);
1231
        if($self->{_current_head1} eq 'NAME') {
1232
            if($block =~ /^\s*(\S+?)\s*[,-]/) {
1233
                # this is the canonical name
1234
                $self->{-name} = $1 unless(defined $self->{-name});
1235
            }
1236
        }
1237
    }
1238
}
1239
 
1240
sub _preproc_par
1241
{
1242
    my $self = shift;
1243
    $_[0] =~ s/[\s\n]+$//;
1244
    if($_[0]) {
1245
        $self->{_commands_in_head}++;
1246
        $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1247
        if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1248
            $self->{_list_stack}->[0]->{_has_par} = 1;
1249
        }
1250
    }
1251
}
1252
 
1253
1;
1254
 
1255
__END__
1256
 
1257
=head1 AUTHOR
1258
 
1259
Please report bugs using L<http://rt.cpan.org>.
1260
 
1261
Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1262
Marek Rouchal E<lt>marekr@cpan.orgE<gt>
1263
 
1264
Based on code for B<Pod::Text::pod2text()> written by
1265
Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1266
 
1267
=cut
1268