Subversion Repositories DevTools

Rev

Rev 361 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
361 dpurdie 1
#
2
#   Pinched from ActiveState v5.8.8 , Binary build 822 [280952]
3
#   For the same reason as the other POD:: bits
4
#       Fix bugs
5
#       Ensure that they are fixed independently of the Perl Pelease
6
#
7
#   In this module
8
#       * Remove AvtiveState mailto:
9
#
10
 
11
package Pod::Html;
12
use strict;
13
require Exporter;
14
 
15
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
16
$VERSION = "1.0801";
17
@ISA = qw(Exporter);
18
@EXPORT = qw(pod2html htmlify);
19
@EXPORT_OK = qw(anchorify);
20
 
21
use Carp;
22
use Config;
23
use Cwd;
24
use File::Spec;
25
use File::Spec::Unix;
26
use Getopt::Long;
27
 
28
use locale;     # make \w work right in non-ASCII lands
29
 
30
=head1 NAME
31
 
32
Pod::Html - module to convert pod files to HTML
33
 
34
=head1 SYNOPSIS
35
 
36
    use Pod::Html;
37
    pod2html([options]);
38
 
39
=head1 DESCRIPTION
40
 
41
Converts files from pod format (see L<perlpod>) to HTML format.  It
42
can automatically generate indexes and cross-references, and it keeps
43
a cache of things it knows how to cross-reference.
44
 
45
=head1 FUNCTIONS
46
 
47
=head2 pod2html
48
 
49
    pod2html("pod2html",
50
             "--podpath=lib:ext:pod:vms",
51
             "--podroot=/usr/src/perl",
52
             "--htmlroot=/perl/nmanual",
53
             "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
54
             "--recurse",
55
             "--infile=foo.pod",
56
             "--outfile=/perl/nmanual/foo.html");
57
 
58
pod2html takes the following arguments:
59
 
60
=over 4
61
 
62
=item backlink
63
 
64
    --backlink="Back to Top"
65
 
66
Adds "Back to Top" links in front of every C<head1> heading (except for
67
the first).  By default, no backlinks are generated.
68
 
69
=item cachedir
70
 
71
    --cachedir=name
72
 
73
Creates the item and directory caches in the given directory.
74
 
75
=item css
76
 
77
    --css=stylesheet
78
 
79
Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
80
C<style> attributes that are output by default (to avoid conflicts).
81
 
82
=item flush
83
 
84
    --flush
85
 
86
Flushes the item and directory caches.
87
 
88
=item header
89
 
90
    --header
91
    --noheader
92
 
93
Creates header and footer blocks containing the text of the C<NAME>
94
section.  By default, no headers are generated.
95
 
96
=item help
97
 
98
    --help
99
 
100
Displays the usage message.
101
 
102
=item hiddendirs
103
 
104
    --hiddendirs
105
    --nohiddendirs
106
 
107
Include hidden directories in the search for POD's in podpath if recurse
108
is set.
109
The default is not to traverse any directory whose name begins with C<.>.
110
See L</"podpath"> and L</"recurse">.
111
 
112
[This option is for backward compatibility only.
113
It's hard to imagine that one would usefully create a module with a
114
name component beginning with C<.>.]
115
 
116
=item htmldir
117
 
118
    --htmldir=name
119
 
120
Sets the directory in which the resulting HTML file is placed.  This
121
is used to generate relative links to other files. Not passing this
122
causes all links to be absolute, since this is the value that tells
123
Pod::Html the root of the documentation tree.
124
 
125
=item htmlroot
126
 
127
    --htmlroot=name
128
 
129
Sets the base URL for the HTML files.  When cross-references are made,
130
the HTML root is prepended to the URL.
131
 
132
=item index
133
 
134
    --index
135
    --noindex
136
 
137
Generate an index at the top of the HTML file.  This is the default
138
behaviour.
139
 
140
=item infile
141
 
142
    --infile=name
143
 
144
Specify the pod file to convert.  Input is taken from STDIN if no
145
infile is specified.
146
 
147
=item libpods
148
 
149
    --libpods=name:...:name
150
 
151
List of page names (eg, "perlfunc") which contain linkable C<=item>s.
152
 
153
=item netscape
154
 
155
    --netscape
156
    --nonetscape
157
 
158
B<Deprecated>, has no effect. For backwards compatibility only.
159
 
160
=item outfile
161
 
162
    --outfile=name
163
 
164
Specify the HTML file to create.  Output goes to STDOUT if no outfile
165
is specified.
166
 
167
=item podpath
168
 
169
    --podpath=name:...:name
170
 
171
Specify which subdirectories of the podroot contain pod files whose
172
HTML converted forms can be linked to in cross references.
173
 
174
=item podroot
175
 
176
    --podroot=name
177
 
178
Specify the base directory for finding library pods.
179
 
180
=item quiet
181
 
182
    --quiet
183
    --noquiet
184
 
185
Don't display I<mostly harmless> warning messages.  These messages
186
will be displayed by default.  But this is not the same as C<verbose>
187
mode.
188
 
189
=item recurse
190
 
191
    --recurse
192
    --norecurse
193
 
194
Recurse into subdirectories specified in podpath (default behaviour).
195
 
196
=item title
197
 
198
    --title=title
199
 
200
Specify the title of the resulting HTML file.
201
 
202
=item verbose
203
 
204
    --verbose
205
    --noverbose
206
 
207
Display progress messages.  By default, they won't be displayed.
208
 
209
=back
210
 
211
=head2 htmlify
212
 
213
    htmlify($heading);
214
 
215
Converts a pod section specification to a suitable section specification
216
for HTML. Note that we keep spaces and special characters except 
217
C<", ?> (Netscape problem) and the hyphen (writer's problem...).
218
 
219
=head2 anchorify
220
 
221
    anchorify(@heading);
222
 
223
Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
224
that C<anchorify()> is not exported by default.
225
 
226
=head1 ENVIRONMENT
227
 
228
Uses C<$Config{pod2html}> to setup default options.
229
 
230
=head1 AUTHOR
231
 
232
Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
233
 
234
=head1 SEE ALSO
235
 
236
L<perlpod>
237
 
238
=head1 COPYRIGHT
239
 
240
This program is distributed under the Artistic License.
241
 
242
=cut
243
 
244
my($Cachedir);
245
my($Dircache, $Itemcache, $Toccache);
246
my @Begin_Stack;
247
my @Libpods;
248
my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
249
my($Podfile, @Podpath, $Podroot);
250
my $Css;
251
 
252
my $Recurse;
253
my $Quiet;
254
my $HiddenDirs;
255
my $Verbose;
256
my $Doindex;
257
 
258
my $Backlink;
259
my($Listlevel, @Listend);
260
my $After_Lpar;
261
use vars qw($Ignore);  # need to localize it later.
262
 
263
my(%Items_Named, @Items_Seen);
264
my($Title, $Header);
265
 
266
my $Top;
267
my $Paragraph;
268
 
269
my %Sections;
270
 
271
# Caches
272
my %Pages = ();                 # associative array used to find the location
273
                                #   of pages referenced by L<> links.
274
my %Items = ();                 # associative array used to find the location
275
                                #   of =item directives referenced by C<> links
276
 
277
my %Toc = ();                   # Hash of Table of content metadata
278
 
279
my %Local_Items;
280
my $Is83;
281
 
282
my $Curdir = File::Spec->curdir;
283
my $set_p_class;
284
 
285
init_globals();
286
 
287
sub init_globals {
288
    $Cachedir = ".";            # The directory to which item and directory
289
                                # caches will be written.
290
 
291
    $Dircache = "pod2htmd.tmp";
292
    $Itemcache = "pod2htmi.tmp";
293
    $Toccache = "pod2htmlt.tmp";
294
 
295
    @Begin_Stack = ();          # begin/end stack
296
 
297
    @Libpods = ();              # files to search for links from C<> directives
298
    $Htmlroot = "/";            # http-server base directory from which all
299
                                #   relative paths in $podpath stem.
300
    $Htmldir = "";              # The directory to which the html pages
301
                                # will (eventually) be written.
302
    $Htmlfile = "";             # write to stdout by default
303
    $Htmlfileurl = "" ;         # The url that other files would use to
304
                                # refer to this file.  This is only used
305
                                # to make relative urls that point to
306
                                # other files.
307
 
308
    $Podfile = "";              # read from stdin by default
309
    @Podpath = ();              # list of directories containing library pods.
310
    $Podroot = $Curdir;         # filesystem base directory from which all
311
                                #   relative paths in $podpath stem.
312
    $Css = '';                  # Cascading style sheet
313
    $Recurse = 1;               # recurse on subdirectories in $podpath.
314
    $Quiet = 0;                 # not quiet by default
315
    $Verbose = 0;               # not verbose by default
316
    $Doindex = 1;               # non-zero if we should generate an index
317
    $Backlink = '';             # text for "back to top" links
318
    $Listlevel = 0;             # current list depth
319
    @Listend = ();              # the text to use to end the list.
320
    $After_Lpar = 0;            # set to true after a par in an =item
321
    $Ignore = 1;                # whether or not to format text.  we don't
322
                                #   format text until we hit our first pod
323
                                #   directive.
324
 
325
    @Items_Seen = ();           # for multiples of the same item in perlfunc
326
    %Items_Named = ();
327
    $Header = 0;                # produce block header/footer
328
    $Title = '';                # title to give the pod(s)
329
    $Top = 1;                   # true if we are at the top of the doc.  used
330
                                #   to prevent the first <hr /> directive.
331
    $Paragraph = '';            # which paragraph we're processing (used
332
                                #   for error messages)
333
    %Sections = ();             # sections within this page
334
 
335
    %Local_Items = ();
336
    $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
337
#    $set_p_class = '';
338
}
339
 
340
#
341
# clean_data: global clean-up of pod data
342
#
343
sub clean_data($){
344
    my( $dataref ) = @_;
345
    for my $i ( 0..$#{$dataref} ) {
346
 
347
        ${$dataref}[$i] =~ s/\s+\Z//;
348
        ${$dataref}[$i] =~ s/^\n+(.+)/$1/;
349
 
350
        # have a look for all-space lines
351
      if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
352
            my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
353
            splice( @$dataref, $i, 1, @chunks );
354
        }
355
    }
356
}
357
 
358
 
359
sub pod2html {
360
    local(@ARGV) = @_;
361
    local($/);
362
    local $_;
363
 
364
    init_globals();
365
 
366
    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
367
 
368
    # cache of %Pages and %Items from last time we ran pod2html
369
 
370
    #undef $opt_help if defined $opt_help;
371
 
372
    # parse the command-line parameters
373
    parse_command_line();
374
 
375
    # escape the backlink argument (same goes for title but is done later...)
376
    $Backlink = html_escape($Backlink) if defined $Backlink;
377
 
378
    # set some variables to their default values if necessary
379
    local *POD;
380
    unless (@ARGV && $ARGV[0]) {
381
        $Podfile  = "-" unless $Podfile;        # stdin
382
        open(POD, "<$Podfile")
383
                || die "$0: cannot open $Podfile file for input: $!\n";
384
    } else {
385
        $Podfile = $ARGV[0];  # XXX: might be more filenames
386
        *POD = *ARGV;
387
    }
388
    $Htmlfile = "-" unless $Htmlfile;   # stdout
389
    $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
390
    $Htmldir =~ s#/\z## ;               # so we don't get a //
391
    if (  $Htmlroot eq ''
392
       && defined( $Htmldir )
393
       && $Htmldir ne ''
394
       && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
395
       )
396
    {
397
        # Set the 'base' url for this file, so that we can use it
398
        # as the location from which to calculate relative links
399
        # to other files. If this is '', then absolute links will
400
        # be used throughout.
401
        $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
402
    }
403
 
404
    # read the pod a paragraph at a time
405
    warn "Scanning for sections in input file(s)\n" if $Verbose;
406
    $/ = "";
407
    my @poddata  = <POD>;
408
    close(POD);
409
 
410
    # be eol agnostic
411
    for (@poddata) {
412
        if (/\r/) {
413
            if (/\r\n/) {
414
                @poddata = map { s/\r\n/\n/g;
415
                                 /\n\n/ ?
416
                                     map { "$_\n\n" } split /\n\n/ :
417
                                     $_ } @poddata;
418
            } else {
419
                @poddata = map { s/\r/\n/g;
420
                                 /\n\n/ ?
421
                                     map { "$_\n\n" } split /\n\n/ :
422
                                     $_ } @poddata;
423
            }
424
            last;
425
        }
426
    }
427
    clean_data( \@poddata );
428
 
429
 
430
    # scan the pod for =head[1-6] directives and build an index
431
    my $index = scan_headings(\%Sections, @poddata);
432
 
433
    unless($index) {
434
        warn "No headings in $Podfile\n" if $Verbose;
435
    }
436
 
437
    # open the output file
438
    open(HTML, ">$Htmlfile")
439
            || die "$0: cannot open $Htmlfile file for output: $!\n";
440
 
441
    # put a title in the HTML file if one wasn't specified
442
    if ($Title eq '') {
443
        TITLE_SEARCH: {
444
            for (my $i = 0; $i < @poddata; $i++) {
445
                if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
446
                    for my $para ( @poddata[$i, $i+1] ) {
447
                        last TITLE_SEARCH
448
                            if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
449
                    }
450
                }
451
 
452
            }
453
        }
454
    }
455
    if (!$Title and $Podfile =~ /\.pod\z/) {
456
        # probably a split pod so take first =head[12] as title
457
        for (my $i = 0; $i < @poddata; $i++) {
458
            last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
459
        }
460
        warn "adopted '$Title' as title for $Podfile\n"
461
            if $Verbose and $Title;
462
    }
463
    if ($Title) {
464
        $Title =~ s/\s*\(.*\)//;
465
    } else {
466
        warn "$0: no title for $Podfile.\n" unless $Quiet;
467
        $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
468
        $Title = ($Podfile eq "-" ? 'No Title' : $1);
469
        warn "using $Title" if $Verbose;
470
    }
471
    $Title = html_escape($Title);
472
 
473
    my $csslink = '';
474
    my $bodystyle = ' style="background-color: white"';
475
    my $tdstyle = ' style="background-color: #cccccc"';
476
 
477
    if ($Css) {
478
      $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
479
      $csslink =~ s,\\,/,g;
480
      $csslink =~ s,(/.):,$1|,;
481
      $bodystyle = '';
482
      $tdstyle = '';
483
    }
484
 
485
      my $block = $Header ? <<END_OF_BLOCK : '';
486
<table border="0" width="100%" cellspacing="0" cellpadding="3">
487
<tr><td class="block"$tdstyle valign="middle">
488
<big><strong><span class="block">&nbsp;$Title</span></strong></big>
489
</td></tr>
490
</table>
491
END_OF_BLOCK
492
 
493
    print HTML <<END_OF_HEAD;
494
<?xml version="1.0" ?>
495
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
496
<html xmlns="http://www.w3.org/1999/xhtml">
497
<head>
498
<title>$Title</title>$csslink
499
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
500
</head>
501
 
502
<body$bodystyle>
503
$block
504
END_OF_HEAD
505
 
506
    # load/reload/validate/cache %Pages and %Items
507
    get_cache($Dircache, $Itemcache, $Toccache, \@Podpath, $Podroot, $Recurse);
508
 
509
    # scan the pod for =item directives
510
    scan_items( \%Local_Items, "", @poddata);
511
 
512
    scan_toc( \%Toc, $Podfile, @poddata);
513
    save_toc($Podroot);
514
 
515
    # put an index at the top of the file.  note, if $Doindex is 0 we
516
    # still generate an index, but surround it with an html comment.
517
    # that way some other program can extract it if desired.
518
    $index =~ s/--+/-/g;
519
 
520
    my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : "";
521
    unless ($Doindex)
522
    {
523
        $index = qq(<!--\n$index\n-->\n);
524
    }
525
 
526
    print HTML << "END_OF_INDEX";
527
 
528
<!-- INDEX BEGIN -->
529
<div name="index"  class="index">
530
<p><a name=\"__index__\"></a></p>
531
$index
532
$hr
533
</div>
534
<!-- INDEX END -->
535
 
536
END_OF_INDEX
537
 
538
    # now convert this file
539
    my $after_item;             # set to true after an =item
540
    my $need_dd = 0;
541
    warn "Converting input file $Podfile\n" if $Verbose;
542
    foreach my $i (0..$#poddata){
543
        $_ = $poddata[$i];
544
        $Paragraph = $i+1;
545
        if (/^(=.*)/s) {        # is it a pod directive?
546
            $Ignore = 0;
547
            $after_item = 0;
548
            $need_dd = 0;
549
            $_ = $1;
550
            if (/^=begin\s+(\S+)\s*(.*)/si) {   # =begin
551
                process_begin($1, $2);
552
            } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
553
                process_end($1, $2);
554
            } elsif (/^=cut/) {                 # =cut
555
                process_cut();
556
            } elsif (/^=pod/) {                 # =pod
557
                process_pod();
558
            } else {
559
                next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
560
 
561
                if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
562
                    process_head( $1, $2, $Doindex && $index );
563
                } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
564
                    $need_dd = process_item( $1 );
565
                    $after_item = 1;
566
                } elsif (/^=over\s*(.*)/) {             # =over N
567
                    process_over();
568
                } elsif (/^=back/) {                    # =back
569
                    process_back($need_dd);
570
                } elsif (/^=for\s+(\S+)\s*(.*)/si) {    # =for
571
                    process_for($1,$2);
572
                } else {
573
                    /^=(\S*)\s*/;
574
                    warn "$0: $Podfile: unknown pod directive '$1' in "
575
                       . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
576
                }
577
            }
578
            $Top = 0;
579
        }
580
        else {
581
            next if $Ignore;
582
            next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
583
            print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
584
            print HTML "<dd>\n" if $need_dd;
585
            my $text = $_;
586
            if( $text =~ /\A\s+/ ){
587
                process_pre( \$text );
588
                print HTML "<pre>\n$text</pre>\n";
589
 
590
            } else {
591
                process_text( \$text );
592
 
593
                # experimental: check for a paragraph where all lines
594
                # have some ...\t...\t...\n pattern
595
                if( $text =~ /\t/ ){
596
                    my @lines = split( "\n", $text );
597
                    if( @lines > 1 ){
598
                        my $all = 2;
599
                        foreach my $line ( @lines ){
600
                            if( $line =~ /\S/ && $line !~ /\t/ ){
601
                                $all--;
602
                                last if $all == 0;
603
                            }
604
                        }
605
                        if( $all > 0 ){
606
                            $text =~ s/\t+/<td>/g;
607
                            $text =~ s/^/<tr><td>/gm;
608
                            $text = '<table cellspacing="0" cellpadding="0">' .
609
                                    $text . '</table>';
610
                        }
611
                    }
612
                }
613
                ## end of experimental
614
 
615
                if( $after_item ){
616
                    $After_Lpar = 1;
617
                }
618
 
619
        if ( $set_p_class )
620
        {
621
                print HTML "<p class=\"$set_p_class\">$text</p>\n";
622
            $set_p_class = 0;
623
        }
624
        else
625
        {
626
                    print HTML "<p>$text</p>\n";
627
        }
628
            }
629
            print HTML "</dd>\n" if $need_dd;
630
            $after_item = 0;
631
        }
632
    }
633
 
634
    # finish off any pending directives
635
    finish_list();
636
 
637
    # link to page index
638
    print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
639
        if $Doindex and $index and $Backlink;
640
 
641
    print HTML <<END_OF_TAIL;
642
$block
643
</body>
644
 
645
</html>
646
END_OF_TAIL
647
 
648
    # close the html file
649
    close(HTML);
650
 
651
    warn "Finished\n" if $Verbose;
652
}
653
 
654
##############################################################################
655
 
656
sub usage {
657
    my $podfile = shift;
658
    warn "$0: $podfile: @_\n" if @_;
659
    die <<END_OF_USAGE;
660
Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
661
           --podpath=<name>:...:<name> --podroot=<name>
662
           --libpods=<name>:...:<name> --recurse --verbose --index
663
           --netscape --norecurse --noindex --cachedir=<name>
664
 
665
  --backlink     - set text for "back to top" links (default: none).
666
  --cachedir     - directory for the item and directory cache files.
667
  --css          - stylesheet URL
668
  --flush        - flushes the item and directory caches.
669
  --[no]header   - produce block header/footer (default is no headers).
670
  --help         - prints this message.
671
  --hiddendirs   - search hidden directories in podpath
672
  --htmldir      - directory for resulting HTML files.
673
  --htmlroot     - http-server base directory from which all relative paths
674
                   in podpath stem (default is /).
675
  --[no]index    - generate an index at the top of the resulting html
676
                   (default behaviour).
677
  --infile       - filename for the pod to convert (input taken from stdin
678
                   by default).
679
  --libpods      - colon-separated list of pages to search for =item pod
680
                   directives in as targets of C<> and implicit links (empty
681
                   by default).  note, these are not filenames, but rather
682
                   page names like those that appear in L<> links.
683
  --outfile      - filename for the resulting html file (output sent to
684
                   stdout by default).
685
  --podpath      - colon-separated list of directories containing library
686
                   pods (empty by default).
687
  --podroot      - filesystem base directory from which all relative paths
688
                   in podpath stem (default is .).
689
  --[no]quiet    - suppress some benign warning messages (default is off).
690
  --[no]recurse  - recurse on those subdirectories listed in podpath
691
                   (default behaviour).
692
  --title        - title that will appear in resulting html file.
693
  --[no]verbose  - self-explanatory (off by default).
694
  --[no]netscape - deprecated, has no effect. for backwards compatibility only.
695
 
696
END_OF_USAGE
697
 
698
}
699
 
700
sub parse_command_line {
701
    my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
702
        $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
703
        $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
704
        $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
705
 
706
    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
707
    my $result = GetOptions(
708
                            'backlink=s' => \$opt_backlink,
709
                            'cachedir=s' => \$opt_cachedir,
710
                            'css=s'      => \$opt_css,
711
                            'flush'      => \$opt_flush,
712
                            'header!'    => \$opt_header,
713
                            'help'       => \$opt_help,
714
                            'hiddendirs!'=> \$opt_hiddendirs,
715
                            'htmldir=s'  => \$opt_htmldir,
716
                            'htmlroot=s' => \$opt_htmlroot,
717
                            'index!'     => \$opt_index,
718
                            'infile=s'   => \$opt_infile,
719
                            'libpods=s'  => \$opt_libpods,
720
                            'netscape!'  => \$opt_netscape,
721
                            'outfile=s'  => \$opt_outfile,
722
                            'podpath=s'  => \$opt_podpath,
723
                            'podroot=s'  => \$opt_podroot,
724
                            'quiet!'     => \$opt_quiet,
725
                            'recurse!'   => \$opt_recurse,
726
                            'title=s'    => \$opt_title,
727
                            'verbose!'   => \$opt_verbose,
728
                           );
729
    usage("-", "invalid parameters") if not $result;
730
 
731
    usage("-") if defined $opt_help;    # see if the user asked for help
732
    $opt_help = "";                     # just to make -w shut-up.
733
 
734
    @Podpath  = map { s/\|/:/g; $_ } split(":", $opt_podpath) if defined $opt_podpath;
735
    @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
736
 
737
    $Backlink = $opt_backlink if defined $opt_backlink;
738
    $Cachedir = $opt_cachedir if defined $opt_cachedir;
739
    $Css      = $opt_css      if defined $opt_css;
740
    $Header   = $opt_header   if defined $opt_header;
741
    $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
742
    $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
743
    $Doindex  = $opt_index    if defined $opt_index;
744
    $Podfile  = $opt_infile   if defined $opt_infile;
745
    $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
746
    $Htmlfile = $opt_outfile  if defined $opt_outfile;
747
    $Podroot  = $opt_podroot  if defined $opt_podroot;
748
    $Quiet    = $opt_quiet    if defined $opt_quiet;
749
    $Recurse  = $opt_recurse  if defined $opt_recurse;
750
    $Title    = $opt_title    if defined $opt_title;
751
    $Verbose  = $opt_verbose  if defined $opt_verbose;
752
 
753
    warn "Flushing item and directory caches\n"
754
        if $opt_verbose && defined $opt_flush;
755
    $Dircache = "$Cachedir/pod2htmd.tmp";
756
    $Itemcache = "$Cachedir/pod2htmi.tmp";
757
    $Toccache = "$Cachedir/pod2htmt.tmp";
758
    if (defined $opt_flush) {
759
        1 while unlink($Dircache, $Itemcache, $Toccache);
760
    }
761
}
762
 
763
 
764
my $Saved_Cache_Key;
765
 
766
sub get_cache {
767
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
768
    my @cache_key_args = @_;
769
 
770
    # A first-level cache:
771
    # Don't bother reading the cache files if they still apply
772
    # and haven't changed since we last read them.
773
    my $this_cache_key = cache_key(@cache_key_args);
774
 
775
    return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
776
 
777
    # load the cache of %Pages and %Items if possible.  $tests will be
778
    # non-zero if successful.
779
    my $tests = 0;
780
    if (-f $dircache && -f $itemcache && -f $toccache) {
781
        warn "scanning for item cache\n" if $Verbose;
782
        $tests = load_cache($dircache, $itemcache, $toccache, $podpath, $podroot);
783
    }
784
 
785
    # if we didn't succeed in loading the cache then we must (re)build
786
    #  %Pages and %Items.
787
    if (!$tests) {
788
        warn "scanning directories in pod-path\n" if $Verbose;
789
        scan_podpath($podroot, $recurse, 0);
790
    }
791
    $Saved_Cache_Key = cache_key(@cache_key_args);
792
}
793
 
794
sub cache_key {
795
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
796
    return join('!', $dircache, $itemcache, $toccache, $recurse,
797
        @$podpath, $podroot, stat($dircache), stat($itemcache), stat($toccache));
798
}
799
 
800
#
801
# load_cache - tries to find if the caches stored in $dircache and $itemcache
802
#  are valid caches of %Pages and %Items.  if they are valid then it loads
803
#  them and returns a non-zero value.
804
#
805
sub load_cache {
806
    my($dircache, $itemcache, $toccache, $podpath, $podroot) = @_;
807
    my($tests);
808
    local $_;
809
 
810
    $tests = 0;
811
 
812
    open(CACHE, "<$itemcache") ||
813
        die "$0: error opening $itemcache for reading: $!\n";
814
    $/ = "\n";
815
 
816
    # is it the same podpath?
817
    $_ = <CACHE>;
818
    chomp($_);
819
    $tests++ if (join(":", @$podpath) eq $_);
820
 
821
    # is it the same podroot?
822
    $_ = <CACHE>;
823
    chomp($_);
824
    $tests++ if ($podroot eq $_);
825
 
826
    # load the cache if its good
827
    if ($tests != 2) {
828
        close(CACHE);
829
        return 0;
830
    }
831
 
832
    warn "loading item cache\n" if $Verbose;
833
    while (<CACHE>) {
834
        /(.*?) (.*)$/;
835
        $Items{$1} = $2;
836
    }
837
    close(CACHE);
838
 
839
    warn "scanning for directory cache\n" if $Verbose;
840
    open(CACHE, "<$dircache") ||
841
        die "$0: error opening $dircache for reading: $!\n";
842
    $/ = "\n";
843
    $tests = 0;
844
 
845
    # is it the same podpath?
846
    $_ = <CACHE>;
847
    chomp($_);
848
    $tests++ if (join(":", @$podpath) eq $_);
849
 
850
    # is it the same podroot?
851
    $_ = <CACHE>;
852
    chomp($_);
853
    $tests++ if ($podroot eq $_);
854
 
855
    # load the cache if its good
856
    if ($tests != 2) {
857
        close(CACHE);
858
        return 0;
859
    }
860
 
861
    warn "loading directory cache\n" if $Verbose;
862
    while (<CACHE>) {
863
        /(.*?) (.*)$/;
864
        $Pages{$1} = $2;
865
    }
866
 
867
    close(CACHE);
868
 
869
    #
870
    #   Load Toc Cache
871
    #
872
    open(CACHE, "<$toccache") ||
873
        die "$0: error opening $toccache for reading: $!\n";
874
    $/ = "\n";
875
    $tests = 0;
876
 
877
    # is it the same podpath?
878
    $_ = <CACHE>;
879
    chomp($_);
880
    $tests++ if (join(":", @$podpath) eq $_);
881
 
882
    # is it the same podroot?
883
    $_ = <CACHE>;
884
    chomp($_);
885
    $tests++ if ($podroot eq $_);
886
 
887
    # load the cache if its good
888
    if ($tests != 2) {
889
        close(CACHE);
890
        return 0;
891
    }
892
    warn "loading toc cache\n" if $Verbose;
893
    while (<CACHE>) {
894
        /(.*?) (.*)$/;
895
        $Toc{$1} = $2;
896
    }
897
    close(CACHE);
898
 
899
    return 1;
900
}
901
 
902
#
903
# scan_podpath - scans the directories specified in @podpath for directories,
904
#  .pod files, and .pm files.  it also scans the pod files specified in
905
#  @Libpods for =item directives.
906
#
907
sub scan_podpath {
908
    my($podroot, $recurse, $append) = @_;
909
    my($pwd, $dir);
910
    my($libpod, $dirname, $pod, @files, @poddata);
911
    unless($append) {
912
        %Items = ();
913
        %Pages = ();
914
        %Toc = ();
915
    }
916
 
917
    # scan each directory listed in @Podpath
918
    $pwd = getcwd();
919
    chdir($podroot)
920
        || die "$0: error changing to directory $podroot: $!\n";
921
    foreach $dir (@Podpath) {
922
        scan_dir($dir, $recurse);
923
    }
924
 
925
    # scan the pods listed in @Libpods for =item directives
926
    foreach $libpod (@Libpods) {
927
        # if the page isn't defined then we won't know where to find it
928
        # on the system.
929
        next unless defined $Pages{$libpod} && $Pages{$libpod};
930
 
931
        # if there is a directory then use the .pod and .pm files within it.
932
        # NOTE: Only finds the first so-named directory in the tree.
933
#       if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
934
        if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)(?<!\.pl)):/) {
935
            #  find all the .pod and .pm files within the directory
936
            $dirname = $1;
937
            opendir(DIR, $dirname) ||
938
                die "$0: scan_podpath:error opening directory $dirname: $!\n";
939
            @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
940
            closedir(DIR);
941
 
942
            # scan each .pod and .pm file for =item directives
943
            foreach $pod (@files) {
944
                open(POD, "<$dirname/$pod") ||
4386 dpurdie 945
                    die "$0: error opening(pod) $dirname/$pod for input: $!\n";
361 dpurdie 946
                @poddata = <POD>;
947
                close(POD);
948
                clean_data( \@poddata );
949
 
950
                scan_items( \%Items, "$dirname/$pod", @poddata);
951
            }
952
 
953
            # use the names of files as =item directives too.
954
### Don't think this should be done this way - confuses issues.(WL)
955
###         foreach $pod (@files) {
956
###             $pod =~ /^(.*)(\.pod|\.pm)$/;
957
###             $Items{$1} = "$dirname/$1.html" if $1;
958
###         }
959
        } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
960
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
961
            # scan the .pod or .pm file for =item directives
962
            $pod = $1;
963
            open(POD, "<$pod") ||
4386 dpurdie 964
                die "$0: error opening(pod1) $pod for input: $!\n";
361 dpurdie 965
            @poddata = <POD>;
966
            close(POD);
967
            clean_data( \@poddata );
968
 
969
            scan_items( \%Items, "$pod", @poddata);
970
        } else {
971
            warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
972
        }
973
    }
974
    @poddata = ();      # clean-up a bit
975
 
976
    #
977
    #   Scan all POD for TOC metadata
978
    #
979
    foreach my $libpod ( keys %Pages )
980
    {
981
        if ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
982
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
983
            # scan the .pod or .pm file for =for directives
984
            $pod = $1;
4386 dpurdie 985
            next if ($pod =~ m~\.keep\.pod~);
361 dpurdie 986
            open(POD, "<$pod") ||
4386 dpurdie 987
                die "$0: error opening(pod2) $pod for input: $!\n";
361 dpurdie 988
            @poddata = <POD>;
989
            close(POD);
990
            clean_data( \@poddata );
991
 
992
            scan_toc( \%Toc, "$pod", @poddata);
993
        }
994
    }
995
    @poddata = ();      # clean-up a bit
996
 
997
 
998
    chdir($pwd)
999
        || die "$0: error changing to directory $pwd: $!\n";
1000
 
1001
    # cache the item list for later use
1002
    warn "caching items for later use\n" if $Verbose;
1003
    open(CACHE, ">$Itemcache") ||
1004
        die "$0: error open $Itemcache for writing: $!\n";
1005
 
1006
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1007
    foreach my $key (keys %Items) {
1008
        print CACHE "$key $Items{$key}\n";
1009
    }
1010
 
1011
    close(CACHE);
1012
 
1013
    # cache the directory list for later use
1014
    warn "caching directories for later use\n" if $Verbose;
1015
    open(CACHE, ">$Dircache") ||
1016
        die "$0: error open $Dircache for writing: $!\n";
1017
 
1018
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1019
    foreach my $key (keys %Pages) {
1020
        print CACHE "$key $Pages{$key}\n";
1021
    }
1022
    close(CACHE);
1023
 
1024
    #
1025
    # cache the Toc Data for later use
1026
    #   Save Toc too
1027
    #
1028
    save_toc($podroot);
1029
}
1030
 
1031
sub save_toc
1032
{
1033
    my ($podroot) = @_;
1034
    warn "caching toc for later use\n" if $Verbose;
1035
    open(CACHE, ">$Toccache") ||
1036
        die "$0: error open $Toccache for writing: $!\n";
1037
 
1038
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1039
    foreach my $key (keys %Toc) {
1040
        print CACHE "$key $Toc{$key}\n";
1041
    }
1042
    close(CACHE);
1043
}
1044
 
1045
#
1046
# scan_dir - scans the directory specified in $dir for subdirectories, .pod
1047
#  files, and .pm files.  notes those that it finds.  this information will
1048
#  be used later in order to figure out where the pages specified in L<>
1049
#  links are on the filesystem.
1050
#
1051
sub scan_dir {
1052
    my($dir, $recurse) = @_;
1053
    my($t, @subdirs, @pods, $pod, $dirname, @dirs);
1054
    local $_;
1055
 
1056
    @subdirs = ();
1057
    @pods = ();
1058
 
1059
    opendir(DIR, $dir) ||
1060
        die "$0: scan_dir:error opening directory $dir: $!\n";
1061
    while (defined($_ = readdir(DIR))) {
1062
        if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
1063
            && ($HiddenDirs || !/^\./)
1064
        ) {         # directory
1065
            $Pages{$_}  = "" unless defined $Pages{$_};
1066
            $Pages{$_} .= "$dir/$_:";
1067
            push(@subdirs, $_);
1068
        } elsif (/\.pod\z/) {                               # .pod
1069
            s/\.pod\z//;
1070
            $Pages{$_}  = "" unless defined $Pages{$_};
1071
            $Pages{$_} .= "$dir/$_.pod:";
1072
            push(@pods, "$dir/$_.pod");
1073
        } elsif (/\.html\z/) {                              # .html
1074
            s/\.html\z//;
1075
            $Pages{$_}  = "" unless defined $Pages{$_};
1076
            $Pages{$_} .= "$dir/$_.pod:";
1077
        } elsif (/\.pm\z/) {                                # .pm
1078
            s/\.pm\z//;
1079
            $Pages{$_}  = "" unless defined $Pages{$_};
1080
            $Pages{$_} .= "$dir/$_.pm:";
1081
            push(@pods, "$dir/$_.pm");
1082
        } elsif (/\.pl\z/) {                                # .pl
1083
            s/\.pl\z//;
1084
            $Pages{$_}  = "" unless defined $Pages{$_};
1085
            $Pages{$_} .= "$dir/$_.pl:";
1086
            push(@pods, "$dir/$_.pl");
1087
        } elsif (/\.pod\.txt\z/) {                          # Skip .pod.txt
1088
        } elsif (-T "$dir/$_") {                            # script(?)
1089
            local *F;
1090
            if (open(F, "$dir/$_")) {
1091
                my $line;
1092
                while (defined($line = <F>)) {
1093
                    if ($line =~ /^=(?:pod|head1)/) {
1094
                        $Pages{$_}  = "" unless defined $Pages{$_};
1095
                        $Pages{$_} .= "$dir/$_.pod:";
1096
                        last;
1097
                    }
1098
                }
1099
                close(F);
1100
            }
1101
        }
1102
    }
1103
    closedir(DIR);
1104
 
1105
    # recurse on the subdirectories if necessary
1106
    if ($recurse) {
1107
        foreach my $subdir (@subdirs) {
1108
            scan_dir("$dir/$subdir", $recurse);
1109
        }
1110
    }
1111
}
1112
 
1113
#
1114
# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1115
#  build an index.
1116
#
1117
sub scan_headings {
1118
    my($sections, @data) = @_;
1119
    my($tag, $which_head, $otitle, $listdepth, $index);
1120
 
1121
    local $Ignore = 0;
1122
 
1123
    $listdepth = 0;
1124
    $index = "";
1125
 
1126
    # scan for =head directives, note their name, and build an index
1127
    #  pointing to each of them.
1128
    foreach my $line (@data) {
1129
      if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1130
        ($tag, $which_head, $otitle) = ($1,$2,$3);
1131
 
1132
        my $title = depod( $otitle );
1133
        my $name = anchorify( $title );
1134
        $$sections{$name} = 1;
1135
        $title = process_text( \$otitle );
1136
 
1137
            while ($which_head != $listdepth) {
1138
                if ($which_head > $listdepth) {
1139
                    $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1140
                    $listdepth++;
1141
                } elsif ($which_head < $listdepth) {
1142
                    $listdepth--;
1143
                    $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1144
                }
1145
            }
1146
 
1147
            $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1148
                      "<a href=\"#" . $name . "\">" .
1149
                      $title . "</a></li>";
1150
        }
1151
    }
1152
 
1153
    # finish off the lists
1154
    while ($listdepth--) {
1155
        $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1156
    }
1157
 
1158
    # get rid of bogus lists
1159
    $index =~ s,\t*<ul>\s*</ul>\n,,g;
1160
 
1161
    return $index;
1162
}
1163
 
1164
#
1165
# scan_items - scans the pod specified by $pod for =item directives.  we
1166
#  will use this information later on in resolving C<> links.
1167
#
1168
sub scan_items {
1169
    my( $itemref, $pod, @poddata ) = @_;
1170
    my($i, $item);
1171
    local $_;
1172
 
1173
    $pod =~ s/\.pod\z//;
1174
    $pod =~ s/\.p[lm]\z//;
1175
    $pod .= ".html" if $pod;
1176
 
1177
    foreach $i (0..$#poddata) {
1178
        my $txt = depod( $poddata[$i] );
1179
        $txt =~ s~^\n~~;
1180
        # figure out what kind of item it is.
1181
        # Build string for referencing this item.
1182
        if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1183
            next unless $1;
1184
            $item = $1;
1185
        } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1186
            $item = $1;
1187
        } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1188
            $item = $1;
1189
        } elsif( $txt =~ /\A=head[234]\s+(.*)\Z/s ) { # plain item
1190
            $item = $1;
1191
        } else {
1192
            next;
1193
        }
1194
        my $fid = fragment_id( $item );
1195
        $$itemref{$fid} = "$pod" if $fid;
1196
    }
1197
}
1198
 
1199
sub scan_toc {
1200
    my( $tocref, $pod, @poddata ) = @_;
1201
    my($i, $item);
1202
    local $_;
1203
 
1204
    $pod =~ s/\.pod\z//;
1205
    $pod =~ s/\.p[lm]\z//;
1206
    $pod .= ".html" if $pod;
1207
 
1208
    foreach $i (0..$#poddata) {
1209
        my $txt = depod( $poddata[$i] );
1210
 
1211
        if ($txt =~ /^=for\s+htmltoc\s*(.*)/si) {# =for
1212
            $item = $1;
1213
            $$tocref{$pod} = $1 if $1;
1214
        }
1215
    }
1216
}
1217
 
1218
 
1219
#
1220
# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1221
#
1222
sub process_head {
1223
    my($tag, $heading, $hasindex) = @_;
1224
 
1225
    # figure out the level of the =head
1226
    $tag =~ /head([1-6])/;
1227
    my $level = $1;
1228
 
1229
    if( $Listlevel ){
1230
        warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1231
        while( $Listlevel ){
1232
            process_back();
1233
        }
1234
    }
1235
 
1236
    print HTML "<p>\n";
1237
    if( $level == 1 && ! $Top ){
1238
      print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1239
        if $hasindex and $Backlink;
1240
      print HTML "</p>\n<hr />\n"
1241
    } else {
1242
      print HTML "</p>\n";
1243
    }
1244
 
1245
    my $name = anchorify( depod( $heading ) );
1246
    my $convert = process_text( \$heading );
1247
    print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1248
}
1249
 
1250
 
1251
#
1252
# emit_item_tag - print an =item's text
1253
# Note: The global $EmittedItem is used for inhibiting self-references.
1254
#
1255
my $EmittedItem;
1256
 
1257
sub emit_item_tag($$$){
1258
    my( $otext, $text, $compact ) = @_;
1259
    my $item = fragment_id( depod($text) , -generate);
1260
    Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile")
1261
        if !defined $item;
1262
    $EmittedItem = $item;
1263
### print STDERR "emit_item_tag=$item ($text)\n";
1264
 
1265
    print HTML '<strong>';
1266
    if ($Items_Named{$item}++) {
1267
        print HTML process_text( \$otext );
1268
    } else {
1269
        my $name = 'item_' .$item;
1270
        $name = anchorify($name);
1271
        print HTML qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>';
1272
    }
1273
    print HTML "</strong>\n";
1274
    undef( $EmittedItem );
1275
}
1276
 
1277
sub emit_li {
1278
    my( $tag ) = @_;
1279
    if( $Items_Seen[$Listlevel]++ == 0 ){
1280
        push( @Listend, "</$tag>" );
1281
        print HTML "<$tag>\n";
1282
    }
1283
    my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1284
    print HTML "<$emitted>";
1285
    return $emitted;
1286
}
1287
 
1288
#
1289
# process_item - convert a pod item tag and convert it to HTML format.
1290
#
1291
sub process_item {
1292
    my( $otext ) = @_;
1293
    my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1294
 
1295
    # lots of documents start a list without doing an =over.  this is
1296
    # bad!  but, the proper thing to do seems to be to just assume
1297
    # they did do an =over.  so warn them once and then continue.
1298
    if( $Listlevel == 0 ){
1299
        warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1300
        process_over();
1301
    }
1302
 
1303
    # formatting: insert a paragraph if preceding item has >1 paragraph
1304
    if( $After_Lpar ){
1305
        print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1306
        $After_Lpar = 0;
1307
    }
1308
 
1309
    # remove formatting instructions from the text
1310
    my $text = depod( $otext );
1311
 
1312
    my $emitted; # the tag actually emitted, used for closing
1313
 
1314
    # all the list variants:
1315
    if( $text =~ /\A\*/ ){ # bullet
1316
        $emitted = emit_li( 'ul' );
1317
        if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text
1318
            my $tag = $1;
1319
            $otext =~ s/\A\*\s+//;
1320
            emit_item_tag( $otext, $tag, 1 );
1321
        }
1322
 
1323
    } elsif( $text =~ /\A\d+/ ){ # numbered list
1324
        $emitted = emit_li( 'ol' );
1325
        if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text
1326
            my $tag = $1;
1327
            $otext =~ s/\A\d+\.?\s*//;
1328
            emit_item_tag( $otext, $tag, 1 );
1329
        }
1330
 
1331
    } else {                    # definition list
1332
        $emitted = emit_li( 'dl' );
1333
        if ($text =~ /\A(.+)\Z/s ){ # should have text
1334
            emit_item_tag( $otext, $text, 1 );
1335
        }
1336
        $need_dd = 1;
1337
    }
1338
    print HTML "\n";
1339
    return $need_dd;
1340
}
1341
 
1342
#
1343
# process_over - process a pod over tag and start a corresponding HTML list.
1344
#
1345
sub process_over {
1346
    # start a new list
1347
    $Listlevel++;
1348
    push( @Items_Seen, 0 );
1349
    $After_Lpar = 0;
1350
}
1351
 
1352
#
1353
# process_back - process a pod back tag and convert it to HTML format.
1354
#
1355
sub process_back {
1356
    my $need_dd = shift;
1357
    if( $Listlevel == 0 ){
1358
        warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1359
        return;
1360
    }
1361
 
1362
    # close off the list.  note, I check to see if $Listend[$Listlevel] is
1363
    # defined because an =item directive may have never appeared and thus
1364
    # $Listend[$Listlevel] may have never been initialized.
1365
    $Listlevel--;
1366
    if( defined $Listend[$Listlevel] ){
1367
        print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1368
        print HTML $Listend[$Listlevel];
1369
        print HTML "\n";
1370
        pop( @Listend );
1371
    }
1372
    $After_Lpar = 0;
1373
 
1374
    # clean up item count
1375
    pop( @Items_Seen );
1376
}
1377
 
1378
#
1379
# process_cut - process a pod cut tag, thus start ignoring pod directives.
1380
#
1381
sub process_cut {
1382
    $Ignore = 1;
1383
}
1384
 
1385
#
1386
# process_pod - process a pod tag, thus stop ignoring pod directives
1387
# until we see a corresponding cut.
1388
#
1389
sub process_pod {
1390
    # no need to set $Ignore to 0 cause the main loop did it
1391
}
1392
 
1393
#
1394
# process_for - process a =for pod tag.  if it's for html, spit
1395
# it out verbatim, if illustration, center it, otherwise ignore it.
1396
#
1397
sub process_for {
1398
    my($whom, $text) = @_;
1399
    if ( $whom =~ /^(pod2)?html$/i) {
1400
            print HTML $text;
1401
    } elsif ($whom =~ /^htmlclass$/i) {
1402
        $set_p_class = $text;
1403
 
1404
    } elsif ($whom =~ /^htmltoc$/i) {
1405
            # Not processed here - processed in scan_toc
1406
 
1407
    } elsif ($whom =~ /^illustration$/i) {
1408
        1 while chomp $text;
1409
        for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1410
          $text .= $ext, last if -r "$text$ext";
1411
        }
1412
        print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1413
    }
1414
}
1415
 
1416
#
1417
# process_begin - process a =begin pod tag.  this pushes
1418
# whom we're beginning on the begin stack.  if there's a
1419
# begin stack, we only print if it us.
1420
#
1421
sub process_begin {
1422
    my($whom, $text) = @_;
1423
    $whom = lc($whom);
1424
    push (@Begin_Stack, $whom);
1425
    if ( $whom =~ /^(pod2)?html$/) {
1426
        print HTML $text if $text;
1427
    }
1428
}
1429
 
1430
#
1431
# process_end - process a =end pod tag.  pop the
1432
# begin stack.  die if we're mismatched.
1433
#
1434
sub process_end {
1435
    my($whom, $text) = @_;
1436
    $whom = lc($whom);
1437
    if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) {
1438
        Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n")
1439
    }
1440
    pop( @Begin_Stack );
1441
}
1442
 
1443
#
1444
# process_pre - indented paragraph, made into <pre></pre>
1445
#
1446
sub process_pre {
1447
    my( $text ) = @_;
1448
    my( $rest );
1449
    return if $Ignore;
1450
 
1451
    $rest = $$text;
1452
 
1453
    # insert spaces in place of tabs
1454
    $rest =~ s#(.+)#
1455
            my $line = $1;
1456
            1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1457
            $line;
1458
        #eg;
1459
 
1460
    my @line = split /\n/, $rest;
1461
    my $comments = !grep !/^\s*(#.*)?$/, @line;
1462
 
1463
    # Try to colorize the block as Perl sample code
1464
    if (($comments || $rest =~ /[;{]/) &&
1465
        eval {require ActiveState::Scineplex})
1466
    {
1467
        my $prefix;
1468
        my $input = $rest; # Don't modify $rest in case the colorizer fails
1469
        if ($input =~ /^( +)/) {
1470
            $prefix = $1;
1471
            s/^$prefix// for @line;
1472
            $input = join("\n", @line, "");
1473
        }
1474
        my $styled = eval { ActiveState::Scineplex::Annotate($input, 'perl', outputFormat => 'html') };
1475
        if ($styled) {
1476
            # If this really looks like code, then we don't want
1477
            # to add hyperlinks to URLs embedded in strings etc.
1478
            if ($prefix) {
1479
                $$text = $prefix . join("\n$prefix", split("\n", $styled)) . "\n";
1480
            }
1481
            else {
1482
                $$text = $styled;
1483
            }
1484
            return;
1485
        }
1486
    }
1487
 
1488
    # convert some special chars to HTML escapes
1489
    $rest = html_escape($rest);
1490
 
1491
    # try and create links for all occurrences of perl.* within
1492
    # the preformatted text.
1493
    $rest =~ s{
1494
                 (\s*)(perl\w+)
1495
              }{
1496
                 if ( defined $Pages{$2} ){     # is a link
1497
                     qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1498
                 } elsif (defined $Pages{dosify($2)}) { # is a link
1499
                     qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1500
                 } else {
1501
                     "$1$2";
1502
                 }
1503
              }xeg;
1504
     $rest =~ s{
1505
                 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1506
               }{
1507
                  my $url ;
1508
                  if ( $Htmlfileurl ne '' ){
1509
                     # Here, we take advantage of the knowledge
1510
                     # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1511
                     # Since $Htmlroot eq '', we need to prepend $Htmldir
1512
                     # on the fron of the link to get the absolute path
1513
                     # of the link's target. We check for a leading '/'
1514
                     # to avoid corrupting links that are #, file:, etc.
1515
                     my $old_url = $3 ;
1516
                     $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1517
                     $url = relativize_url( "$old_url.html", $Htmlfileurl );
1518
                  } else {
1519
                     $url = "$3.html" ;
1520
                  }
1521
                  "$1$url" ;
1522
               }xeg;
1523
 
1524
    # Look for embedded URLs and make them into links.  We don't
1525
    # relativize them since they are best left as the author intended.
1526
 
1527
    my $urls = '(' . join ('|', qw{
1528
                http
1529
                telnet
1530
                mailto
1531
                news
1532
                gopher
1533
                file
1534
                wais
1535
                ftp
1536
            } )
1537
        . ')';
1538
 
1539
    my $ltrs = '\w';
1540
    my $gunk = '/#~:.?+=&%@!\-';
1541
    my $punc = '.:!?\-;';
1542
    my $any  = "${ltrs}${gunk}${punc}";
1543
 
1544
    $rest =~ s{
1545
        \b                      # start at word boundary
1546
        (                       # begin $1  {
1547
            $urls :             # need resource and a colon
1548
            (?!:)               # Ignore File::, among others.
1549
            [$any] +?           # followed by one or more of any valid
1550
                                #   character, but be conservative and
1551
                                #   take only what you need to....
1552
        )                       # end   $1  }
1553
        (?=
1554
            &quot; &gt;         # maybe pre-quoted '<a href="...">'
1555
        |                       # or:
1556
            [$punc]*            # 0 or more punctuation
1557
            (?:                 #   followed
1558
                [^$any]         #   by a non-url char
1559
            |                   #   or
1560
                $               #   end of the string
1561
            )                   #
1562
        |                       # or else
1563
            $                   #   then end of the string
1564
        )
1565
      }{<a href="$1">$1</a>}igox;
1566
 
1567
    # text should be as it is (verbatim)
1568
    $$text = $rest;
1569
}
1570
 
1571
 
1572
#
1573
# pure text processing
1574
#
1575
# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1576
# we don't want this to happen within IS
1577
#
1578
sub pure_text($){
1579
    my $text = shift();
1580
    process_puretext( $text, 1 );
1581
}
1582
 
1583
sub inIS_text($){
1584
    my $text = shift();
1585
    process_puretext( $text, 0 );
1586
}
1587
 
1588
#
1589
# process_puretext - process pure text (without pod-escapes) converting
1590
#  double-quotes and handling implicit C<> links.
1591
#
1592
sub process_puretext {
1593
    my($text, $notinIS) = @_;
1594
 
1595
    ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1596
    ## to produce some strange looking ref's. uncomment to disable:
1597
    ## $notinIS = 0;
1598
 
1599
    my(@words, $lead, $trail);
1600
 
1601
    # keep track of leading and trailing white-space
1602
    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1603
    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1604
 
1605
    # split at space/non-space boundaries
1606
    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1607
 
1608
    # process each word individually
1609
    foreach my $word (@words) {
1610
        # skip space runs
1611
        next if $word =~ /^\s*$/;
1612
        # see if we can infer a link or a function call
1613
        #
1614
        # NOTE: This is a word based search, it won't automatically
1615
        # mark "substr($var, 1, 2)" because the 1st word would be "substr($var"
1616
        # User has to enclose those with proper C<>
1617
 
1618
        if( $notinIS && $word =~
1619
            m/
1620
                ^([a-z_]{2,})                 # The function name
1621
                \(
1622
                    ([0-9][a-z]*              # Manual page(1) or page(1M)
1623
                    |[^)]*[\$\@\%][^)]+       # ($foo), (1, @foo), (%hash)
1624
                    |                         # ()
1625
                    )
1626
                \)
1627
                ([.,;]?)$                     # a possible punctuation follows
1628
            /xi
1629
        ) {
1630
            # has parenthesis so should have been a C<> ref
1631
            ## try for a pagename (perlXXX(1))?
1632
            my( $func, $args, $rest ) = ( $1, $2, $3 || '' );
1633
            if( $args =~ /^\d+$/ ){
1634
                my $url = page_sect( $word, '' );
1635
                if( defined $url ){
1636
                    $word = qq(<a href="$url" class="man">the $word manpage</a>$rest);
1637
                    next;
1638
                }
1639
            }
1640
            ## try function name for a link, append tt'ed argument list
1641
            $word = emit_C( $func, '', "($args)") . $rest;
1642
 
1643
#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1644
##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1645
##          # perl variables, should be a C<> ref
1646
##          $word = emit_C( $word );
1647
 
1648
        } elsif ($word =~ m,^\w+://\w,) {
1649
            # looks like a URL
1650
            # Don't relativize it: leave it as the author intended
1651
            $word = qq(<a href="$word">$word</a>);
1652
        } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1653
            # looks like an e-mail address
1654
            my ($w1, $w2, $w3) = ("", $word, "");
1655
            ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1656
            ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1657
            $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1658
        } else {
1659
            $word = html_escape($word) if $word =~ /["&<>]/;
1660
        }
1661
    }
1662
 
1663
    # put everything back together
1664
    return $lead . join( '', @words ) . $trail;
1665
}
1666
 
1667
 
1668
#
1669
# process_text - handles plaintext that appears in the input pod file.
1670
# there may be pod commands embedded within the text so those must be
1671
# converted to html commands.
1672
#
1673
 
1674
sub process_text1($$;$$);
1675
sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' }
1676
sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 }
1677
 
1678
sub process_text {
1679
    return if $Ignore;
1680
    my( $tref ) = @_;
1681
    my $res = process_text1( 0, $tref );
1682
    $res =~ s/\s+$//s;
1683
    $$tref = $res;
1684
}
1685
 
1686
sub process_text_rfc_links {
1687
    my $text = shift;
1688
 
1689
    # For every "RFCnnnn" or "RFC nnn", link it to the authoritative
1690
    # ource. Do not use the /i modifier here. Require "RFC" to be written in
1691
    #  in capital letters.
1692
 
1693
    $text =~ s{
1694
        (?<=[^<>[:alpha:]])           # Make sure this is not an URL already
1695
        (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits
1696
    }
1697
    {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx;
1698
 
1699
    $text;
1700
}
1701
 
1702
sub process_text1($$;$$){
1703
    my( $lev, $rstr, $func, $closing ) = @_;
1704
    my $res = '';
1705
 
1706
    unless (defined $func) {
1707
        $func = '';
1708
        $lev++;
1709
    }
1710
 
1711
    if( $func eq 'B' ){
1712
        # B<text> - boldface
1713
        $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1714
 
1715
    } elsif( $func eq 'C' ){
1716
        # C<code> - can be a ref or <code></code>
1717
        # need to extract text
1718
        my $par = go_ahead( $rstr, 'C', $closing );
1719
 
1720
        ## clean-up of the link target
1721
        my $text = depod( $par );
1722
 
1723
        ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1724
        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1725
 
1726
        $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1727
 
1728
    } elsif( $func eq 'E' ){
1729
        # E<x> - convert to character
1730
        $$rstr =~ s/^([^>]*)>//;
1731
        my $escape = $1;
1732
        $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1733
        $res = "&$escape;";
1734
 
1735
    } elsif( $func eq 'F' ){
1736
        # F<filename> - italicize
1737
        $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>';
1738
 
1739
    } elsif( $func eq 'I' ){
1740
        # I<text> - italicize
1741
        $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1742
 
1743
    } elsif( $func eq 'L' ){
1744
        # L<link> - link
1745
        ## L<text|cross-ref> => produce text, use cross-ref for linking
1746
        ## L<cross-ref> => make text from cross-ref
1747
        ## need to extract text
1748
        my $par = go_ahead( $rstr, 'L', $closing );
1749
 
1750
        # some L<>'s that shouldn't be:
1751
        # a) full-blown URL's are emitted as-is
1752
        if( $par =~ m{^\w+://}s ){
1753
            return make_URL_href( $par );
1754
        }
1755
        # b) C<...> is stripped and treated as C<>
1756
        if( $par =~ /^C<(.*)>$/ ){
1757
            my $text = depod( $1 );
1758
            return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1759
        }
1760
 
1761
        # analyze the contents
1762
        $par =~ s/\n/ /g;   # undo word-wrapped tags
1763
        my $opar = $par;
1764
        my $linktext;
1765
        if( $par =~ s{^([^|]+)\|}{} ){
1766
            $linktext = $1;
1767
        }
1768
 
1769
        # make sure sections start with a /
1770
        $par =~ s{^"}{/"};
1771
 
1772
        my( $page, $section, $ident );
1773
 
1774
        # check for link patterns
1775
        if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1776
            # we've got a name/ident (no quotes)
1777
            if (length $2) {
1778
                ( $page, $ident ) = ( $1, $2 );
1779
            } else {
1780
                ( $page, $section ) = ( $1, $2 );
1781
            }
1782
            ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1783
 
1784
        } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1785
            # even though this should be a "section", we go for ident first
1786
            ( $page, $ident ) = ( $1, $2 );
1787
            ### print STDERR "--> L<$par> to page $page, section $section\n";
1788
 
1789
        } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1790
            ( $page, $section ) = ( '', $par );
1791
            ### print STDERR "--> L<$par> to void page, section $section\n";
1792
 
1793
        } else {
1794
            ( $page, $section ) = ( $par, '' );
1795
            ### print STDERR "--> L<$par> to page $par, void section\n";
1796
        }
1797
 
1798
        # now, either $section or $ident is defined. the convoluted logic
1799
        # below tries to resolve L<> according to what the user specified.
1800
        # failing this, we try to find the next best thing...
1801
        my( $url, $ltext, $fid );
1802
 
1803
        RESOLVE: {
1804
            if( defined $ident ){
1805
                ## try to resolve $ident as an item
1806
                ( $url, $fid ) = coderef( $page, $ident );
1807
                if( $url ){
1808
                    if( ! defined( $linktext ) ){
1809
                        $linktext = $ident;
1810
                        $linktext .= " in " if $ident && $page;
1811
                        $linktext .= "the $page manpage" if $page;
1812
                    }
1813
                    ###  print STDERR "got coderef url=$url\n";
1814
                    last RESOLVE;
1815
                }
1816
                ## no luck: go for a section (auto-quoting!)
1817
                $section = $ident;
1818
            }
1819
            ## now go for a section
1820
            my $htmlsection = htmlify( $section );
1821
            $url = page_sect( $page, $htmlsection );
1822
            if( $url ){
1823
                if( ! defined( $linktext ) ){
1824
                    $linktext = $section;
1825
                    $linktext .= " in " if $section && $page;
1826
                    $linktext .= "the $page manpage" if $page;
1827
                }
1828
                ### print STDERR "got page/section url=$url\n";
1829
                last RESOLVE;
1830
            }
1831
            ## no luck: go for an ident
1832
            if( $section ){
1833
                $ident = $section;
1834
            } else {
1835
                $ident = $page;
1836
                $page  = undef();
1837
            }
1838
            ( $url, $fid ) = coderef( $page, $ident );
1839
            if( $url ){
1840
                if( ! defined( $linktext ) ){
1841
                    $linktext = $ident;
1842
                    $linktext .= " in " if $ident && $page;
1843
                    $linktext .= "the $page manpage" if $page;
1844
                }
1845
                ### print STDERR "got section=>coderef url=$url\n";
1846
                last RESOLVE;
1847
            }
1848
 
1849
            # warning; show some text.
1850
            $linktext = $opar unless defined $linktext;
1851
            warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1852
        }
1853
 
1854
        # now we have a URL or just plain code
1855
        $$rstr = $linktext . '>' . $$rstr;
1856
        if( defined( $url ) ){
1857
            $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1858
        } else {
1859
            $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1860
        }
1861
 
1862
    } elsif( $func eq 'S' ){
1863
        # S<text> - non-breaking spaces
1864
        $res = process_text1( $lev, $rstr );
1865
        $res =~ s/ /&nbsp;/g;
1866
 
1867
    } elsif( $func eq 'X' ){
1868
        # X<> - ignore
1869
        warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1870
            unless $$rstr =~ s/^[^>]*>// or $Quiet;
1871
    } elsif( $func eq 'Z' ){
1872
        # Z<> - empty
1873
        warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n"
1874
            unless $$rstr =~ s/^>// or $Quiet;
1875
 
1876
    } else {
1877
        my $term = pattern $closing;
1878
        while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1879
            # all others: either recurse into new function or
1880
            # terminate at closing angle bracket(s)
1881
            my $pt = $1;
1882
            $pt .= $2 if !$3 &&  $lev == 1;
1883
            $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1884
            return $res if !$3 && $lev > 1;
1885
            if( $3 ){
1886
                $res .= process_text1( $lev, $rstr, $3, closing $4 );
1887
            }
1888
        }
1889
        if( $lev == 1 ){
1890
            $res .= pure_text( $$rstr );
1891
        } elsif( ! $Quiet ) {
1892
            my $snippet = substr($$rstr,0,60);
1893
            warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" 
1894
 
1895
        }
1896
        $res = process_text_rfc_links($res);
1897
    }
1898
    return $res;
1899
}
1900
 
1901
#
1902
# go_ahead: extract text of an IS (can be nested)
1903
#
1904
sub go_ahead($$$){
1905
    my( $rstr, $func, $closing ) = @_;
1906
    my $res = '';
1907
    my @closing = ($closing);
1908
    while( $$rstr =~
1909
      s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){
1910
        $res .= $1;
1911
        unless( $3 ){
1912
            shift @closing;
1913
            return $res unless @closing;
1914
        } else {
1915
            unshift @closing, closing $4;
1916
        }
1917
        $res .= $2;
1918
    }
1919
    unless ($Quiet) {
1920
        my $snippet = substr($$rstr,0,60);
1921
        warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n" 
1922
    }           
1923
    return $res;
1924
}
1925
 
1926
#
1927
# emit_C - output result of C<text>
1928
#    $text is the depod-ed text
1929
#
1930
sub emit_C($;$$){
1931
    my( $text, $nocode, $args ) = @_;
1932
    $args = '' unless defined $args;
1933
    my $res;
1934
    my( $url, $fid ) = coderef( undef(), $text );
1935
 
1936
    # need HTML-safe text
1937
    my $linktext = html_escape( "$text$args" );
1938
 
1939
    if( defined( $url ) &&
1940
        (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1941
        $res = "<a href=\"$url\"><code>$linktext</code></a>";
1942
    } elsif( 0 && $nocode ){
1943
        $res = $linktext;
1944
    } else {
1945
        $res = "<code>$linktext</code>";
1946
    }
1947
    return $res;
1948
}
1949
 
1950
#
1951
# html_escape: make text safe for HTML
1952
#
1953
sub html_escape {
1954
    my $rest = $_[0];
1955
    $rest   =~ s/&/&amp;/g;
1956
    $rest   =~ s/</&lt;/g;
1957
    $rest   =~ s/>/&gt;/g;
1958
    $rest   =~ s/"/&quot;/g;
1959
    # &apos; is only in XHTML, not HTML4.  Be conservative
1960
    #$rest   =~ s/'/&apos;/g;
1961
    return $rest;
1962
}
1963
 
1964
 
1965
#
1966
# dosify - convert filenames to 8.3
1967
#
1968
sub dosify {
1969
    my($str) = @_;
1970
    return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1971
    if ($Is83) {
1972
        $str = lc $str;
1973
        $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1974
        $str =~ s/(\w+)/substr ($1,0,8)/ge;
1975
    }
1976
    return $str;
1977
}
1978
 
1979
#
1980
# page_sect - make a URL from the text of a L<>
1981
#
1982
sub page_sect($$) {
1983
    my( $page, $section ) = @_;
1984
    my( $linktext, $page83, $link);     # work strings
1985
 
1986
    # check if we know that this is a section in this page
1987
    if (!defined $Pages{$page} && defined $Sections{$page}) {
1988
        $section = $page;
1989
        $page = "";
1990
        ### print STDERR "reset page='', section=$section\n";
1991
    }
1992
 
1993
    $page83=dosify($page);
1994
    $page=$page83 if (defined $Pages{$page83});
1995
    if ($page eq "") {
1996
        $link = "#" . anchorify( $section );
1997
    } elsif ( $page =~ /::/ ) {
1998
        $page =~ s,::,/,g;
1999
        # Search page cache for an entry keyed under the html page name,
2000
        # then look to see what directory that page might be in.  NOTE:
2001
        # this will only find one page. A better solution might be to produce
2002
        # an intermediate page that is an index to all such pages.
2003
        my $page_name = $page ;
2004
        $page_name =~ s,^.*/,,s ;
2005
        if ( defined( $Pages{ $page_name } ) &&
2006
             $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
2007
           ) {
2008
            $page = $1 ;
2009
        }
2010
        else {
2011
            # NOTE: This branch assumes that all A::B pages are located in
2012
            # $Htmlroot/A/B.html . This is often incorrect, since they are
2013
            # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
2014
            # analyze the contents of %Pages and figure out where any
2015
            # cousins of A::B are, then assume that.  So, if A::B isn't found,
2016
            # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
2017
            # lib/A/B.pm. This is also limited, but it's an improvement.
2018
            # Maybe a hints file so that the links point to the correct places
2019
            # nonetheless?
2020
 
2021
        }
2022
        $link = "$Htmlroot/$page.html";
2023
        $link .= "#" . anchorify( $section ) if ($section);
2024
    } elsif (!defined $Pages{$page}) {
2025
        $link = "";
2026
    } else {
2027
        $section = anchorify( $section ) if $section ne "";
2028
        ### print STDERR "...section=$section\n";
2029
 
2030
        # if there is a directory by the name of the page, then assume that an
2031
        # appropriate section will exist in the subdirectory
2032
#       if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
2033
        if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
2034
            $link = "$Htmlroot/$1/$section.html";
2035
            ### print STDERR "...link=$link\n";
2036
 
2037
        # since there is no directory by the name of the page, the section will
2038
        # have to exist within a .html of the same name.  thus, make sure there
2039
        # is a .pod or .pm that might become that .html
2040
        } else {
2041
            $section = "#$section" if $section;
2042
            ### print STDERR "...section=$section\n";
2043
 
2044
            # check if there is a .pod with the page name.
2045
            # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
2046
            if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
2047
                $link = "$Htmlroot/$1.html$section";
2048
            } else {
2049
                $link = "";
2050
            }
2051
        }
2052
    }
2053
 
2054
    if ($link) {
2055
        # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
2056
        # implies $Htmlroot eq ''. This means that the link in question
2057
        # needs a prefix of $Htmldir if it begins with '/'. The test for
2058
        # the initial '/' is done to avoid '#'-only links, and to allow
2059
        # for other kinds of links, like file:, ftp:, etc.
2060
        my $url ;
2061
        if (  $Htmlfileurl ne '' ) {
2062
            $link = "$Htmldir$link" if $link =~ m{^/}s;
2063
            $url = relativize_url( $link, $Htmlfileurl );
2064
# print( "  b: [$link,$Htmlfileurl,$url]\n" );
2065
        }
2066
        else {
2067
            $url = $link ;
2068
        }
2069
        return $url;
2070
 
2071
    } else {
2072
        return undef();
2073
    }
2074
}
2075
 
2076
#
2077
# relativize_url - convert an absolute URL to one relative to a base URL.
2078
# Assumes both end in a filename.
2079
#
2080
sub relativize_url {
2081
    my ($dest,$source) = @_ ;
2082
 
2083
    my ($dest_volume,$dest_directory,$dest_file) =
2084
        File::Spec::Unix->splitpath( $dest ) ;
2085
    $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
2086
 
2087
    my ($source_volume,$source_directory,$source_file) =
2088
        File::Spec::Unix->splitpath( $source ) ;
2089
    $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
2090
 
2091
    my $rel_path = '' ;
2092
    if ( $dest ne '' ) {
2093
       $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
2094
    }
2095
 
2096
    if ( $rel_path ne ''                &&
2097
         substr( $rel_path, -1 ) ne '/' &&
2098
         substr( $dest_file, 0, 1 ) ne '#'
2099
        ) {
2100
        $rel_path .= "/$dest_file" ;
2101
    }
2102
    else {
2103
        $rel_path .= "$dest_file" ;
2104
    }
2105
 
2106
    return $rel_path ;
2107
}
2108
 
2109
 
2110
#
2111
# coderef - make URL from the text of a C<>
2112
#
2113
sub coderef($$){
2114
    my( $page, $item ) = @_;
2115
    my( $url );
2116
 
2117
    my $fid = fragment_id( $item );
2118
 
2119
    if( defined( $page ) && $page ne "" ){
2120
        # we have been given a $page...
2121
        $page =~ s{::}{/}g;
2122
 
2123
        Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile")
2124
            if !defined $fid;    
2125
        # Do we take it? Item could be a section!
2126
        my $base = $Items{$fid} || "";
2127
        $base =~ s{[^/]*/}{};
2128
        if( $base ne "$page.html" ){
2129
            ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
2130
            $page = undef();
2131
        }
2132
 
2133
    } else {
2134
        # no page - local items precede cached items
2135
        if( defined( $fid ) ){
2136
            if(  exists $Local_Items{$fid} ){
2137
                $page = $Local_Items{$fid};
2138
            } else {
2139
                $page = $Items{$fid};
2140
            }
2141
        }
2142
    }
2143
 
2144
    # if there was a pod file that we found earlier with an appropriate
2145
    # =item directive, then create a link to that page.
2146
    if( defined $page ){
2147
        if( $page ){
2148
            if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
2149
                $page = $1 . '.html';
2150
            }
2151
            my $link = "$Htmlroot/$page#item_" . anchorify($fid);
2152
 
2153
            # Here, we take advantage of the knowledge that $Htmlfileurl
2154
            # ne '' implies $Htmlroot eq ''.
2155
            if (  $Htmlfileurl ne '' ) {
2156
                $link = "$Htmldir$link" ;
2157
                $url = relativize_url( $link, $Htmlfileurl ) ;
2158
            } else {
2159
                $url = $link ;
2160
            }
2161
        } else {
2162
            $url = "#item_" . anchorify($fid);
2163
        }
2164
 
2165
        confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
2166
    }
2167
    return( $url, $fid );
2168
}
2169
 
2170
 
2171
 
2172
#
2173
# Adapted from Nick Ing-Simmons' PodToHtml package.
2174
sub relative_url {
2175
    my $source_file = shift ;
2176
    my $destination_file = shift;
2177
 
2178
    my $source = URI::file->new_abs($source_file);
2179
    my $uo = URI::file->new($destination_file,$source)->abs;
2180
    return $uo->rel->as_string;
2181
}
2182
 
2183
 
2184
#
2185
# finish_list - finish off any pending HTML lists.  this should be called
2186
# after the entire pod file has been read and converted.
2187
#
2188
sub finish_list {
2189
    while ($Listlevel > 0) {
2190
        print HTML "</dl>\n";
2191
        $Listlevel--;
2192
    }
2193
}
2194
 
2195
#
2196
# htmlify - converts a pod section specification to a suitable section
2197
# specification for HTML. Note that we keep spaces and special characters
2198
# except ", ? (Netscape problem) and the hyphen (writer's problem...).
2199
#
2200
sub htmlify {
2201
    my( $heading) = @_;
2202
    $heading =~ s/(\s+)/ /g;
2203
    $heading =~ s/\s+\Z//;
2204
    $heading =~ s/\A\s+//;
2205
    # The hyphen is a disgrace to the English language.
2206
    # $heading =~ s/[-"?]//g;
2207
    $heading =~ s/["?]//g;
2208
    $heading = lc( $heading );
2209
    return $heading;
2210
}
2211
 
2212
#
2213
# similar to htmlify, but turns non-alphanumerics into underscores
2214
#
2215
sub anchorify {
2216
    my ($anchor) = @_;
2217
    $anchor = htmlify($anchor);
2218
    $anchor =~ s/\W/_/g;
2219
    return $anchor;
2220
}
2221
 
2222
#
2223
# depod - convert text by eliminating all interior sequences
2224
# Note: can be called with copy or modify semantics
2225
#
2226
my %E2c;
2227
$E2c{lt}     = '<';
2228
$E2c{gt}     = '>';
2229
$E2c{sol}    = '/';
2230
$E2c{verbar} = '|';
2231
$E2c{amp}    = '&'; # in Tk's pods
2232
 
2233
sub depod1($;$$);
2234
 
2235
sub depod($){
2236
    my $string;
2237
    if( ref( $_[0] ) ){
2238
        $string =  ${$_[0]};
2239
        ${$_[0]} = depod1( \$string );
2240
    } else {
2241
        $string =  $_[0];
2242
        depod1( \$string );
2243
    }
2244
}
2245
 
2246
sub depod1($;$$){
2247
  my( $rstr, $func, $closing ) = @_;
2248
  my $res = '';
2249
  return $res unless defined $$rstr;
2250
  if( ! defined( $func ) ){
2251
      # skip to next begin of an interior sequence
2252
      while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2253
         # recurse into its text
2254
          $res .= $1 . depod1( $rstr, $2, closing $3);
2255
      }
2256
      $res .= $$rstr;
2257
  } elsif( $func eq 'E' ){
2258
      # E<x> - convert to character
2259
      $$rstr =~ s/^([^>]*)>//;
2260
      $res .= $E2c{$1} || "";
2261
  } elsif( $func eq 'X' ){
2262
      # X<> - ignore
2263
      $$rstr =~ s/^[^>]*>//;
2264
  } elsif( $func eq 'Z' ){
2265
      # Z<> - empty
2266
      $$rstr =~ s/^>//;
2267
  } else {
2268
      # all others: either recurse into new function or
2269
      # terminate at closing angle bracket
2270
      my $term = pattern $closing;
2271
      while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2272
          $res .= $1;
2273
          last unless $3;
2274
          $res .= depod1( $rstr, $3, closing $4 );
2275
      }
2276
      ## If we're here and $2 ne '>': undelimited interior sequence.
2277
      ## Ignored, as this is called without proper indication of where we are.
2278
      ## Rely on process_text to produce diagnostics.
2279
  }
2280
  return $res;
2281
}
2282
 
2283
{
2284
    my %seen;   # static fragment record hash
2285
 
2286
sub fragment_id_readable {
2287
    my $text     = shift;
2288
    my $generate = shift;   # optional flag
2289
 
2290
    my $orig = $text;
2291
 
2292
    # leave the words for the fragment identifier,
2293
    # change everything else to underbars.
2294
    $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency.
2295
    $text =~ s/_{2,}/_/g;
2296
    $text =~ s/\A_//;
2297
    $text =~ s/_\Z//;
2298
 
2299
    unless ($text)
2300
    {
2301
        # Nothing left after removing punctuation, so leave it as is
2302
        # E.g. if option is named: "=item -#"
2303
 
2304
        $text = $orig;
2305
    }
2306
 
2307
    if ($generate) {
2308
        if ( exists $seen{$text} ) {
2309
            # This already exists, make it unique
2310
            $seen{$text}++;
2311
            $text = $text . $seen{$text};
2312
        } else {
2313
            $seen{$text} = 1;  # first time seen this fragment
2314
        }
2315
    }
2316
 
2317
    $text;
2318
}}
2319
 
2320
#
2321
# fragment_id - construct a fragment identifier from:
2322
#   a) =item text
2323
#   b) contents of C<...>
2324
#
2325
 
2326
sub fragment_id {
2327
    my $text     = shift;
2328
    my $generate = shift;   # optional flag
2329
 
2330
    $text =~ s/\s+\Z//s;
2331
    if( $text ){
2332
        # a method or function?
2333
        return $1 if $text =~ /(\w+)\s*\(/;
2334
        return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2335
 
2336
        # a variable name?
2337
        return $1 if $text =~ /^([\$\@%*]\S+)/;
2338
 
2339
        # some pattern matching operator?
2340
        return $1 if $text =~ m|^(\w+/).*/\w*$|;
2341
 
2342
        # fancy stuff... like "do { }"
2343
        return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2344
 
2345
        # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2346
        # and some funnies with ... Module ...
2347
        return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2348
        return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2349
 
2350
        return fragment_id_readable($text, $generate);
2351
    } else {
2352
        return;
2353
    }
2354
}
2355
 
2356
#
2357
# make_URL_href - generate HTML href from URL
2358
# Special treatment for CGI queries.
2359
#
2360
sub make_URL_href($){
2361
    my( $url ) = @_;
2362
    if( $url !~
2363
        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2364
        $url = "<a href=\"$url\">$url</a>";
2365
    }
2366
    return $url;
2367
}
2368
 
2369
1;