Subversion Repositories DevTools

Rev

Rev 4386 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
361 dpurdie 1
#
2
#   Pinched from ActiveState v5.8.8 , Binary build 822 [280952]
3
#   For the same reason as the other POD:: bits
4
#       Fix bugs
5
#       Ensure that they are fixed independently of the Perl Pelease
6
#
7
#   In this module
8
#       * Remove AvtiveState mailto:
9
#
10
 
11
package Pod::Html;
12
use strict;
13
require Exporter;
14
 
15
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
16
$VERSION = "1.0801";
17
@ISA = qw(Exporter);
18
@EXPORT = qw(pod2html htmlify);
19
@EXPORT_OK = qw(anchorify);
20
 
21
use Carp;
22
use Config;
23
use Cwd;
24
use File::Spec;
25
use File::Spec::Unix;
26
use Getopt::Long;
27
 
28
use locale;     # make \w work right in non-ASCII lands
29
 
30
=head1 NAME
31
 
32
Pod::Html - module to convert pod files to HTML
33
 
34
=head1 SYNOPSIS
35
 
36
    use Pod::Html;
37
    pod2html([options]);
38
 
39
=head1 DESCRIPTION
40
 
41
Converts files from pod format (see L<perlpod>) to HTML format.  It
42
can automatically generate indexes and cross-references, and it keeps
43
a cache of things it knows how to cross-reference.
44
 
45
=head1 FUNCTIONS
46
 
47
=head2 pod2html
48
 
49
    pod2html("pod2html",
50
             "--podpath=lib:ext:pod:vms",
51
             "--podroot=/usr/src/perl",
52
             "--htmlroot=/perl/nmanual",
53
             "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
54
             "--recurse",
55
             "--infile=foo.pod",
56
             "--outfile=/perl/nmanual/foo.html");
57
 
58
pod2html takes the following arguments:
59
 
60
=over 4
61
 
62
=item backlink
63
 
64
    --backlink="Back to Top"
65
 
66
Adds "Back to Top" links in front of every C<head1> heading (except for
67
the first).  By default, no backlinks are generated.
68
 
69
=item cachedir
70
 
71
    --cachedir=name
72
 
73
Creates the item and directory caches in the given directory.
74
 
75
=item css
76
 
77
    --css=stylesheet
78
 
79
Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
80
C<style> attributes that are output by default (to avoid conflicts).
81
 
82
=item flush
83
 
84
    --flush
85
 
86
Flushes the item and directory caches.
87
 
88
=item header
89
 
90
    --header
91
    --noheader
92
 
93
Creates header and footer blocks containing the text of the C<NAME>
94
section.  By default, no headers are generated.
95
 
96
=item help
97
 
98
    --help
99
 
100
Displays the usage message.
101
 
102
=item hiddendirs
103
 
104
    --hiddendirs
105
    --nohiddendirs
106
 
107
Include hidden directories in the search for POD's in podpath if recurse
108
is set.
109
The default is not to traverse any directory whose name begins with C<.>.
110
See L</"podpath"> and L</"recurse">.
111
 
112
[This option is for backward compatibility only.
113
It's hard to imagine that one would usefully create a module with a
114
name component beginning with C<.>.]
115
 
116
=item htmldir
117
 
118
    --htmldir=name
119
 
120
Sets the directory in which the resulting HTML file is placed.  This
121
is used to generate relative links to other files. Not passing this
122
causes all links to be absolute, since this is the value that tells
123
Pod::Html the root of the documentation tree.
124
 
125
=item htmlroot
126
 
127
    --htmlroot=name
128
 
129
Sets the base URL for the HTML files.  When cross-references are made,
130
the HTML root is prepended to the URL.
131
 
132
=item index
133
 
134
    --index
135
    --noindex
136
 
137
Generate an index at the top of the HTML file.  This is the default
138
behaviour.
139
 
140
=item infile
141
 
142
    --infile=name
143
 
144
Specify the pod file to convert.  Input is taken from STDIN if no
145
infile is specified.
146
 
147
=item libpods
148
 
149
    --libpods=name:...:name
150
 
151
List of page names (eg, "perlfunc") which contain linkable C<=item>s.
152
 
153
=item netscape
154
 
155
    --netscape
156
    --nonetscape
157
 
158
B<Deprecated>, has no effect. For backwards compatibility only.
159
 
160
=item outfile
161
 
162
    --outfile=name
163
 
164
Specify the HTML file to create.  Output goes to STDOUT if no outfile
165
is specified.
166
 
167
=item podpath
168
 
169
    --podpath=name:...:name
170
 
171
Specify which subdirectories of the podroot contain pod files whose
172
HTML converted forms can be linked to in cross references.
173
 
174
=item podroot
175
 
176
    --podroot=name
177
 
178
Specify the base directory for finding library pods.
179
 
180
=item quiet
181
 
182
    --quiet
183
    --noquiet
184
 
185
Don't display I<mostly harmless> warning messages.  These messages
186
will be displayed by default.  But this is not the same as C<verbose>
187
mode.
188
 
189
=item recurse
190
 
191
    --recurse
192
    --norecurse
193
 
194
Recurse into subdirectories specified in podpath (default behaviour).
195
 
196
=item title
197
 
198
    --title=title
199
 
200
Specify the title of the resulting HTML file.
201
 
202
=item verbose
203
 
204
    --verbose
205
    --noverbose
206
 
207
Display progress messages.  By default, they won't be displayed.
208
 
209
=back
210
 
211
=head2 htmlify
212
 
213
    htmlify($heading);
214
 
215
Converts a pod section specification to a suitable section specification
216
for HTML. Note that we keep spaces and special characters except 
217
C<", ?> (Netscape problem) and the hyphen (writer's problem...).
218
 
219
=head2 anchorify
220
 
221
    anchorify(@heading);
222
 
223
Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
224
that C<anchorify()> is not exported by default.
225
 
226
=head1 ENVIRONMENT
227
 
228
Uses C<$Config{pod2html}> to setup default options.
229
 
230
=head1 AUTHOR
231
 
232
Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
233
 
234
=head1 SEE ALSO
235
 
236
L<perlpod>
237
 
238
=head1 COPYRIGHT
239
 
240
This program is distributed under the Artistic License.
241
 
242
=cut
243
 
244
my($Cachedir);
245
my($Dircache, $Itemcache, $Toccache);
246
my @Begin_Stack;
247
my @Libpods;
248
my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
249
my($Podfile, @Podpath, $Podroot);
250
my $Css;
251
 
252
my $Recurse;
253
my $Quiet;
254
my $HiddenDirs;
255
my $Verbose;
256
my $Doindex;
257
 
258
my $Backlink;
259
my($Listlevel, @Listend);
260
my $After_Lpar;
261
use vars qw($Ignore);  # need to localize it later.
262
 
263
my(%Items_Named, @Items_Seen);
264
my($Title, $Header);
265
 
266
my $Top;
267
my $Paragraph;
268
 
269
my %Sections;
270
 
271
# Caches
272
my %Pages = ();                 # associative array used to find the location
273
                                #   of pages referenced by L<> links.
274
my %Items = ();                 # associative array used to find the location
275
                                #   of =item directives referenced by C<> links
276
 
277
my %Toc = ();                   # Hash of Table of content metadata
278
 
279
my %Local_Items;
280
my $Is83;
281
 
282
my $Curdir = File::Spec->curdir;
283
my $set_p_class;
284
 
285
init_globals();
286
 
287
sub init_globals {
288
    $Cachedir = ".";            # The directory to which item and directory
289
                                # caches will be written.
290
 
291
    $Dircache = "pod2htmd.tmp";
292
    $Itemcache = "pod2htmi.tmp";
293
    $Toccache = "pod2htmlt.tmp";
294
 
295
    @Begin_Stack = ();          # begin/end stack
296
 
297
    @Libpods = ();              # files to search for links from C<> directives
298
    $Htmlroot = "/";            # http-server base directory from which all
299
                                #   relative paths in $podpath stem.
300
    $Htmldir = "";              # The directory to which the html pages
301
                                # will (eventually) be written.
302
    $Htmlfile = "";             # write to stdout by default
303
    $Htmlfileurl = "" ;         # The url that other files would use to
304
                                # refer to this file.  This is only used
305
                                # to make relative urls that point to
306
                                # other files.
307
 
308
    $Podfile = "";              # read from stdin by default
309
    @Podpath = ();              # list of directories containing library pods.
310
    $Podroot = $Curdir;         # filesystem base directory from which all
311
                                #   relative paths in $podpath stem.
312
    $Css = '';                  # Cascading style sheet
313
    $Recurse = 1;               # recurse on subdirectories in $podpath.
314
    $Quiet = 0;                 # not quiet by default
315
    $Verbose = 0;               # not verbose by default
316
    $Doindex = 1;               # non-zero if we should generate an index
317
    $Backlink = '';             # text for "back to top" links
318
    $Listlevel = 0;             # current list depth
319
    @Listend = ();              # the text to use to end the list.
320
    $After_Lpar = 0;            # set to true after a par in an =item
321
    $Ignore = 1;                # whether or not to format text.  we don't
322
                                #   format text until we hit our first pod
323
                                #   directive.
324
 
325
    @Items_Seen = ();           # for multiples of the same item in perlfunc
326
    %Items_Named = ();
327
    $Header = 0;                # produce block header/footer
328
    $Title = '';                # title to give the pod(s)
329
    $Top = 1;                   # true if we are at the top of the doc.  used
330
                                #   to prevent the first <hr /> directive.
331
    $Paragraph = '';            # which paragraph we're processing (used
332
                                #   for error messages)
333
    %Sections = ();             # sections within this page
334
 
335
    %Local_Items = ();
336
    $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
337
#    $set_p_class = '';
338
}
339
 
340
#
341
# clean_data: global clean-up of pod data
342
#
343
sub clean_data($){
344
    my( $dataref ) = @_;
345
    for my $i ( 0..$#{$dataref} ) {
346
 
347
        ${$dataref}[$i] =~ s/\s+\Z//;
348
        ${$dataref}[$i] =~ s/^\n+(.+)/$1/;
349
 
350
        # have a look for all-space lines
351
      if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
352
            my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
353
            splice( @$dataref, $i, 1, @chunks );
354
        }
355
    }
356
}
357
 
358
 
359
sub pod2html {
360
    local(@ARGV) = @_;
361
    local($/);
362
    local $_;
363
 
364
    init_globals();
365
 
366
    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
367
 
368
    # cache of %Pages and %Items from last time we ran pod2html
369
 
370
    #undef $opt_help if defined $opt_help;
371
 
372
    # parse the command-line parameters
373
    parse_command_line();
374
 
375
    # escape the backlink argument (same goes for title but is done later...)
376
    $Backlink = html_escape($Backlink) if defined $Backlink;
377
 
378
    # set some variables to their default values if necessary
379
    local *POD;
380
    unless (@ARGV && $ARGV[0]) {
381
        $Podfile  = "-" unless $Podfile;        # stdin
382
        open(POD, "<$Podfile")
383
                || die "$0: cannot open $Podfile file for input: $!\n";
384
    } else {
385
        $Podfile = $ARGV[0];  # XXX: might be more filenames
386
        *POD = *ARGV;
387
    }
388
    $Htmlfile = "-" unless $Htmlfile;   # stdout
389
    $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
390
    $Htmldir =~ s#/\z## ;               # so we don't get a //
391
    if (  $Htmlroot eq ''
392
       && defined( $Htmldir )
393
       && $Htmldir ne ''
394
       && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
395
       )
396
    {
397
        # Set the 'base' url for this file, so that we can use it
398
        # as the location from which to calculate relative links
399
        # to other files. If this is '', then absolute links will
400
        # be used throughout.
401
        $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
402
    }
403
 
404
    # read the pod a paragraph at a time
405
    warn "Scanning for sections in input file(s)\n" if $Verbose;
406
    $/ = "";
407
    my @poddata  = <POD>;
408
    close(POD);
409
 
410
    # be eol agnostic
411
    for (@poddata) {
412
        if (/\r/) {
413
            if (/\r\n/) {
414
                @poddata = map { s/\r\n/\n/g;
415
                                 /\n\n/ ?
416
                                     map { "$_\n\n" } split /\n\n/ :
417
                                     $_ } @poddata;
418
            } else {
419
                @poddata = map { s/\r/\n/g;
420
                                 /\n\n/ ?
421
                                     map { "$_\n\n" } split /\n\n/ :
422
                                     $_ } @poddata;
423
            }
424
            last;
425
        }
426
    }
427
    clean_data( \@poddata );
428
 
429
 
430
    # scan the pod for =head[1-6] directives and build an index
431
    my $index = scan_headings(\%Sections, @poddata);
432
 
433
    unless($index) {
434
        warn "No headings in $Podfile\n" if $Verbose;
435
    }
436
 
437
    # open the output file
438
    open(HTML, ">$Htmlfile")
439
            || die "$0: cannot open $Htmlfile file for output: $!\n";
440
 
441
    # put a title in the HTML file if one wasn't specified
442
    if ($Title eq '') {
443
        TITLE_SEARCH: {
444
            for (my $i = 0; $i < @poddata; $i++) {
445
                if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
446
                    for my $para ( @poddata[$i, $i+1] ) {
447
                        last TITLE_SEARCH
448
                            if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
449
                    }
450
                }
451
 
452
            }
453
        }
454
    }
455
    if (!$Title and $Podfile =~ /\.pod\z/) {
456
        # probably a split pod so take first =head[12] as title
457
        for (my $i = 0; $i < @poddata; $i++) {
458
            last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
459
        }
460
        warn "adopted '$Title' as title for $Podfile\n"
461
            if $Verbose and $Title;
462
    }
463
    if ($Title) {
464
        $Title =~ s/\s*\(.*\)//;
465
    } else {
466
        warn "$0: no title for $Podfile.\n" unless $Quiet;
467
        $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
468
        $Title = ($Podfile eq "-" ? 'No Title' : $1);
469
        warn "using $Title" if $Verbose;
470
    }
471
    $Title = html_escape($Title);
472
 
473
    my $csslink = '';
474
    my $bodystyle = ' style="background-color: white"';
475
    my $tdstyle = ' style="background-color: #cccccc"';
476
 
477
    if ($Css) {
478
      $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
479
      $csslink =~ s,\\,/,g;
480
      $csslink =~ s,(/.):,$1|,;
481
      $bodystyle = '';
482
      $tdstyle = '';
483
    }
484
 
485
      my $block = $Header ? <<END_OF_BLOCK : '';
486
<table border="0" width="100%" cellspacing="0" cellpadding="3">
487
<tr><td class="block"$tdstyle valign="middle">
488
<big><strong><span class="block">&nbsp;$Title</span></strong></big>
489
</td></tr>
490
</table>
491
END_OF_BLOCK
492
 
493
    print HTML <<END_OF_HEAD;
494
<?xml version="1.0" ?>
495
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
496
<html xmlns="http://www.w3.org/1999/xhtml">
497
<head>
498
<title>$Title</title>$csslink
499
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
500
</head>
501
 
502
<body$bodystyle>
503
$block
504
END_OF_HEAD
505
 
506
    # load/reload/validate/cache %Pages and %Items
507
    get_cache($Dircache, $Itemcache, $Toccache, \@Podpath, $Podroot, $Recurse);
508
 
509
    # scan the pod for =item directives
510
    scan_items( \%Local_Items, "", @poddata);
511
 
512
    scan_toc( \%Toc, $Podfile, @poddata);
513
    save_toc($Podroot);
514
 
515
    # put an index at the top of the file.  note, if $Doindex is 0 we
516
    # still generate an index, but surround it with an html comment.
517
    # that way some other program can extract it if desired.
518
    $index =~ s/--+/-/g;
519
 
520
    my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : "";
521
    unless ($Doindex)
522
    {
523
        $index = qq(<!--\n$index\n-->\n);
524
    }
525
 
526
    print HTML << "END_OF_INDEX";
527
 
528
<!-- INDEX BEGIN -->
529
<div name="index"  class="index">
530
<p><a name=\"__index__\"></a></p>
531
$index
532
$hr
533
</div>
534
<!-- INDEX END -->
535
 
536
END_OF_INDEX
537
 
538
    # now convert this file
539
    my $after_item;             # set to true after an =item
540
    my $need_dd = 0;
541
    warn "Converting input file $Podfile\n" if $Verbose;
542
    foreach my $i (0..$#poddata){
543
        $_ = $poddata[$i];
544
        $Paragraph = $i+1;
545
        if (/^(=.*)/s) {        # is it a pod directive?
546
            $Ignore = 0;
547
            $after_item = 0;
548
            $need_dd = 0;
549
            $_ = $1;
550
            if (/^=begin\s+(\S+)\s*(.*)/si) {   # =begin
551
                process_begin($1, $2);
552
            } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
553
                process_end($1, $2);
554
            } elsif (/^=cut/) {                 # =cut
555
                process_cut();
556
            } elsif (/^=pod/) {                 # =pod
557
                process_pod();
558
            } else {
559
                next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
560
 
561
                if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
562
                    process_head( $1, $2, $Doindex && $index );
563
                } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
564
                    $need_dd = process_item( $1 );
565
                    $after_item = 1;
566
                } elsif (/^=over\s*(.*)/) {             # =over N
567
                    process_over();
568
                } elsif (/^=back/) {                    # =back
569
                    process_back($need_dd);
570
                } elsif (/^=for\s+(\S+)\s*(.*)/si) {    # =for
571
                    process_for($1,$2);
572
                } else {
573
                    /^=(\S*)\s*/;
574
                    warn "$0: $Podfile: unknown pod directive '$1' in "
575
                       . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
576
                }
577
            }
578
            $Top = 0;
579
        }
580
        else {
581
            next if $Ignore;
582
            next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
583
            print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
584
            print HTML "<dd>\n" if $need_dd;
585
            my $text = $_;
586
            if( $text =~ /\A\s+/ ){
587
                process_pre( \$text );
588
                print HTML "<pre>\n$text</pre>\n";
589
 
590
            } else {
591
                process_text( \$text );
592
 
593
                # experimental: check for a paragraph where all lines
594
                # have some ...\t...\t...\n pattern
595
                if( $text =~ /\t/ ){
596
                    my @lines = split( "\n", $text );
597
                    if( @lines > 1 ){
598
                        my $all = 2;
599
                        foreach my $line ( @lines ){
600
                            if( $line =~ /\S/ && $line !~ /\t/ ){
601
                                $all--;
602
                                last if $all == 0;
603
                            }
604
                        }
605
                        if( $all > 0 ){
606
                            $text =~ s/\t+/<td>/g;
607
                            $text =~ s/^/<tr><td>/gm;
608
                            $text = '<table cellspacing="0" cellpadding="0">' .
609
                                    $text . '</table>';
610
                        }
611
                    }
612
                }
613
                ## end of experimental
614
 
615
                if( $after_item ){
616
                    $After_Lpar = 1;
617
                }
618
 
619
        if ( $set_p_class )
620
        {
621
                print HTML "<p class=\"$set_p_class\">$text</p>\n";
622
            $set_p_class = 0;
623
        }
624
        else
625
        {
626
                    print HTML "<p>$text</p>\n";
627
        }
628
            }
629
            print HTML "</dd>\n" if $need_dd;
630
            $after_item = 0;
631
        }
632
    }
633
 
634
    # finish off any pending directives
635
    finish_list();
636
 
637
    # link to page index
638
    print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
639
        if $Doindex and $index and $Backlink;
640
 
641
    print HTML <<END_OF_TAIL;
642
$block
643
</body>
644
 
645
</html>
646
END_OF_TAIL
647
 
648
    # close the html file
649
    close(HTML);
650
 
651
    warn "Finished\n" if $Verbose;
652
}
653
 
654
##############################################################################
655
 
656
sub usage {
657
    my $podfile = shift;
658
    warn "$0: $podfile: @_\n" if @_;
659
    die <<END_OF_USAGE;
660
Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
661
           --podpath=<name>:...:<name> --podroot=<name>
662
           --libpods=<name>:...:<name> --recurse --verbose --index
663
           --netscape --norecurse --noindex --cachedir=<name>
664
 
665
  --backlink     - set text for "back to top" links (default: none).
666
  --cachedir     - directory for the item and directory cache files.
667
  --css          - stylesheet URL
668
  --flush        - flushes the item and directory caches.
669
  --[no]header   - produce block header/footer (default is no headers).
670
  --help         - prints this message.
671
  --hiddendirs   - search hidden directories in podpath
672
  --htmldir      - directory for resulting HTML files.
673
  --htmlroot     - http-server base directory from which all relative paths
674
                   in podpath stem (default is /).
675
  --[no]index    - generate an index at the top of the resulting html
676
                   (default behaviour).
677
  --infile       - filename for the pod to convert (input taken from stdin
678
                   by default).
679
  --libpods      - colon-separated list of pages to search for =item pod
680
                   directives in as targets of C<> and implicit links (empty
681
                   by default).  note, these are not filenames, but rather
682
                   page names like those that appear in L<> links.
683
  --outfile      - filename for the resulting html file (output sent to
684
                   stdout by default).
685
  --podpath      - colon-separated list of directories containing library
686
                   pods (empty by default).
687
  --podroot      - filesystem base directory from which all relative paths
688
                   in podpath stem (default is .).
689
  --[no]quiet    - suppress some benign warning messages (default is off).
690
  --[no]recurse  - recurse on those subdirectories listed in podpath
691
                   (default behaviour).
692
  --title        - title that will appear in resulting html file.
693
  --[no]verbose  - self-explanatory (off by default).
694
  --[no]netscape - deprecated, has no effect. for backwards compatibility only.
695
 
696
END_OF_USAGE
697
 
698
}
699
 
700
sub parse_command_line {
701
    my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
702
        $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
703
        $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
704
        $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
705
 
706
    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
707
    my $result = GetOptions(
708
                            'backlink=s' => \$opt_backlink,
709
                            'cachedir=s' => \$opt_cachedir,
710
                            'css=s'      => \$opt_css,
711
                            'flush'      => \$opt_flush,
712
                            'header!'    => \$opt_header,
713
                            'help'       => \$opt_help,
714
                            'hiddendirs!'=> \$opt_hiddendirs,
715
                            'htmldir=s'  => \$opt_htmldir,
716
                            'htmlroot=s' => \$opt_htmlroot,
717
                            'index!'     => \$opt_index,
718
                            'infile=s'   => \$opt_infile,
719
                            'libpods=s'  => \$opt_libpods,
720
                            'netscape!'  => \$opt_netscape,
721
                            'outfile=s'  => \$opt_outfile,
722
                            'podpath=s'  => \$opt_podpath,
723
                            'podroot=s'  => \$opt_podroot,
724
                            'quiet!'     => \$opt_quiet,
725
                            'recurse!'   => \$opt_recurse,
726
                            'title=s'    => \$opt_title,
727
                            'verbose!'   => \$opt_verbose,
728
                           );
729
    usage("-", "invalid parameters") if not $result;
730
 
731
    usage("-") if defined $opt_help;    # see if the user asked for help
732
    $opt_help = "";                     # just to make -w shut-up.
733
 
734
    @Podpath  = map { s/\|/:/g; $_ } split(":", $opt_podpath) if defined $opt_podpath;
735
    @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
736
 
737
    $Backlink = $opt_backlink if defined $opt_backlink;
738
    $Cachedir = $opt_cachedir if defined $opt_cachedir;
739
    $Css      = $opt_css      if defined $opt_css;
740
    $Header   = $opt_header   if defined $opt_header;
741
    $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
742
    $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
743
    $Doindex  = $opt_index    if defined $opt_index;
744
    $Podfile  = $opt_infile   if defined $opt_infile;
745
    $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
746
    $Htmlfile = $opt_outfile  if defined $opt_outfile;
747
    $Podroot  = $opt_podroot  if defined $opt_podroot;
748
    $Quiet    = $opt_quiet    if defined $opt_quiet;
749
    $Recurse  = $opt_recurse  if defined $opt_recurse;
750
    $Title    = $opt_title    if defined $opt_title;
751
    $Verbose  = $opt_verbose  if defined $opt_verbose;
752
 
753
    warn "Flushing item and directory caches\n"
754
        if $opt_verbose && defined $opt_flush;
755
    $Dircache = "$Cachedir/pod2htmd.tmp";
756
    $Itemcache = "$Cachedir/pod2htmi.tmp";
757
    $Toccache = "$Cachedir/pod2htmt.tmp";
758
    if (defined $opt_flush) {
759
        1 while unlink($Dircache, $Itemcache, $Toccache);
760
    }
761
}
762
 
763
 
764
my $Saved_Cache_Key;
765
 
766
sub get_cache {
767
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
768
    my @cache_key_args = @_;
769
 
770
    # A first-level cache:
771
    # Don't bother reading the cache files if they still apply
772
    # and haven't changed since we last read them.
773
    my $this_cache_key = cache_key(@cache_key_args);
774
 
775
    return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
776
 
777
    # load the cache of %Pages and %Items if possible.  $tests will be
778
    # non-zero if successful.
779
    my $tests = 0;
780
    if (-f $dircache && -f $itemcache && -f $toccache) {
781
        warn "scanning for item cache\n" if $Verbose;
782
        $tests = load_cache($dircache, $itemcache, $toccache, $podpath, $podroot);
783
    }
784
 
785
    # if we didn't succeed in loading the cache then we must (re)build
786
    #  %Pages and %Items.
787
    if (!$tests) {
788
        warn "scanning directories in pod-path\n" if $Verbose;
789
        scan_podpath($podroot, $recurse, 0);
790
    }
791
    $Saved_Cache_Key = cache_key(@cache_key_args);
792
}
793
 
794
sub cache_key {
795
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
796
    return join('!', $dircache, $itemcache, $toccache, $recurse,
797
        @$podpath, $podroot, stat($dircache), stat($itemcache), stat($toccache));
798
}
799
 
800
#
801
# load_cache - tries to find if the caches stored in $dircache and $itemcache
802
#  are valid caches of %Pages and %Items.  if they are valid then it loads
803
#  them and returns a non-zero value.
804
#
805
sub load_cache {
806
    my($dircache, $itemcache, $toccache, $podpath, $podroot) = @_;
807
    my($tests);
808
    local $_;
809
 
810
    $tests = 0;
811
 
812
    open(CACHE, "<$itemcache") ||
813
        die "$0: error opening $itemcache for reading: $!\n";
814
    $/ = "\n";
815
 
816
    # is it the same podpath?
817
    $_ = <CACHE>;
818
    chomp($_);
819
    $tests++ if (join(":", @$podpath) eq $_);
820
 
821
    # is it the same podroot?
822
    $_ = <CACHE>;
823
    chomp($_);
824
    $tests++ if ($podroot eq $_);
825
 
826
    # load the cache if its good
827
    if ($tests != 2) {
828
        close(CACHE);
829
        return 0;
830
    }
831
 
832
    warn "loading item cache\n" if $Verbose;
833
    while (<CACHE>) {
834
        /(.*?) (.*)$/;
835
        $Items{$1} = $2;
836
    }
837
    close(CACHE);
838
 
839
    warn "scanning for directory cache\n" if $Verbose;
840
    open(CACHE, "<$dircache") ||
841
        die "$0: error opening $dircache for reading: $!\n";
842
    $/ = "\n";
843
    $tests = 0;
844
 
845
    # is it the same podpath?
846
    $_ = <CACHE>;
847
    chomp($_);
848
    $tests++ if (join(":", @$podpath) eq $_);
849
 
850
    # is it the same podroot?
851
    $_ = <CACHE>;
852
    chomp($_);
853
    $tests++ if ($podroot eq $_);
854
 
855
    # load the cache if its good
856
    if ($tests != 2) {
857
        close(CACHE);
858
        return 0;
859
    }
860
 
861
    warn "loading directory cache\n" if $Verbose;
862
    while (<CACHE>) {
863
        /(.*?) (.*)$/;
864
        $Pages{$1} = $2;
865
    }
866
 
867
    close(CACHE);
868
 
869
    #
870
    #   Load Toc Cache
871
    #
872
    open(CACHE, "<$toccache") ||
873
        die "$0: error opening $toccache for reading: $!\n";
874
    $/ = "\n";
875
    $tests = 0;
876
 
877
    # is it the same podpath?
878
    $_ = <CACHE>;
879
    chomp($_);
880
    $tests++ if (join(":", @$podpath) eq $_);
881
 
882
    # is it the same podroot?
883
    $_ = <CACHE>;
884
    chomp($_);
885
    $tests++ if ($podroot eq $_);
886
 
887
    # load the cache if its good
888
    if ($tests != 2) {
889
        close(CACHE);
890
        return 0;
891
    }
892
    warn "loading toc cache\n" if $Verbose;
893
    while (<CACHE>) {
894
        /(.*?) (.*)$/;
895
        $Toc{$1} = $2;
896
    }
897
    close(CACHE);
898
 
899
    return 1;
900
}
901
 
902
#
903
# scan_podpath - scans the directories specified in @podpath for directories,
904
#  .pod files, and .pm files.  it also scans the pod files specified in
905
#  @Libpods for =item directives.
906
#
907
sub scan_podpath {
908
    my($podroot, $recurse, $append) = @_;
909
    my($pwd, $dir);
910
    my($libpod, $dirname, $pod, @files, @poddata);
911
    unless($append) {
912
        %Items = ();
913
        %Pages = ();
914
        %Toc = ();
915
    }
916
 
917
    # scan each directory listed in @Podpath
918
    $pwd = getcwd();
919
    chdir($podroot)
920
        || die "$0: error changing to directory $podroot: $!\n";
921
    foreach $dir (@Podpath) {
922
        scan_dir($dir, $recurse);
923
    }
924
 
925
    # scan the pods listed in @Libpods for =item directives
926
    foreach $libpod (@Libpods) {
927
        # if the page isn't defined then we won't know where to find it
928
        # on the system.
929
        next unless defined $Pages{$libpod} && $Pages{$libpod};
930
 
931
        # if there is a directory then use the .pod and .pm files within it.
932
        # NOTE: Only finds the first so-named directory in the tree.
933
#       if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
934
        if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)(?<!\.pl)):/) {
935
            #  find all the .pod and .pm files within the directory
936
            $dirname = $1;
937
            opendir(DIR, $dirname) ||
938
                die "$0: scan_podpath:error opening directory $dirname: $!\n";
939
            @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
940
            closedir(DIR);
941
 
942
            # scan each .pod and .pm file for =item directives
943
            foreach $pod (@files) {
944
                open(POD, "<$dirname/$pod") ||
945
                    die "$0: error opening $dirname/$pod for input: $!\n";
946
                @poddata = <POD>;
947
                close(POD);
948
                clean_data( \@poddata );
949
 
950
                scan_items( \%Items, "$dirname/$pod", @poddata);
951
            }
952
 
953
            # use the names of files as =item directives too.
954
### Don't think this should be done this way - confuses issues.(WL)
955
###         foreach $pod (@files) {
956
###             $pod =~ /^(.*)(\.pod|\.pm)$/;
957
###             $Items{$1} = "$dirname/$1.html" if $1;
958
###         }
959
        } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
960
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
961
            # scan the .pod or .pm file for =item directives
962
            $pod = $1;
963
            open(POD, "<$pod") ||
964
                die "$0: error opening $pod for input: $!\n";
965
            @poddata = <POD>;
966
            close(POD);
967
            clean_data( \@poddata );
968
 
969
            scan_items( \%Items, "$pod", @poddata);
970
        } else {
971
            warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
972
        }
973
    }
974
    @poddata = ();      # clean-up a bit
975
 
976
    #
977
    #   Scan all POD for TOC metadata
978
    #
979
    foreach my $libpod ( keys %Pages )
980
    {
981
        if ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
982
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
983
            # scan the .pod or .pm file for =for directives
984
            $pod = $1;
985
            open(POD, "<$pod") ||
986
                die "$0: error opening $pod for input: $!\n";
987
            @poddata = <POD>;
988
            close(POD);
989
            clean_data( \@poddata );
990
 
991
            scan_toc( \%Toc, "$pod", @poddata);
992
        }
993
    }
994
    @poddata = ();      # clean-up a bit
995
 
996
 
997
    chdir($pwd)
998
        || die "$0: error changing to directory $pwd: $!\n";
999
 
1000
    # cache the item list for later use
1001
    warn "caching items for later use\n" if $Verbose;
1002
    open(CACHE, ">$Itemcache") ||
1003
        die "$0: error open $Itemcache for writing: $!\n";
1004
 
1005
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1006
    foreach my $key (keys %Items) {
1007
        print CACHE "$key $Items{$key}\n";
1008
    }
1009
 
1010
    close(CACHE);
1011
 
1012
    # cache the directory list for later use
1013
    warn "caching directories for later use\n" if $Verbose;
1014
    open(CACHE, ">$Dircache") ||
1015
        die "$0: error open $Dircache for writing: $!\n";
1016
 
1017
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1018
    foreach my $key (keys %Pages) {
1019
        print CACHE "$key $Pages{$key}\n";
1020
    }
1021
    close(CACHE);
1022
 
1023
    #
1024
    # cache the Toc Data for later use
1025
    #   Save Toc too
1026
    #
1027
    save_toc($podroot);
1028
}
1029
 
1030
sub save_toc
1031
{
1032
    my ($podroot) = @_;
1033
    warn "caching toc for later use\n" if $Verbose;
1034
    open(CACHE, ">$Toccache") ||
1035
        die "$0: error open $Toccache for writing: $!\n";
1036
 
1037
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1038
    foreach my $key (keys %Toc) {
1039
        print CACHE "$key $Toc{$key}\n";
1040
    }
1041
    close(CACHE);
1042
}
1043
 
1044
#
1045
# scan_dir - scans the directory specified in $dir for subdirectories, .pod
1046
#  files, and .pm files.  notes those that it finds.  this information will
1047
#  be used later in order to figure out where the pages specified in L<>
1048
#  links are on the filesystem.
1049
#
1050
sub scan_dir {
1051
    my($dir, $recurse) = @_;
1052
    my($t, @subdirs, @pods, $pod, $dirname, @dirs);
1053
    local $_;
1054
 
1055
    @subdirs = ();
1056
    @pods = ();
1057
 
1058
    opendir(DIR, $dir) ||
1059
        die "$0: scan_dir:error opening directory $dir: $!\n";
1060
    while (defined($_ = readdir(DIR))) {
1061
        if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
1062
            && ($HiddenDirs || !/^\./)
1063
        ) {         # directory
1064
            $Pages{$_}  = "" unless defined $Pages{$_};
1065
            $Pages{$_} .= "$dir/$_:";
1066
            push(@subdirs, $_);
1067
        } elsif (/\.pod\z/) {                               # .pod
1068
            s/\.pod\z//;
1069
            $Pages{$_}  = "" unless defined $Pages{$_};
1070
            $Pages{$_} .= "$dir/$_.pod:";
1071
            push(@pods, "$dir/$_.pod");
1072
        } elsif (/\.html\z/) {                              # .html
1073
            s/\.html\z//;
1074
            $Pages{$_}  = "" unless defined $Pages{$_};
1075
            $Pages{$_} .= "$dir/$_.pod:";
1076
        } elsif (/\.pm\z/) {                                # .pm
1077
            s/\.pm\z//;
1078
            $Pages{$_}  = "" unless defined $Pages{$_};
1079
            $Pages{$_} .= "$dir/$_.pm:";
1080
            push(@pods, "$dir/$_.pm");
1081
        } elsif (/\.pl\z/) {                                # .pl
1082
            s/\.pl\z//;
1083
            $Pages{$_}  = "" unless defined $Pages{$_};
1084
            $Pages{$_} .= "$dir/$_.pl:";
1085
            push(@pods, "$dir/$_.pl");
1086
        } elsif (/\.pod\.txt\z/) {                          # Skip .pod.txt
1087
        } elsif (-T "$dir/$_") {                            # script(?)
1088
            local *F;
1089
            if (open(F, "$dir/$_")) {
1090
                my $line;
1091
                while (defined($line = <F>)) {
1092
                    if ($line =~ /^=(?:pod|head1)/) {
1093
                        $Pages{$_}  = "" unless defined $Pages{$_};
1094
                        $Pages{$_} .= "$dir/$_.pod:";
1095
                        last;
1096
                    }
1097
                }
1098
                close(F);
1099
            }
1100
        }
1101
    }
1102
    closedir(DIR);
1103
 
1104
    # recurse on the subdirectories if necessary
1105
    if ($recurse) {
1106
        foreach my $subdir (@subdirs) {
1107
            scan_dir("$dir/$subdir", $recurse);
1108
        }
1109
    }
1110
}
1111
 
1112
#
1113
# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1114
#  build an index.
1115
#
1116
sub scan_headings {
1117
    my($sections, @data) = @_;
1118
    my($tag, $which_head, $otitle, $listdepth, $index);
1119
 
1120
    local $Ignore = 0;
1121
 
1122
    $listdepth = 0;
1123
    $index = "";
1124
 
1125
    # scan for =head directives, note their name, and build an index
1126
    #  pointing to each of them.
1127
    foreach my $line (@data) {
1128
      if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1129
        ($tag, $which_head, $otitle) = ($1,$2,$3);
1130
 
1131
        my $title = depod( $otitle );
1132
        my $name = anchorify( $title );
1133
        $$sections{$name} = 1;
1134
        $title = process_text( \$otitle );
1135
 
1136
            while ($which_head != $listdepth) {
1137
                if ($which_head > $listdepth) {
1138
                    $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1139
                    $listdepth++;
1140
                } elsif ($which_head < $listdepth) {
1141
                    $listdepth--;
1142
                    $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1143
                }
1144
            }
1145
 
1146
            $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1147
                      "<a href=\"#" . $name . "\">" .
1148
                      $title . "</a></li>";
1149
        }
1150
    }
1151
 
1152
    # finish off the lists
1153
    while ($listdepth--) {
1154
        $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1155
    }
1156
 
1157
    # get rid of bogus lists
1158
    $index =~ s,\t*<ul>\s*</ul>\n,,g;
1159
 
1160
    return $index;
1161
}
1162
 
1163
#
1164
# scan_items - scans the pod specified by $pod for =item directives.  we
1165
#  will use this information later on in resolving C<> links.
1166
#
1167
sub scan_items {
1168
    my( $itemref, $pod, @poddata ) = @_;
1169
    my($i, $item);
1170
    local $_;
1171
 
1172
    $pod =~ s/\.pod\z//;
1173
    $pod =~ s/\.p[lm]\z//;
1174
    $pod .= ".html" if $pod;
1175
 
1176
    foreach $i (0..$#poddata) {
1177
        my $txt = depod( $poddata[$i] );
1178
        $txt =~ s~^\n~~;
1179
        # figure out what kind of item it is.
1180
        # Build string for referencing this item.
1181
        if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1182
            next unless $1;
1183
            $item = $1;
1184
        } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1185
            $item = $1;
1186
        } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1187
            $item = $1;
1188
        } elsif( $txt =~ /\A=head[234]\s+(.*)\Z/s ) { # plain item
1189
            $item = $1;
1190
        } else {
1191
            next;
1192
        }
1193
        my $fid = fragment_id( $item );
1194
        $$itemref{$fid} = "$pod" if $fid;
1195
    }
1196
}
1197
 
1198
sub scan_toc {
1199
    my( $tocref, $pod, @poddata ) = @_;
1200
    my($i, $item);
1201
    local $_;
1202
 
1203
    $pod =~ s/\.pod\z//;
1204
    $pod =~ s/\.p[lm]\z//;
1205
    $pod .= ".html" if $pod;
1206
 
1207
    foreach $i (0..$#poddata) {
1208
        my $txt = depod( $poddata[$i] );
1209
 
1210
        if ($txt =~ /^=for\s+htmltoc\s*(.*)/si) {# =for
1211
            $item = $1;
1212
            $$tocref{$pod} = $1 if $1;
1213
        }
1214
    }
1215
}
1216
 
1217
 
1218
#
1219
# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1220
#
1221
sub process_head {
1222
    my($tag, $heading, $hasindex) = @_;
1223
 
1224
    # figure out the level of the =head
1225
    $tag =~ /head([1-6])/;
1226
    my $level = $1;
1227
 
1228
    if( $Listlevel ){
1229
        warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1230
        while( $Listlevel ){
1231
            process_back();
1232
        }
1233
    }
1234
 
1235
    print HTML "<p>\n";
1236
    if( $level == 1 && ! $Top ){
1237
      print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1238
        if $hasindex and $Backlink;
1239
      print HTML "</p>\n<hr />\n"
1240
    } else {
1241
      print HTML "</p>\n";
1242
    }
1243
 
1244
    my $name = anchorify( depod( $heading ) );
1245
    my $convert = process_text( \$heading );
1246
    print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1247
}
1248
 
1249
 
1250
#
1251
# emit_item_tag - print an =item's text
1252
# Note: The global $EmittedItem is used for inhibiting self-references.
1253
#
1254
my $EmittedItem;
1255
 
1256
sub emit_item_tag($$$){
1257
    my( $otext, $text, $compact ) = @_;
1258
    my $item = fragment_id( depod($text) , -generate);
1259
    Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile")
1260
        if !defined $item;
1261
    $EmittedItem = $item;
1262
### print STDERR "emit_item_tag=$item ($text)\n";
1263
 
1264
    print HTML '<strong>';
1265
    if ($Items_Named{$item}++) {
1266
        print HTML process_text( \$otext );
1267
    } else {
1268
        my $name = 'item_' .$item;
1269
        $name = anchorify($name);
1270
        print HTML qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>';
1271
    }
1272
    print HTML "</strong>\n";
1273
    undef( $EmittedItem );
1274
}
1275
 
1276
sub emit_li {
1277
    my( $tag ) = @_;
1278
    if( $Items_Seen[$Listlevel]++ == 0 ){
1279
        push( @Listend, "</$tag>" );
1280
        print HTML "<$tag>\n";
1281
    }
1282
    my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1283
    print HTML "<$emitted>";
1284
    return $emitted;
1285
}
1286
 
1287
#
1288
# process_item - convert a pod item tag and convert it to HTML format.
1289
#
1290
sub process_item {
1291
    my( $otext ) = @_;
1292
    my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1293
 
1294
    # lots of documents start a list without doing an =over.  this is
1295
    # bad!  but, the proper thing to do seems to be to just assume
1296
    # they did do an =over.  so warn them once and then continue.
1297
    if( $Listlevel == 0 ){
1298
        warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1299
        process_over();
1300
    }
1301
 
1302
    # formatting: insert a paragraph if preceding item has >1 paragraph
1303
    if( $After_Lpar ){
1304
        print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1305
        $After_Lpar = 0;
1306
    }
1307
 
1308
    # remove formatting instructions from the text
1309
    my $text = depod( $otext );
1310
 
1311
    my $emitted; # the tag actually emitted, used for closing
1312
 
1313
    # all the list variants:
1314
    if( $text =~ /\A\*/ ){ # bullet
1315
        $emitted = emit_li( 'ul' );
1316
        if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text
1317
            my $tag = $1;
1318
            $otext =~ s/\A\*\s+//;
1319
            emit_item_tag( $otext, $tag, 1 );
1320
        }
1321
 
1322
    } elsif( $text =~ /\A\d+/ ){ # numbered list
1323
        $emitted = emit_li( 'ol' );
1324
        if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text
1325
            my $tag = $1;
1326
            $otext =~ s/\A\d+\.?\s*//;
1327
            emit_item_tag( $otext, $tag, 1 );
1328
        }
1329
 
1330
    } else {                    # definition list
1331
        $emitted = emit_li( 'dl' );
1332
        if ($text =~ /\A(.+)\Z/s ){ # should have text
1333
            emit_item_tag( $otext, $text, 1 );
1334
        }
1335
        $need_dd = 1;
1336
    }
1337
    print HTML "\n";
1338
    return $need_dd;
1339
}
1340
 
1341
#
1342
# process_over - process a pod over tag and start a corresponding HTML list.
1343
#
1344
sub process_over {
1345
    # start a new list
1346
    $Listlevel++;
1347
    push( @Items_Seen, 0 );
1348
    $After_Lpar = 0;
1349
}
1350
 
1351
#
1352
# process_back - process a pod back tag and convert it to HTML format.
1353
#
1354
sub process_back {
1355
    my $need_dd = shift;
1356
    if( $Listlevel == 0 ){
1357
        warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1358
        return;
1359
    }
1360
 
1361
    # close off the list.  note, I check to see if $Listend[$Listlevel] is
1362
    # defined because an =item directive may have never appeared and thus
1363
    # $Listend[$Listlevel] may have never been initialized.
1364
    $Listlevel--;
1365
    if( defined $Listend[$Listlevel] ){
1366
        print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1367
        print HTML $Listend[$Listlevel];
1368
        print HTML "\n";
1369
        pop( @Listend );
1370
    }
1371
    $After_Lpar = 0;
1372
 
1373
    # clean up item count
1374
    pop( @Items_Seen );
1375
}
1376
 
1377
#
1378
# process_cut - process a pod cut tag, thus start ignoring pod directives.
1379
#
1380
sub process_cut {
1381
    $Ignore = 1;
1382
}
1383
 
1384
#
1385
# process_pod - process a pod tag, thus stop ignoring pod directives
1386
# until we see a corresponding cut.
1387
#
1388
sub process_pod {
1389
    # no need to set $Ignore to 0 cause the main loop did it
1390
}
1391
 
1392
#
1393
# process_for - process a =for pod tag.  if it's for html, spit
1394
# it out verbatim, if illustration, center it, otherwise ignore it.
1395
#
1396
sub process_for {
1397
    my($whom, $text) = @_;
1398
    if ( $whom =~ /^(pod2)?html$/i) {
1399
            print HTML $text;
1400
    } elsif ($whom =~ /^htmlclass$/i) {
1401
        $set_p_class = $text;
1402
 
1403
    } elsif ($whom =~ /^htmltoc$/i) {
1404
            # Not processed here - processed in scan_toc
1405
 
1406
    } elsif ($whom =~ /^illustration$/i) {
1407
        1 while chomp $text;
1408
        for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1409
          $text .= $ext, last if -r "$text$ext";
1410
        }
1411
        print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1412
    }
1413
}
1414
 
1415
#
1416
# process_begin - process a =begin pod tag.  this pushes
1417
# whom we're beginning on the begin stack.  if there's a
1418
# begin stack, we only print if it us.
1419
#
1420
sub process_begin {
1421
    my($whom, $text) = @_;
1422
    $whom = lc($whom);
1423
    push (@Begin_Stack, $whom);
1424
    if ( $whom =~ /^(pod2)?html$/) {
1425
        print HTML $text if $text;
1426
    }
1427
}
1428
 
1429
#
1430
# process_end - process a =end pod tag.  pop the
1431
# begin stack.  die if we're mismatched.
1432
#
1433
sub process_end {
1434
    my($whom, $text) = @_;
1435
    $whom = lc($whom);
1436
    if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) {
1437
        Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n")
1438
    }
1439
    pop( @Begin_Stack );
1440
}
1441
 
1442
#
1443
# process_pre - indented paragraph, made into <pre></pre>
1444
#
1445
sub process_pre {
1446
    my( $text ) = @_;
1447
    my( $rest );
1448
    return if $Ignore;
1449
 
1450
    $rest = $$text;
1451
 
1452
    # insert spaces in place of tabs
1453
    $rest =~ s#(.+)#
1454
            my $line = $1;
1455
            1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1456
            $line;
1457
        #eg;
1458
 
1459
    my @line = split /\n/, $rest;
1460
    my $comments = !grep !/^\s*(#.*)?$/, @line;
1461
 
1462
    # Try to colorize the block as Perl sample code
1463
    if (($comments || $rest =~ /[;{]/) &&
1464
        eval {require ActiveState::Scineplex})
1465
    {
1466
        my $prefix;
1467
        my $input = $rest; # Don't modify $rest in case the colorizer fails
1468
        if ($input =~ /^( +)/) {
1469
            $prefix = $1;
1470
            s/^$prefix// for @line;
1471
            $input = join("\n", @line, "");
1472
        }
1473
        my $styled = eval { ActiveState::Scineplex::Annotate($input, 'perl', outputFormat => 'html') };
1474
        if ($styled) {
1475
            # If this really looks like code, then we don't want
1476
            # to add hyperlinks to URLs embedded in strings etc.
1477
            if ($prefix) {
1478
                $$text = $prefix . join("\n$prefix", split("\n", $styled)) . "\n";
1479
            }
1480
            else {
1481
                $$text = $styled;
1482
            }
1483
            return;
1484
        }
1485
    }
1486
 
1487
    # convert some special chars to HTML escapes
1488
    $rest = html_escape($rest);
1489
 
1490
    # try and create links for all occurrences of perl.* within
1491
    # the preformatted text.
1492
    $rest =~ s{
1493
                 (\s*)(perl\w+)
1494
              }{
1495
                 if ( defined $Pages{$2} ){     # is a link
1496
                     qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1497
                 } elsif (defined $Pages{dosify($2)}) { # is a link
1498
                     qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1499
                 } else {
1500
                     "$1$2";
1501
                 }
1502
              }xeg;
1503
     $rest =~ s{
1504
                 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1505
               }{
1506
                  my $url ;
1507
                  if ( $Htmlfileurl ne '' ){
1508
                     # Here, we take advantage of the knowledge
1509
                     # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1510
                     # Since $Htmlroot eq '', we need to prepend $Htmldir
1511
                     # on the fron of the link to get the absolute path
1512
                     # of the link's target. We check for a leading '/'
1513
                     # to avoid corrupting links that are #, file:, etc.
1514
                     my $old_url = $3 ;
1515
                     $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1516
                     $url = relativize_url( "$old_url.html", $Htmlfileurl );
1517
                  } else {
1518
                     $url = "$3.html" ;
1519
                  }
1520
                  "$1$url" ;
1521
               }xeg;
1522
 
1523
    # Look for embedded URLs and make them into links.  We don't
1524
    # relativize them since they are best left as the author intended.
1525
 
1526
    my $urls = '(' . join ('|', qw{
1527
                http
1528
                telnet
1529
                mailto
1530
                news
1531
                gopher
1532
                file
1533
                wais
1534
                ftp
1535
            } )
1536
        . ')';
1537
 
1538
    my $ltrs = '\w';
1539
    my $gunk = '/#~:.?+=&%@!\-';
1540
    my $punc = '.:!?\-;';
1541
    my $any  = "${ltrs}${gunk}${punc}";
1542
 
1543
    $rest =~ s{
1544
        \b                      # start at word boundary
1545
        (                       # begin $1  {
1546
            $urls :             # need resource and a colon
1547
            (?!:)               # Ignore File::, among others.
1548
            [$any] +?           # followed by one or more of any valid
1549
                                #   character, but be conservative and
1550
                                #   take only what you need to....
1551
        )                       # end   $1  }
1552
        (?=
1553
            &quot; &gt;         # maybe pre-quoted '<a href="...">'
1554
        |                       # or:
1555
            [$punc]*            # 0 or more punctuation
1556
            (?:                 #   followed
1557
                [^$any]         #   by a non-url char
1558
            |                   #   or
1559
                $               #   end of the string
1560
            )                   #
1561
        |                       # or else
1562
            $                   #   then end of the string
1563
        )
1564
      }{<a href="$1">$1</a>}igox;
1565
 
1566
    # text should be as it is (verbatim)
1567
    $$text = $rest;
1568
}
1569
 
1570
 
1571
#
1572
# pure text processing
1573
#
1574
# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1575
# we don't want this to happen within IS
1576
#
1577
sub pure_text($){
1578
    my $text = shift();
1579
    process_puretext( $text, 1 );
1580
}
1581
 
1582
sub inIS_text($){
1583
    my $text = shift();
1584
    process_puretext( $text, 0 );
1585
}
1586
 
1587
#
1588
# process_puretext - process pure text (without pod-escapes) converting
1589
#  double-quotes and handling implicit C<> links.
1590
#
1591
sub process_puretext {
1592
    my($text, $notinIS) = @_;
1593
 
1594
    ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1595
    ## to produce some strange looking ref's. uncomment to disable:
1596
    ## $notinIS = 0;
1597
 
1598
    my(@words, $lead, $trail);
1599
 
1600
    # keep track of leading and trailing white-space
1601
    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1602
    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1603
 
1604
    # split at space/non-space boundaries
1605
    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1606
 
1607
    # process each word individually
1608
    foreach my $word (@words) {
1609
        # skip space runs
1610
        next if $word =~ /^\s*$/;
1611
        # see if we can infer a link or a function call
1612
        #
1613
        # NOTE: This is a word based search, it won't automatically
1614
        # mark "substr($var, 1, 2)" because the 1st word would be "substr($var"
1615
        # User has to enclose those with proper C<>
1616
 
1617
        if( $notinIS && $word =~
1618
            m/
1619
                ^([a-z_]{2,})                 # The function name
1620
                \(
1621
                    ([0-9][a-z]*              # Manual page(1) or page(1M)
1622
                    |[^)]*[\$\@\%][^)]+       # ($foo), (1, @foo), (%hash)
1623
                    |                         # ()
1624
                    )
1625
                \)
1626
                ([.,;]?)$                     # a possible punctuation follows
1627
            /xi
1628
        ) {
1629
            # has parenthesis so should have been a C<> ref
1630
            ## try for a pagename (perlXXX(1))?
1631
            my( $func, $args, $rest ) = ( $1, $2, $3 || '' );
1632
            if( $args =~ /^\d+$/ ){
1633
                my $url = page_sect( $word, '' );
1634
                if( defined $url ){
1635
                    $word = qq(<a href="$url" class="man">the $word manpage</a>$rest);
1636
                    next;
1637
                }
1638
            }
1639
            ## try function name for a link, append tt'ed argument list
1640
            $word = emit_C( $func, '', "($args)") . $rest;
1641
 
1642
#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1643
##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1644
##          # perl variables, should be a C<> ref
1645
##          $word = emit_C( $word );
1646
 
1647
        } elsif ($word =~ m,^\w+://\w,) {
1648
            # looks like a URL
1649
            # Don't relativize it: leave it as the author intended
1650
            $word = qq(<a href="$word">$word</a>);
1651
        } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1652
            # looks like an e-mail address
1653
            my ($w1, $w2, $w3) = ("", $word, "");
1654
            ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1655
            ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1656
            $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1657
        } else {
1658
            $word = html_escape($word) if $word =~ /["&<>]/;
1659
        }
1660
    }
1661
 
1662
    # put everything back together
1663
    return $lead . join( '', @words ) . $trail;
1664
}
1665
 
1666
 
1667
#
1668
# process_text - handles plaintext that appears in the input pod file.
1669
# there may be pod commands embedded within the text so those must be
1670
# converted to html commands.
1671
#
1672
 
1673
sub process_text1($$;$$);
1674
sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' }
1675
sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 }
1676
 
1677
sub process_text {
1678
    return if $Ignore;
1679
    my( $tref ) = @_;
1680
    my $res = process_text1( 0, $tref );
1681
    $res =~ s/\s+$//s;
1682
    $$tref = $res;
1683
}
1684
 
1685
sub process_text_rfc_links {
1686
    my $text = shift;
1687
 
1688
    # For every "RFCnnnn" or "RFC nnn", link it to the authoritative
1689
    # ource. Do not use the /i modifier here. Require "RFC" to be written in
1690
    #  in capital letters.
1691
 
1692
    $text =~ s{
1693
        (?<=[^<>[:alpha:]])           # Make sure this is not an URL already
1694
        (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits
1695
    }
1696
    {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx;
1697
 
1698
    $text;
1699
}
1700
 
1701
sub process_text1($$;$$){
1702
    my( $lev, $rstr, $func, $closing ) = @_;
1703
    my $res = '';
1704
 
1705
    unless (defined $func) {
1706
        $func = '';
1707
        $lev++;
1708
    }
1709
 
1710
    if( $func eq 'B' ){
1711
        # B<text> - boldface
1712
        $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1713
 
1714
    } elsif( $func eq 'C' ){
1715
        # C<code> - can be a ref or <code></code>
1716
        # need to extract text
1717
        my $par = go_ahead( $rstr, 'C', $closing );
1718
 
1719
        ## clean-up of the link target
1720
        my $text = depod( $par );
1721
 
1722
        ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1723
        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1724
 
1725
        $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1726
 
1727
    } elsif( $func eq 'E' ){
1728
        # E<x> - convert to character
1729
        $$rstr =~ s/^([^>]*)>//;
1730
        my $escape = $1;
1731
        $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1732
        $res = "&$escape;";
1733
 
1734
    } elsif( $func eq 'F' ){
1735
        # F<filename> - italicize
1736
        $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>';
1737
 
1738
    } elsif( $func eq 'I' ){
1739
        # I<text> - italicize
1740
        $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1741
 
1742
    } elsif( $func eq 'L' ){
1743
        # L<link> - link
1744
        ## L<text|cross-ref> => produce text, use cross-ref for linking
1745
        ## L<cross-ref> => make text from cross-ref
1746
        ## need to extract text
1747
        my $par = go_ahead( $rstr, 'L', $closing );
1748
 
1749
        # some L<>'s that shouldn't be:
1750
        # a) full-blown URL's are emitted as-is
1751
        if( $par =~ m{^\w+://}s ){
1752
            return make_URL_href( $par );
1753
        }
1754
        # b) C<...> is stripped and treated as C<>
1755
        if( $par =~ /^C<(.*)>$/ ){
1756
            my $text = depod( $1 );
1757
            return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1758
        }
1759
 
1760
        # analyze the contents
1761
        $par =~ s/\n/ /g;   # undo word-wrapped tags
1762
        my $opar = $par;
1763
        my $linktext;
1764
        if( $par =~ s{^([^|]+)\|}{} ){
1765
            $linktext = $1;
1766
        }
1767
 
1768
        # make sure sections start with a /
1769
        $par =~ s{^"}{/"};
1770
 
1771
        my( $page, $section, $ident );
1772
 
1773
        # check for link patterns
1774
        if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1775
            # we've got a name/ident (no quotes)
1776
            if (length $2) {
1777
                ( $page, $ident ) = ( $1, $2 );
1778
            } else {
1779
                ( $page, $section ) = ( $1, $2 );
1780
            }
1781
            ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1782
 
1783
        } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1784
            # even though this should be a "section", we go for ident first
1785
            ( $page, $ident ) = ( $1, $2 );
1786
            ### print STDERR "--> L<$par> to page $page, section $section\n";
1787
 
1788
        } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1789
            ( $page, $section ) = ( '', $par );
1790
            ### print STDERR "--> L<$par> to void page, section $section\n";
1791
 
1792
        } else {
1793
            ( $page, $section ) = ( $par, '' );
1794
            ### print STDERR "--> L<$par> to page $par, void section\n";
1795
        }
1796
 
1797
        # now, either $section or $ident is defined. the convoluted logic
1798
        # below tries to resolve L<> according to what the user specified.
1799
        # failing this, we try to find the next best thing...
1800
        my( $url, $ltext, $fid );
1801
 
1802
        RESOLVE: {
1803
            if( defined $ident ){
1804
                ## try to resolve $ident as an item
1805
                ( $url, $fid ) = coderef( $page, $ident );
1806
                if( $url ){
1807
                    if( ! defined( $linktext ) ){
1808
                        $linktext = $ident;
1809
                        $linktext .= " in " if $ident && $page;
1810
                        $linktext .= "the $page manpage" if $page;
1811
                    }
1812
                    ###  print STDERR "got coderef url=$url\n";
1813
                    last RESOLVE;
1814
                }
1815
                ## no luck: go for a section (auto-quoting!)
1816
                $section = $ident;
1817
            }
1818
            ## now go for a section
1819
            my $htmlsection = htmlify( $section );
1820
            $url = page_sect( $page, $htmlsection );
1821
            if( $url ){
1822
                if( ! defined( $linktext ) ){
1823
                    $linktext = $section;
1824
                    $linktext .= " in " if $section && $page;
1825
                    $linktext .= "the $page manpage" if $page;
1826
                }
1827
                ### print STDERR "got page/section url=$url\n";
1828
                last RESOLVE;
1829
            }
1830
            ## no luck: go for an ident
1831
            if( $section ){
1832
                $ident = $section;
1833
            } else {
1834
                $ident = $page;
1835
                $page  = undef();
1836
            }
1837
            ( $url, $fid ) = coderef( $page, $ident );
1838
            if( $url ){
1839
                if( ! defined( $linktext ) ){
1840
                    $linktext = $ident;
1841
                    $linktext .= " in " if $ident && $page;
1842
                    $linktext .= "the $page manpage" if $page;
1843
                }
1844
                ### print STDERR "got section=>coderef url=$url\n";
1845
                last RESOLVE;
1846
            }
1847
 
1848
            # warning; show some text.
1849
            $linktext = $opar unless defined $linktext;
1850
            warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1851
        }
1852
 
1853
        # now we have a URL or just plain code
1854
        $$rstr = $linktext . '>' . $$rstr;
1855
        if( defined( $url ) ){
1856
            $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1857
        } else {
1858
            $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1859
        }
1860
 
1861
    } elsif( $func eq 'S' ){
1862
        # S<text> - non-breaking spaces
1863
        $res = process_text1( $lev, $rstr );
1864
        $res =~ s/ /&nbsp;/g;
1865
 
1866
    } elsif( $func eq 'X' ){
1867
        # X<> - ignore
1868
        warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1869
            unless $$rstr =~ s/^[^>]*>// or $Quiet;
1870
    } elsif( $func eq 'Z' ){
1871
        # Z<> - empty
1872
        warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n"
1873
            unless $$rstr =~ s/^>// or $Quiet;
1874
 
1875
    } else {
1876
        my $term = pattern $closing;
1877
        while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1878
            # all others: either recurse into new function or
1879
            # terminate at closing angle bracket(s)
1880
            my $pt = $1;
1881
            $pt .= $2 if !$3 &&  $lev == 1;
1882
            $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1883
            return $res if !$3 && $lev > 1;
1884
            if( $3 ){
1885
                $res .= process_text1( $lev, $rstr, $3, closing $4 );
1886
            }
1887
        }
1888
        if( $lev == 1 ){
1889
            $res .= pure_text( $$rstr );
1890
        } elsif( ! $Quiet ) {
1891
            my $snippet = substr($$rstr,0,60);
1892
            warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" 
1893
 
1894
        }
1895
        $res = process_text_rfc_links($res);
1896
    }
1897
    return $res;
1898
}
1899
 
1900
#
1901
# go_ahead: extract text of an IS (can be nested)
1902
#
1903
sub go_ahead($$$){
1904
    my( $rstr, $func, $closing ) = @_;
1905
    my $res = '';
1906
    my @closing = ($closing);
1907
    while( $$rstr =~
1908
      s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){
1909
        $res .= $1;
1910
        unless( $3 ){
1911
            shift @closing;
1912
            return $res unless @closing;
1913
        } else {
1914
            unshift @closing, closing $4;
1915
        }
1916
        $res .= $2;
1917
    }
1918
    unless ($Quiet) {
1919
        my $snippet = substr($$rstr,0,60);
1920
        warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n" 
1921
    }           
1922
    return $res;
1923
}
1924
 
1925
#
1926
# emit_C - output result of C<text>
1927
#    $text is the depod-ed text
1928
#
1929
sub emit_C($;$$){
1930
    my( $text, $nocode, $args ) = @_;
1931
    $args = '' unless defined $args;
1932
    my $res;
1933
    my( $url, $fid ) = coderef( undef(), $text );
1934
 
1935
    # need HTML-safe text
1936
    my $linktext = html_escape( "$text$args" );
1937
 
1938
    if( defined( $url ) &&
1939
        (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1940
        $res = "<a href=\"$url\"><code>$linktext</code></a>";
1941
    } elsif( 0 && $nocode ){
1942
        $res = $linktext;
1943
    } else {
1944
        $res = "<code>$linktext</code>";
1945
    }
1946
    return $res;
1947
}
1948
 
1949
#
1950
# html_escape: make text safe for HTML
1951
#
1952
sub html_escape {
1953
    my $rest = $_[0];
1954
    $rest   =~ s/&/&amp;/g;
1955
    $rest   =~ s/</&lt;/g;
1956
    $rest   =~ s/>/&gt;/g;
1957
    $rest   =~ s/"/&quot;/g;
1958
    # &apos; is only in XHTML, not HTML4.  Be conservative
1959
    #$rest   =~ s/'/&apos;/g;
1960
    return $rest;
1961
}
1962
 
1963
 
1964
#
1965
# dosify - convert filenames to 8.3
1966
#
1967
sub dosify {
1968
    my($str) = @_;
1969
    return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1970
    if ($Is83) {
1971
        $str = lc $str;
1972
        $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1973
        $str =~ s/(\w+)/substr ($1,0,8)/ge;
1974
    }
1975
    return $str;
1976
}
1977
 
1978
#
1979
# page_sect - make a URL from the text of a L<>
1980
#
1981
sub page_sect($$) {
1982
    my( $page, $section ) = @_;
1983
    my( $linktext, $page83, $link);     # work strings
1984
 
1985
    # check if we know that this is a section in this page
1986
    if (!defined $Pages{$page} && defined $Sections{$page}) {
1987
        $section = $page;
1988
        $page = "";
1989
        ### print STDERR "reset page='', section=$section\n";
1990
    }
1991
 
1992
    $page83=dosify($page);
1993
    $page=$page83 if (defined $Pages{$page83});
1994
    if ($page eq "") {
1995
        $link = "#" . anchorify( $section );
1996
    } elsif ( $page =~ /::/ ) {
1997
        $page =~ s,::,/,g;
1998
        # Search page cache for an entry keyed under the html page name,
1999
        # then look to see what directory that page might be in.  NOTE:
2000
        # this will only find one page. A better solution might be to produce
2001
        # an intermediate page that is an index to all such pages.
2002
        my $page_name = $page ;
2003
        $page_name =~ s,^.*/,,s ;
2004
        if ( defined( $Pages{ $page_name } ) &&
2005
             $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
2006
           ) {
2007
            $page = $1 ;
2008
        }
2009
        else {
2010
            # NOTE: This branch assumes that all A::B pages are located in
2011
            # $Htmlroot/A/B.html . This is often incorrect, since they are
2012
            # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
2013
            # analyze the contents of %Pages and figure out where any
2014
            # cousins of A::B are, then assume that.  So, if A::B isn't found,
2015
            # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
2016
            # lib/A/B.pm. This is also limited, but it's an improvement.
2017
            # Maybe a hints file so that the links point to the correct places
2018
            # nonetheless?
2019
 
2020
        }
2021
        $link = "$Htmlroot/$page.html";
2022
        $link .= "#" . anchorify( $section ) if ($section);
2023
    } elsif (!defined $Pages{$page}) {
2024
        $link = "";
2025
    } else {
2026
        $section = anchorify( $section ) if $section ne "";
2027
        ### print STDERR "...section=$section\n";
2028
 
2029
        # if there is a directory by the name of the page, then assume that an
2030
        # appropriate section will exist in the subdirectory
2031
#       if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
2032
        if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
2033
            $link = "$Htmlroot/$1/$section.html";
2034
            ### print STDERR "...link=$link\n";
2035
 
2036
        # since there is no directory by the name of the page, the section will
2037
        # have to exist within a .html of the same name.  thus, make sure there
2038
        # is a .pod or .pm that might become that .html
2039
        } else {
2040
            $section = "#$section" if $section;
2041
            ### print STDERR "...section=$section\n";
2042
 
2043
            # check if there is a .pod with the page name.
2044
            # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
2045
            if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
2046
                $link = "$Htmlroot/$1.html$section";
2047
            } else {
2048
                $link = "";
2049
            }
2050
        }
2051
    }
2052
 
2053
    if ($link) {
2054
        # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
2055
        # implies $Htmlroot eq ''. This means that the link in question
2056
        # needs a prefix of $Htmldir if it begins with '/'. The test for
2057
        # the initial '/' is done to avoid '#'-only links, and to allow
2058
        # for other kinds of links, like file:, ftp:, etc.
2059
        my $url ;
2060
        if (  $Htmlfileurl ne '' ) {
2061
            $link = "$Htmldir$link" if $link =~ m{^/}s;
2062
            $url = relativize_url( $link, $Htmlfileurl );
2063
# print( "  b: [$link,$Htmlfileurl,$url]\n" );
2064
        }
2065
        else {
2066
            $url = $link ;
2067
        }
2068
        return $url;
2069
 
2070
    } else {
2071
        return undef();
2072
    }
2073
}
2074
 
2075
#
2076
# relativize_url - convert an absolute URL to one relative to a base URL.
2077
# Assumes both end in a filename.
2078
#
2079
sub relativize_url {
2080
    my ($dest,$source) = @_ ;
2081
 
2082
    my ($dest_volume,$dest_directory,$dest_file) =
2083
        File::Spec::Unix->splitpath( $dest ) ;
2084
    $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
2085
 
2086
    my ($source_volume,$source_directory,$source_file) =
2087
        File::Spec::Unix->splitpath( $source ) ;
2088
    $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
2089
 
2090
    my $rel_path = '' ;
2091
    if ( $dest ne '' ) {
2092
       $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
2093
    }
2094
 
2095
    if ( $rel_path ne ''                &&
2096
         substr( $rel_path, -1 ) ne '/' &&
2097
         substr( $dest_file, 0, 1 ) ne '#'
2098
        ) {
2099
        $rel_path .= "/$dest_file" ;
2100
    }
2101
    else {
2102
        $rel_path .= "$dest_file" ;
2103
    }
2104
 
2105
    return $rel_path ;
2106
}
2107
 
2108
 
2109
#
2110
# coderef - make URL from the text of a C<>
2111
#
2112
sub coderef($$){
2113
    my( $page, $item ) = @_;
2114
    my( $url );
2115
 
2116
    my $fid = fragment_id( $item );
2117
 
2118
    if( defined( $page ) && $page ne "" ){
2119
        # we have been given a $page...
2120
        $page =~ s{::}{/}g;
2121
 
2122
        Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile")
2123
            if !defined $fid;    
2124
        # Do we take it? Item could be a section!
2125
        my $base = $Items{$fid} || "";
2126
        $base =~ s{[^/]*/}{};
2127
        if( $base ne "$page.html" ){
2128
            ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
2129
            $page = undef();
2130
        }
2131
 
2132
    } else {
2133
        # no page - local items precede cached items
2134
        if( defined( $fid ) ){
2135
            if(  exists $Local_Items{$fid} ){
2136
                $page = $Local_Items{$fid};
2137
            } else {
2138
                $page = $Items{$fid};
2139
            }
2140
        }
2141
    }
2142
 
2143
    # if there was a pod file that we found earlier with an appropriate
2144
    # =item directive, then create a link to that page.
2145
    if( defined $page ){
2146
        if( $page ){
2147
            if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
2148
                $page = $1 . '.html';
2149
            }
2150
            my $link = "$Htmlroot/$page#item_" . anchorify($fid);
2151
 
2152
            # Here, we take advantage of the knowledge that $Htmlfileurl
2153
            # ne '' implies $Htmlroot eq ''.
2154
            if (  $Htmlfileurl ne '' ) {
2155
                $link = "$Htmldir$link" ;
2156
                $url = relativize_url( $link, $Htmlfileurl ) ;
2157
            } else {
2158
                $url = $link ;
2159
            }
2160
        } else {
2161
            $url = "#item_" . anchorify($fid);
2162
        }
2163
 
2164
        confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
2165
    }
2166
    return( $url, $fid );
2167
}
2168
 
2169
 
2170
 
2171
#
2172
# Adapted from Nick Ing-Simmons' PodToHtml package.
2173
sub relative_url {
2174
    my $source_file = shift ;
2175
    my $destination_file = shift;
2176
 
2177
    my $source = URI::file->new_abs($source_file);
2178
    my $uo = URI::file->new($destination_file,$source)->abs;
2179
    return $uo->rel->as_string;
2180
}
2181
 
2182
 
2183
#
2184
# finish_list - finish off any pending HTML lists.  this should be called
2185
# after the entire pod file has been read and converted.
2186
#
2187
sub finish_list {
2188
    while ($Listlevel > 0) {
2189
        print HTML "</dl>\n";
2190
        $Listlevel--;
2191
    }
2192
}
2193
 
2194
#
2195
# htmlify - converts a pod section specification to a suitable section
2196
# specification for HTML. Note that we keep spaces and special characters
2197
# except ", ? (Netscape problem) and the hyphen (writer's problem...).
2198
#
2199
sub htmlify {
2200
    my( $heading) = @_;
2201
    $heading =~ s/(\s+)/ /g;
2202
    $heading =~ s/\s+\Z//;
2203
    $heading =~ s/\A\s+//;
2204
    # The hyphen is a disgrace to the English language.
2205
    # $heading =~ s/[-"?]//g;
2206
    $heading =~ s/["?]//g;
2207
    $heading = lc( $heading );
2208
    return $heading;
2209
}
2210
 
2211
#
2212
# similar to htmlify, but turns non-alphanumerics into underscores
2213
#
2214
sub anchorify {
2215
    my ($anchor) = @_;
2216
    $anchor = htmlify($anchor);
2217
    $anchor =~ s/\W/_/g;
2218
    return $anchor;
2219
}
2220
 
2221
#
2222
# depod - convert text by eliminating all interior sequences
2223
# Note: can be called with copy or modify semantics
2224
#
2225
my %E2c;
2226
$E2c{lt}     = '<';
2227
$E2c{gt}     = '>';
2228
$E2c{sol}    = '/';
2229
$E2c{verbar} = '|';
2230
$E2c{amp}    = '&'; # in Tk's pods
2231
 
2232
sub depod1($;$$);
2233
 
2234
sub depod($){
2235
    my $string;
2236
    if( ref( $_[0] ) ){
2237
        $string =  ${$_[0]};
2238
        ${$_[0]} = depod1( \$string );
2239
    } else {
2240
        $string =  $_[0];
2241
        depod1( \$string );
2242
    }
2243
}
2244
 
2245
sub depod1($;$$){
2246
  my( $rstr, $func, $closing ) = @_;
2247
  my $res = '';
2248
  return $res unless defined $$rstr;
2249
  if( ! defined( $func ) ){
2250
      # skip to next begin of an interior sequence
2251
      while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2252
         # recurse into its text
2253
          $res .= $1 . depod1( $rstr, $2, closing $3);
2254
      }
2255
      $res .= $$rstr;
2256
  } elsif( $func eq 'E' ){
2257
      # E<x> - convert to character
2258
      $$rstr =~ s/^([^>]*)>//;
2259
      $res .= $E2c{$1} || "";
2260
  } elsif( $func eq 'X' ){
2261
      # X<> - ignore
2262
      $$rstr =~ s/^[^>]*>//;
2263
  } elsif( $func eq 'Z' ){
2264
      # Z<> - empty
2265
      $$rstr =~ s/^>//;
2266
  } else {
2267
      # all others: either recurse into new function or
2268
      # terminate at closing angle bracket
2269
      my $term = pattern $closing;
2270
      while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2271
          $res .= $1;
2272
          last unless $3;
2273
          $res .= depod1( $rstr, $3, closing $4 );
2274
      }
2275
      ## If we're here and $2 ne '>': undelimited interior sequence.
2276
      ## Ignored, as this is called without proper indication of where we are.
2277
      ## Rely on process_text to produce diagnostics.
2278
  }
2279
  return $res;
2280
}
2281
 
2282
{
2283
    my %seen;   # static fragment record hash
2284
 
2285
sub fragment_id_readable {
2286
    my $text     = shift;
2287
    my $generate = shift;   # optional flag
2288
 
2289
    my $orig = $text;
2290
 
2291
    # leave the words for the fragment identifier,
2292
    # change everything else to underbars.
2293
    $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency.
2294
    $text =~ s/_{2,}/_/g;
2295
    $text =~ s/\A_//;
2296
    $text =~ s/_\Z//;
2297
 
2298
    unless ($text)
2299
    {
2300
        # Nothing left after removing punctuation, so leave it as is
2301
        # E.g. if option is named: "=item -#"
2302
 
2303
        $text = $orig;
2304
    }
2305
 
2306
    if ($generate) {
2307
        if ( exists $seen{$text} ) {
2308
            # This already exists, make it unique
2309
            $seen{$text}++;
2310
            $text = $text . $seen{$text};
2311
        } else {
2312
            $seen{$text} = 1;  # first time seen this fragment
2313
        }
2314
    }
2315
 
2316
    $text;
2317
}}
2318
 
2319
#
2320
# fragment_id - construct a fragment identifier from:
2321
#   a) =item text
2322
#   b) contents of C<...>
2323
#
2324
 
2325
sub fragment_id {
2326
    my $text     = shift;
2327
    my $generate = shift;   # optional flag
2328
 
2329
    $text =~ s/\s+\Z//s;
2330
    if( $text ){
2331
        # a method or function?
2332
        return $1 if $text =~ /(\w+)\s*\(/;
2333
        return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2334
 
2335
        # a variable name?
2336
        return $1 if $text =~ /^([\$\@%*]\S+)/;
2337
 
2338
        # some pattern matching operator?
2339
        return $1 if $text =~ m|^(\w+/).*/\w*$|;
2340
 
2341
        # fancy stuff... like "do { }"
2342
        return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2343
 
2344
        # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2345
        # and some funnies with ... Module ...
2346
        return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2347
        return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2348
 
2349
        return fragment_id_readable($text, $generate);
2350
    } else {
2351
        return;
2352
    }
2353
}
2354
 
2355
#
2356
# make_URL_href - generate HTML href from URL
2357
# Special treatment for CGI queries.
2358
#
2359
sub make_URL_href($){
2360
    my( $url ) = @_;
2361
    if( $url !~
2362
        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2363
        $url = "<a href=\"$url\">$url</a>";
2364
    }
2365
    return $url;
2366
}
2367
 
2368
1;