Subversion Repositories DevTools

Rev

Rev 4386 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
361 dpurdie 1
#
2
#   Pinched from ActiveState v5.8.8 , Binary build 822 [280952]
3
#   For the same reason as the other POD:: bits
4
#       Fix bugs
5
#       Ensure that they are fixed independently of the Perl Pelease
6
#
7
#   In this module
8
#       * Remove AvtiveState mailto:
9
#
10
 
11
package Pod::Html;
12
use strict;
13
require Exporter;
14
 
15
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
5542 dpurdie 16
$VERSION = 1.0901;
361 dpurdie 17
@ISA = qw(Exporter);
18
@EXPORT = qw(pod2html htmlify);
19
@EXPORT_OK = qw(anchorify);
20
 
21
use Carp;
22
use Config;
23
use Cwd;
24
use File::Spec;
25
use File::Spec::Unix;
26
use Getopt::Long;
27
 
28
use locale;     # make \w work right in non-ASCII lands
29
 
30
=head1 NAME
31
 
32
Pod::Html - module to convert pod files to HTML
33
 
34
=head1 SYNOPSIS
35
 
36
    use Pod::Html;
37
    pod2html([options]);
38
 
39
=head1 DESCRIPTION
40
 
41
Converts files from pod format (see L<perlpod>) to HTML format.  It
42
can automatically generate indexes and cross-references, and it keeps
43
a cache of things it knows how to cross-reference.
44
 
45
=head1 FUNCTIONS
46
 
47
=head2 pod2html
48
 
49
    pod2html("pod2html",
50
             "--podpath=lib:ext:pod:vms",
51
             "--podroot=/usr/src/perl",
52
             "--htmlroot=/perl/nmanual",
53
             "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
54
             "--recurse",
55
             "--infile=foo.pod",
56
             "--outfile=/perl/nmanual/foo.html");
57
 
58
pod2html takes the following arguments:
59
 
60
=over 4
61
 
62
=item backlink
63
 
64
    --backlink="Back to Top"
65
 
66
Adds "Back to Top" links in front of every C<head1> heading (except for
67
the first).  By default, no backlinks are generated.
68
 
69
=item cachedir
70
 
71
    --cachedir=name
72
 
73
Creates the item and directory caches in the given directory.
74
 
75
=item css
76
 
77
    --css=stylesheet
78
 
79
Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
80
C<style> attributes that are output by default (to avoid conflicts).
81
 
82
=item flush
83
 
84
    --flush
85
 
86
Flushes the item and directory caches.
87
 
88
=item header
89
 
90
    --header
91
    --noheader
92
 
93
Creates header and footer blocks containing the text of the C<NAME>
94
section.  By default, no headers are generated.
95
 
96
=item help
97
 
98
    --help
99
 
100
Displays the usage message.
101
 
102
=item hiddendirs
103
 
104
    --hiddendirs
105
    --nohiddendirs
106
 
107
Include hidden directories in the search for POD's in podpath if recurse
108
is set.
109
The default is not to traverse any directory whose name begins with C<.>.
110
See L</"podpath"> and L</"recurse">.
111
 
112
[This option is for backward compatibility only.
113
It's hard to imagine that one would usefully create a module with a
114
name component beginning with C<.>.]
115
 
116
=item htmldir
117
 
118
    --htmldir=name
119
 
120
Sets the directory in which the resulting HTML file is placed.  This
121
is used to generate relative links to other files. Not passing this
122
causes all links to be absolute, since this is the value that tells
123
Pod::Html the root of the documentation tree.
124
 
125
=item htmlroot
126
 
127
    --htmlroot=name
128
 
129
Sets the base URL for the HTML files.  When cross-references are made,
130
the HTML root is prepended to the URL.
131
 
132
=item index
133
 
134
    --index
135
    --noindex
136
 
137
Generate an index at the top of the HTML file.  This is the default
138
behaviour.
139
 
140
=item infile
141
 
142
    --infile=name
143
 
144
Specify the pod file to convert.  Input is taken from STDIN if no
145
infile is specified.
146
 
147
=item libpods
148
 
149
    --libpods=name:...:name
150
 
151
List of page names (eg, "perlfunc") which contain linkable C<=item>s.
152
 
153
=item netscape
154
 
155
    --netscape
156
    --nonetscape
157
 
158
B<Deprecated>, has no effect. For backwards compatibility only.
159
 
160
=item outfile
161
 
162
    --outfile=name
163
 
164
Specify the HTML file to create.  Output goes to STDOUT if no outfile
165
is specified.
166
 
167
=item podpath
168
 
169
    --podpath=name:...:name
170
 
171
Specify which subdirectories of the podroot contain pod files whose
172
HTML converted forms can be linked to in cross references.
173
 
174
=item podroot
175
 
176
    --podroot=name
177
 
178
Specify the base directory for finding library pods.
179
 
180
=item quiet
181
 
182
    --quiet
183
    --noquiet
184
 
185
Don't display I<mostly harmless> warning messages.  These messages
186
will be displayed by default.  But this is not the same as C<verbose>
187
mode.
188
 
189
=item recurse
190
 
191
    --recurse
192
    --norecurse
193
 
194
Recurse into subdirectories specified in podpath (default behaviour).
195
 
196
=item title
197
 
198
    --title=title
199
 
200
Specify the title of the resulting HTML file.
201
 
202
=item verbose
203
 
204
    --verbose
205
    --noverbose
206
 
207
Display progress messages.  By default, they won't be displayed.
208
 
209
=back
210
 
211
=head2 htmlify
212
 
213
    htmlify($heading);
214
 
215
Converts a pod section specification to a suitable section specification
216
for HTML. Note that we keep spaces and special characters except 
217
C<", ?> (Netscape problem) and the hyphen (writer's problem...).
218
 
219
=head2 anchorify
220
 
221
    anchorify(@heading);
222
 
223
Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
224
that C<anchorify()> is not exported by default.
225
 
226
=head1 ENVIRONMENT
227
 
228
Uses C<$Config{pod2html}> to setup default options.
229
 
230
=head1 AUTHOR
231
 
232
Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
233
 
234
=head1 SEE ALSO
235
 
236
L<perlpod>
237
 
238
=head1 COPYRIGHT
239
 
240
This program is distributed under the Artistic License.
241
 
242
=cut
243
 
244
my($Cachedir);
245
my($Dircache, $Itemcache, $Toccache);
246
my @Begin_Stack;
247
my @Libpods;
248
my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
249
my($Podfile, @Podpath, $Podroot);
250
my $Css;
251
 
252
my $Recurse;
253
my $Quiet;
254
my $HiddenDirs;
255
my $Verbose;
256
my $Doindex;
257
 
258
my $Backlink;
5542 dpurdie 259
my($Listlevel, @Listtype);
260
my $ListNewTerm;
361 dpurdie 261
use vars qw($Ignore);  # need to localize it later.
262
 
263
my(%Items_Named, @Items_Seen);
264
my($Title, $Header);
265
 
266
my $Top;
267
my $Paragraph;
268
 
269
my %Sections;
270
 
271
# Caches
272
my %Pages = ();                 # associative array used to find the location
273
                                #   of pages referenced by L<> links.
274
my %Items = ();                 # associative array used to find the location
275
                                #   of =item directives referenced by C<> links
276
 
277
my %Toc = ();                   # Hash of Table of content metadata
278
 
279
my %Local_Items;
280
my $Is83;
281
 
282
my $Curdir = File::Spec->curdir;
283
my $set_p_class;
284
 
285
init_globals();
286
 
287
sub init_globals {
288
    $Cachedir = ".";            # The directory to which item and directory
289
                                # caches will be written.
290
 
291
    $Dircache = "pod2htmd.tmp";
292
    $Itemcache = "pod2htmi.tmp";
293
    $Toccache = "pod2htmlt.tmp";
294
 
295
    @Begin_Stack = ();          # begin/end stack
296
 
297
    @Libpods = ();              # files to search for links from C<> directives
298
    $Htmlroot = "/";            # http-server base directory from which all
299
                                #   relative paths in $podpath stem.
300
    $Htmldir = "";              # The directory to which the html pages
301
                                # will (eventually) be written.
302
    $Htmlfile = "";             # write to stdout by default
303
    $Htmlfileurl = "" ;         # The url that other files would use to
304
                                # refer to this file.  This is only used
305
                                # to make relative urls that point to
306
                                # other files.
307
 
308
    $Podfile = "";              # read from stdin by default
309
    @Podpath = ();              # list of directories containing library pods.
310
    $Podroot = $Curdir;         # filesystem base directory from which all
311
                                #   relative paths in $podpath stem.
312
    $Css = '';                  # Cascading style sheet
313
    $Recurse = 1;               # recurse on subdirectories in $podpath.
314
    $Quiet = 0;                 # not quiet by default
315
    $Verbose = 0;               # not verbose by default
316
    $Doindex = 1;               # non-zero if we should generate an index
317
    $Backlink = '';             # text for "back to top" links
318
    $Listlevel = 0;             # current list depth
5542 dpurdie 319
    @Listtype = ();     # list types for open lists
320
    $ListNewTerm = 0;       # indicates new term in definition list; used
321
                    # to correctly open/close <dd> tags
361 dpurdie 322
    $Ignore = 1;                # whether or not to format text.  we don't
323
                                #   format text until we hit our first pod
324
                                #   directive.
325
 
326
    @Items_Seen = ();           # for multiples of the same item in perlfunc
327
    %Items_Named = ();
328
    $Header = 0;                # produce block header/footer
329
    $Title = '';                # title to give the pod(s)
330
    $Top = 1;                   # true if we are at the top of the doc.  used
331
                                #   to prevent the first <hr /> directive.
332
    $Paragraph = '';            # which paragraph we're processing (used
333
                                #   for error messages)
334
    %Sections = ();             # sections within this page
335
 
336
    %Local_Items = ();
337
    $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
338
#    $set_p_class = '';
339
}
340
 
341
#
342
# clean_data: global clean-up of pod data
343
#
344
sub clean_data($){
345
    my( $dataref ) = @_;
346
    for my $i ( 0..$#{$dataref} ) {
347
 
348
        ${$dataref}[$i] =~ s/\s+\Z//;
349
        ${$dataref}[$i] =~ s/^\n+(.+)/$1/;
350
 
351
        # have a look for all-space lines
352
      if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
353
            my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
354
            splice( @$dataref, $i, 1, @chunks );
355
        }
356
    }
357
}
358
 
359
 
360
sub pod2html {
361
    local(@ARGV) = @_;
362
    local($/);
363
    local $_;
364
 
365
    init_globals();
366
 
367
    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
368
 
369
    # cache of %Pages and %Items from last time we ran pod2html
370
 
371
    #undef $opt_help if defined $opt_help;
372
 
373
    # parse the command-line parameters
374
    parse_command_line();
375
 
376
    # escape the backlink argument (same goes for title but is done later...)
377
    $Backlink = html_escape($Backlink) if defined $Backlink;
378
 
379
    # set some variables to their default values if necessary
380
    local *POD;
381
    unless (@ARGV && $ARGV[0]) {
382
        $Podfile  = "-" unless $Podfile;        # stdin
383
        open(POD, "<$Podfile")
384
                || die "$0: cannot open $Podfile file for input: $!\n";
385
    } else {
386
        $Podfile = $ARGV[0];  # XXX: might be more filenames
387
        *POD = *ARGV;
388
    }
389
    $Htmlfile = "-" unless $Htmlfile;   # stdout
390
    $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
391
    $Htmldir =~ s#/\z## ;               # so we don't get a //
392
    if (  $Htmlroot eq ''
393
       && defined( $Htmldir )
394
       && $Htmldir ne ''
395
       && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
396
       )
397
    {
398
        # Set the 'base' url for this file, so that we can use it
399
        # as the location from which to calculate relative links
400
        # to other files. If this is '', then absolute links will
401
        # be used throughout.
402
        $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
403
    }
404
 
405
    # read the pod a paragraph at a time
406
    warn "Scanning for sections in input file(s)\n" if $Verbose;
407
    $/ = "";
408
    my @poddata  = <POD>;
409
    close(POD);
410
 
411
    # be eol agnostic
412
    for (@poddata) {
413
        if (/\r/) {
414
            if (/\r\n/) {
415
                @poddata = map { s/\r\n/\n/g;
416
                                 /\n\n/ ?
417
                                     map { "$_\n\n" } split /\n\n/ :
418
                                     $_ } @poddata;
419
            } else {
420
                @poddata = map { s/\r/\n/g;
421
                                 /\n\n/ ?
422
                                     map { "$_\n\n" } split /\n\n/ :
423
                                     $_ } @poddata;
424
            }
425
            last;
426
        }
427
    }
428
    clean_data( \@poddata );
429
 
430
 
431
    # scan the pod for =head[1-6] directives and build an index
432
    my $index = scan_headings(\%Sections, @poddata);
433
 
434
    unless($index) {
435
        warn "No headings in $Podfile\n" if $Verbose;
436
    }
437
 
438
    # open the output file
439
    open(HTML, ">$Htmlfile")
440
            || die "$0: cannot open $Htmlfile file for output: $!\n";
441
 
442
    # put a title in the HTML file if one wasn't specified
443
    if ($Title eq '') {
444
        TITLE_SEARCH: {
445
            for (my $i = 0; $i < @poddata; $i++) {
446
                if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
447
                    for my $para ( @poddata[$i, $i+1] ) {
448
                        last TITLE_SEARCH
449
                            if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
450
                    }
451
                }
452
 
453
            }
454
        }
455
    }
456
    if (!$Title and $Podfile =~ /\.pod\z/) {
457
        # probably a split pod so take first =head[12] as title
458
        for (my $i = 0; $i < @poddata; $i++) {
459
            last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
460
        }
461
        warn "adopted '$Title' as title for $Podfile\n"
462
            if $Verbose and $Title;
463
    }
464
    if ($Title) {
465
        $Title =~ s/\s*\(.*\)//;
466
    } else {
467
        warn "$0: no title for $Podfile.\n" unless $Quiet;
468
        $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
469
        $Title = ($Podfile eq "-" ? 'No Title' : $1);
470
        warn "using $Title" if $Verbose;
471
    }
472
    $Title = html_escape($Title);
473
 
474
    my $csslink = '';
475
    my $bodystyle = ' style="background-color: white"';
476
    my $tdstyle = ' style="background-color: #cccccc"';
477
 
478
    if ($Css) {
479
      $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
480
      $csslink =~ s,\\,/,g;
481
      $csslink =~ s,(/.):,$1|,;
482
      $bodystyle = '';
483
      $tdstyle = '';
484
    }
485
 
486
      my $block = $Header ? <<END_OF_BLOCK : '';
487
<table border="0" width="100%" cellspacing="0" cellpadding="3">
488
<tr><td class="block"$tdstyle valign="middle">
489
<big><strong><span class="block">&nbsp;$Title</span></strong></big>
490
</td></tr>
491
</table>
492
END_OF_BLOCK
493
 
494
    print HTML <<END_OF_HEAD;
495
<?xml version="1.0" ?>
496
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
497
<html xmlns="http://www.w3.org/1999/xhtml">
498
<head>
499
<title>$Title</title>$csslink
500
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
501
</head>
502
 
503
<body$bodystyle>
504
$block
505
END_OF_HEAD
506
 
507
    # load/reload/validate/cache %Pages and %Items
508
    get_cache($Dircache, $Itemcache, $Toccache, \@Podpath, $Podroot, $Recurse);
509
 
510
    # scan the pod for =item directives
511
    scan_items( \%Local_Items, "", @poddata);
512
 
513
    scan_toc( \%Toc, $Podfile, @poddata);
514
    save_toc($Podroot);
515
 
516
    # put an index at the top of the file.  note, if $Doindex is 0 we
517
    # still generate an index, but surround it with an html comment.
518
    # that way some other program can extract it if desired.
519
    $index =~ s/--+/-/g;
520
 
521
    my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : "";
522
    unless ($Doindex)
523
    {
524
        $index = qq(<!--\n$index\n-->\n);
525
    }
526
 
527
    print HTML << "END_OF_INDEX";
528
 
529
<!-- INDEX BEGIN -->
530
<div name="index"  class="index">
531
<p><a name=\"__index__\"></a></p>
532
$index
533
$hr
534
</div>
535
<!-- INDEX END -->
536
 
537
END_OF_INDEX
538
 
539
    # now convert this file
540
    my $after_item;             # set to true after an =item
541
    warn "Converting input file $Podfile\n" if $Verbose;
542
    foreach my $i (0..$#poddata){
543
        $_ = $poddata[$i];
544
        $Paragraph = $i+1;
545
        if (/^(=.*)/s) {        # is it a pod directive?
546
            $Ignore = 0;
547
            $after_item = 0;
548
            $_ = $1;
549
            if (/^=begin\s+(\S+)\s*(.*)/si) {   # =begin
550
                process_begin($1, $2);
551
            } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
552
                process_end($1, $2);
553
            } elsif (/^=cut/) {                 # =cut
554
                process_cut();
555
            } elsif (/^=pod/) {                 # =pod
556
                process_pod();
557
            } else {
558
                next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
559
 
560
                if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
561
                    process_head( $1, $2, $Doindex && $index );
562
                } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
5542 dpurdie 563
            process_item( $1 );
361 dpurdie 564
                    $after_item = 1;
565
                } elsif (/^=over\s*(.*)/) {             # =over N
566
                    process_over();
567
                } elsif (/^=back/) {                    # =back
5542 dpurdie 568
            process_back();
361 dpurdie 569
                } elsif (/^=for\s+(\S+)\s*(.*)/si) {    # =for
570
                    process_for($1,$2);
571
                } else {
572
                    /^=(\S*)\s*/;
573
                    warn "$0: $Podfile: unknown pod directive '$1' in "
574
                       . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
575
                }
576
            }
577
            $Top = 0;
578
        }
579
        else {
580
            next if $Ignore;
581
            next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
582
            print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
583
            my $text = $_;
584
 
5542 dpurdie 585
        # Open tag for definition list as we have something to put in it
586
        if( $ListNewTerm ){
587
        print HTML "<dd>\n";
588
        $ListNewTerm = 0;
589
        }
590
 
591
        if( $text =~ /\A\s+/ ){
592
        process_pre( \$text );
593
            print HTML "<pre>\n$text</pre>\n";
594
 
361 dpurdie 595
            } else {
596
                process_text( \$text );
597
 
598
                # experimental: check for a paragraph where all lines
599
                # have some ...\t...\t...\n pattern
600
                if( $text =~ /\t/ ){
601
                    my @lines = split( "\n", $text );
602
                    if( @lines > 1 ){
603
                        my $all = 2;
604
                        foreach my $line ( @lines ){
605
                            if( $line =~ /\S/ && $line !~ /\t/ ){
606
                                $all--;
607
                                last if $all == 0;
608
                            }
609
                        }
610
                        if( $all > 0 ){
611
                            $text =~ s/\t+/<td>/g;
612
                            $text =~ s/^/<tr><td>/gm;
613
                            $text = '<table cellspacing="0" cellpadding="0">' .
614
                                    $text . '</table>';
615
                        }
616
                    }
617
                }
618
                ## end of experimental
619
 
620
        if ( $set_p_class )
621
        {
622
                print HTML "<p class=\"$set_p_class\">$text</p>\n";
623
            $set_p_class = 0;
624
        }
625
        else
626
        {
627
                    print HTML "<p>$text</p>\n";
628
        }
629
            }
630
            $after_item = 0;
631
        }
632
    }
633
 
634
    # finish off any pending directives
635
    finish_list();
636
 
637
    # link to page index
638
    print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
639
        if $Doindex and $index and $Backlink;
640
 
641
    print HTML <<END_OF_TAIL;
642
$block
643
</body>
644
 
645
</html>
646
END_OF_TAIL
647
 
648
    # close the html file
649
    close(HTML);
650
 
651
    warn "Finished\n" if $Verbose;
652
}
653
 
654
##############################################################################
655
 
656
sub usage {
657
    my $podfile = shift;
658
    warn "$0: $podfile: @_\n" if @_;
659
    die <<END_OF_USAGE;
660
Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
661
           --podpath=<name>:...:<name> --podroot=<name>
662
           --libpods=<name>:...:<name> --recurse --verbose --index
663
           --netscape --norecurse --noindex --cachedir=<name>
664
 
665
  --backlink     - set text for "back to top" links (default: none).
666
  --cachedir     - directory for the item and directory cache files.
667
  --css          - stylesheet URL
668
  --flush        - flushes the item and directory caches.
669
  --[no]header   - produce block header/footer (default is no headers).
670
  --help         - prints this message.
671
  --hiddendirs   - search hidden directories in podpath
672
  --htmldir      - directory for resulting HTML files.
673
  --htmlroot     - http-server base directory from which all relative paths
674
                   in podpath stem (default is /).
675
  --[no]index    - generate an index at the top of the resulting html
676
                   (default behaviour).
677
  --infile       - filename for the pod to convert (input taken from stdin
678
                   by default).
679
  --libpods      - colon-separated list of pages to search for =item pod
680
                   directives in as targets of C<> and implicit links (empty
681
                   by default).  note, these are not filenames, but rather
682
                   page names like those that appear in L<> links.
683
  --outfile      - filename for the resulting html file (output sent to
684
                   stdout by default).
685
  --podpath      - colon-separated list of directories containing library
686
                   pods (empty by default).
687
  --podroot      - filesystem base directory from which all relative paths
688
                   in podpath stem (default is .).
689
  --[no]quiet    - suppress some benign warning messages (default is off).
690
  --[no]recurse  - recurse on those subdirectories listed in podpath
691
                   (default behaviour).
692
  --title        - title that will appear in resulting html file.
693
  --[no]verbose  - self-explanatory (off by default).
694
  --[no]netscape - deprecated, has no effect. for backwards compatibility only.
695
 
696
END_OF_USAGE
697
 
698
}
699
 
700
sub parse_command_line {
701
    my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
702
        $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
703
        $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
704
        $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
705
 
706
    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
707
    my $result = GetOptions(
708
                            'backlink=s' => \$opt_backlink,
709
                            'cachedir=s' => \$opt_cachedir,
710
                            'css=s'      => \$opt_css,
711
                            'flush'      => \$opt_flush,
712
                            'header!'    => \$opt_header,
713
                            'help'       => \$opt_help,
714
                            'hiddendirs!'=> \$opt_hiddendirs,
715
                            'htmldir=s'  => \$opt_htmldir,
716
                            'htmlroot=s' => \$opt_htmlroot,
717
                            'index!'     => \$opt_index,
718
                            'infile=s'   => \$opt_infile,
719
                            'libpods=s'  => \$opt_libpods,
720
                            'netscape!'  => \$opt_netscape,
721
                            'outfile=s'  => \$opt_outfile,
722
                            'podpath=s'  => \$opt_podpath,
723
                            'podroot=s'  => \$opt_podroot,
724
                            'quiet!'     => \$opt_quiet,
725
                            'recurse!'   => \$opt_recurse,
726
                            'title=s'    => \$opt_title,
727
                            'verbose!'   => \$opt_verbose,
728
                           );
729
    usage("-", "invalid parameters") if not $result;
730
 
731
    usage("-") if defined $opt_help;    # see if the user asked for help
732
    $opt_help = "";                     # just to make -w shut-up.
733
 
734
    @Podpath  = map { s/\|/:/g; $_ } split(":", $opt_podpath) if defined $opt_podpath;
735
    @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
736
 
737
    $Backlink = $opt_backlink if defined $opt_backlink;
738
    $Cachedir = $opt_cachedir if defined $opt_cachedir;
739
    $Css      = $opt_css      if defined $opt_css;
740
    $Header   = $opt_header   if defined $opt_header;
741
    $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
742
    $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
743
    $Doindex  = $opt_index    if defined $opt_index;
744
    $Podfile  = $opt_infile   if defined $opt_infile;
745
    $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
746
    $Htmlfile = $opt_outfile  if defined $opt_outfile;
747
    $Podroot  = $opt_podroot  if defined $opt_podroot;
748
    $Quiet    = $opt_quiet    if defined $opt_quiet;
749
    $Recurse  = $opt_recurse  if defined $opt_recurse;
750
    $Title    = $opt_title    if defined $opt_title;
751
    $Verbose  = $opt_verbose  if defined $opt_verbose;
752
 
753
    warn "Flushing item and directory caches\n"
754
        if $opt_verbose && defined $opt_flush;
755
    $Dircache = "$Cachedir/pod2htmd.tmp";
756
    $Itemcache = "$Cachedir/pod2htmi.tmp";
757
    $Toccache = "$Cachedir/pod2htmt.tmp";
758
    if (defined $opt_flush) {
759
        1 while unlink($Dircache, $Itemcache, $Toccache);
760
    }
761
}
762
 
763
 
764
my $Saved_Cache_Key;
765
 
766
sub get_cache {
767
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
768
    my @cache_key_args = @_;
769
 
770
    # A first-level cache:
771
    # Don't bother reading the cache files if they still apply
772
    # and haven't changed since we last read them.
773
    my $this_cache_key = cache_key(@cache_key_args);
774
 
775
    return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
776
 
777
    # load the cache of %Pages and %Items if possible.  $tests will be
778
    # non-zero if successful.
779
    my $tests = 0;
780
    if (-f $dircache && -f $itemcache && -f $toccache) {
781
        warn "scanning for item cache\n" if $Verbose;
782
        $tests = load_cache($dircache, $itemcache, $toccache, $podpath, $podroot);
783
    }
784
 
785
    # if we didn't succeed in loading the cache then we must (re)build
786
    #  %Pages and %Items.
787
    if (!$tests) {
788
        warn "scanning directories in pod-path\n" if $Verbose;
789
        scan_podpath($podroot, $recurse, 0);
790
    }
791
    $Saved_Cache_Key = cache_key(@cache_key_args);
792
}
793
 
794
sub cache_key {
795
    my($dircache, $itemcache, $toccache, $podpath, $podroot, $recurse) = @_;
796
    return join('!', $dircache, $itemcache, $toccache, $recurse,
797
        @$podpath, $podroot, stat($dircache), stat($itemcache), stat($toccache));
798
}
799
 
800
#
801
# load_cache - tries to find if the caches stored in $dircache and $itemcache
802
#  are valid caches of %Pages and %Items.  if they are valid then it loads
803
#  them and returns a non-zero value.
804
#
805
sub load_cache {
806
    my($dircache, $itemcache, $toccache, $podpath, $podroot) = @_;
807
    my($tests);
808
    local $_;
809
 
810
    $tests = 0;
811
 
812
    open(CACHE, "<$itemcache") ||
813
        die "$0: error opening $itemcache for reading: $!\n";
814
    $/ = "\n";
815
 
816
    # is it the same podpath?
817
    $_ = <CACHE>;
818
    chomp($_);
819
    $tests++ if (join(":", @$podpath) eq $_);
820
 
821
    # is it the same podroot?
822
    $_ = <CACHE>;
823
    chomp($_);
824
    $tests++ if ($podroot eq $_);
825
 
826
    # load the cache if its good
827
    if ($tests != 2) {
828
        close(CACHE);
829
        return 0;
830
    }
831
 
832
    warn "loading item cache\n" if $Verbose;
833
    while (<CACHE>) {
834
        /(.*?) (.*)$/;
835
        $Items{$1} = $2;
836
    }
837
    close(CACHE);
838
 
839
    warn "scanning for directory cache\n" if $Verbose;
840
    open(CACHE, "<$dircache") ||
841
        die "$0: error opening $dircache for reading: $!\n";
842
    $/ = "\n";
843
    $tests = 0;
844
 
845
    # is it the same podpath?
846
    $_ = <CACHE>;
847
    chomp($_);
848
    $tests++ if (join(":", @$podpath) eq $_);
849
 
850
    # is it the same podroot?
851
    $_ = <CACHE>;
852
    chomp($_);
853
    $tests++ if ($podroot eq $_);
854
 
855
    # load the cache if its good
856
    if ($tests != 2) {
857
        close(CACHE);
858
        return 0;
859
    }
860
 
861
    warn "loading directory cache\n" if $Verbose;
862
    while (<CACHE>) {
863
        /(.*?) (.*)$/;
864
        $Pages{$1} = $2;
865
    }
866
 
867
    close(CACHE);
868
 
869
    #
870
    #   Load Toc Cache
871
    #
872
    open(CACHE, "<$toccache") ||
873
        die "$0: error opening $toccache for reading: $!\n";
874
    $/ = "\n";
875
    $tests = 0;
876
 
877
    # is it the same podpath?
878
    $_ = <CACHE>;
879
    chomp($_);
880
    $tests++ if (join(":", @$podpath) eq $_);
881
 
882
    # is it the same podroot?
883
    $_ = <CACHE>;
884
    chomp($_);
885
    $tests++ if ($podroot eq $_);
886
 
887
    # load the cache if its good
888
    if ($tests != 2) {
889
        close(CACHE);
890
        return 0;
891
    }
892
    warn "loading toc cache\n" if $Verbose;
893
    while (<CACHE>) {
894
        /(.*?) (.*)$/;
895
        $Toc{$1} = $2;
896
    }
897
    close(CACHE);
898
 
899
    return 1;
900
}
901
 
902
#
903
# scan_podpath - scans the directories specified in @podpath for directories,
904
#  .pod files, and .pm files.  it also scans the pod files specified in
905
#  @Libpods for =item directives.
906
#
907
sub scan_podpath {
908
    my($podroot, $recurse, $append) = @_;
909
    my($pwd, $dir);
910
    my($libpod, $dirname, $pod, @files, @poddata);
911
    unless($append) {
912
        %Items = ();
913
        %Pages = ();
914
        %Toc = ();
915
    }
916
 
917
    # scan each directory listed in @Podpath
918
    $pwd = getcwd();
919
    chdir($podroot)
920
        || die "$0: error changing to directory $podroot: $!\n";
921
    foreach $dir (@Podpath) {
922
        scan_dir($dir, $recurse);
923
    }
924
 
925
    # scan the pods listed in @Libpods for =item directives
926
    foreach $libpod (@Libpods) {
927
        # if the page isn't defined then we won't know where to find it
928
        # on the system.
929
        next unless defined $Pages{$libpod} && $Pages{$libpod};
930
 
931
        # if there is a directory then use the .pod and .pm files within it.
932
        # NOTE: Only finds the first so-named directory in the tree.
933
#       if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
934
        if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)(?<!\.pl)):/) {
935
            #  find all the .pod and .pm files within the directory
936
            $dirname = $1;
937
            opendir(DIR, $dirname) ||
938
                die "$0: scan_podpath:error opening directory $dirname: $!\n";
939
            @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
940
            closedir(DIR);
941
 
942
            # scan each .pod and .pm file for =item directives
943
            foreach $pod (@files) {
944
                open(POD, "<$dirname/$pod") ||
4386 dpurdie 945
                    die "$0: error opening(pod) $dirname/$pod for input: $!\n";
361 dpurdie 946
                @poddata = <POD>;
947
                close(POD);
948
                clean_data( \@poddata );
949
 
950
                scan_items( \%Items, "$dirname/$pod", @poddata);
951
            }
952
 
953
            # use the names of files as =item directives too.
954
### Don't think this should be done this way - confuses issues.(WL)
955
###         foreach $pod (@files) {
956
###             $pod =~ /^(.*)(\.pod|\.pm)$/;
957
###             $Items{$1} = "$dirname/$1.html" if $1;
958
###         }
959
        } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
960
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
961
            # scan the .pod or .pm file for =item directives
962
            $pod = $1;
963
            open(POD, "<$pod") ||
4386 dpurdie 964
                die "$0: error opening(pod1) $pod for input: $!\n";
361 dpurdie 965
            @poddata = <POD>;
966
            close(POD);
967
            clean_data( \@poddata );
968
 
969
            scan_items( \%Items, "$pod", @poddata);
970
        } else {
971
            warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
972
        }
973
    }
974
    @poddata = ();      # clean-up a bit
975
 
976
    #
977
    #   Scan all POD for TOC metadata
978
    #
979
    foreach my $libpod ( keys %Pages )
980
    {
981
        if ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
982
                 $Pages{$libpod} =~ /([^:]*\.p[ml]):/) {
983
            # scan the .pod or .pm file for =for directives
984
            $pod = $1;
4386 dpurdie 985
            next if ($pod =~ m~\.keep\.pod~);
361 dpurdie 986
            open(POD, "<$pod") ||
4386 dpurdie 987
                die "$0: error opening(pod2) $pod for input: $!\n";
361 dpurdie 988
            @poddata = <POD>;
989
            close(POD);
990
            clean_data( \@poddata );
991
 
992
            scan_toc( \%Toc, "$pod", @poddata);
993
        }
994
    }
995
    @poddata = ();      # clean-up a bit
996
 
997
 
998
    chdir($pwd)
999
        || die "$0: error changing to directory $pwd: $!\n";
1000
 
1001
    # cache the item list for later use
1002
    warn "caching items for later use\n" if $Verbose;
1003
    open(CACHE, ">$Itemcache") ||
1004
        die "$0: error open $Itemcache for writing: $!\n";
1005
 
1006
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1007
    foreach my $key (keys %Items) {
1008
        print CACHE "$key $Items{$key}\n";
1009
    }
1010
 
1011
    close(CACHE);
1012
 
1013
    # cache the directory list for later use
1014
    warn "caching directories for later use\n" if $Verbose;
1015
    open(CACHE, ">$Dircache") ||
1016
        die "$0: error open $Dircache for writing: $!\n";
1017
 
1018
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1019
    foreach my $key (keys %Pages) {
1020
        print CACHE "$key $Pages{$key}\n";
1021
    }
1022
    close(CACHE);
1023
 
1024
    #
1025
    # cache the Toc Data for later use
1026
    #   Save Toc too
1027
    #
1028
    save_toc($podroot);
1029
}
1030
 
1031
sub save_toc
1032
{
1033
    my ($podroot) = @_;
1034
    warn "caching toc for later use\n" if $Verbose;
1035
    open(CACHE, ">$Toccache") ||
1036
        die "$0: error open $Toccache for writing: $!\n";
1037
 
1038
    print CACHE join(":", @Podpath) . "\n$podroot\n";
1039
    foreach my $key (keys %Toc) {
1040
        print CACHE "$key $Toc{$key}\n";
1041
    }
1042
    close(CACHE);
1043
}
1044
 
1045
#
1046
# scan_dir - scans the directory specified in $dir for subdirectories, .pod
1047
#  files, and .pm files.  notes those that it finds.  this information will
1048
#  be used later in order to figure out where the pages specified in L<>
1049
#  links are on the filesystem.
1050
#
1051
sub scan_dir {
1052
    my($dir, $recurse) = @_;
1053
    my($t, @subdirs, @pods, $pod, $dirname, @dirs);
1054
    local $_;
1055
 
1056
    @subdirs = ();
1057
    @pods = ();
1058
 
1059
    opendir(DIR, $dir) ||
1060
        die "$0: scan_dir:error opening directory $dir: $!\n";
1061
    while (defined($_ = readdir(DIR))) {
1062
        if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
1063
            && ($HiddenDirs || !/^\./)
1064
        ) {         # directory
1065
            $Pages{$_}  = "" unless defined $Pages{$_};
1066
            $Pages{$_} .= "$dir/$_:";
1067
            push(@subdirs, $_);
1068
        } elsif (/\.pod\z/) {                               # .pod
1069
            s/\.pod\z//;
1070
            $Pages{$_}  = "" unless defined $Pages{$_};
1071
            $Pages{$_} .= "$dir/$_.pod:";
1072
            push(@pods, "$dir/$_.pod");
1073
        } elsif (/\.html\z/) {                              # .html
1074
            s/\.html\z//;
1075
            $Pages{$_}  = "" unless defined $Pages{$_};
1076
            $Pages{$_} .= "$dir/$_.pod:";
1077
        } elsif (/\.pm\z/) {                                # .pm
1078
            s/\.pm\z//;
1079
            $Pages{$_}  = "" unless defined $Pages{$_};
1080
            $Pages{$_} .= "$dir/$_.pm:";
1081
            push(@pods, "$dir/$_.pm");
1082
        } elsif (/\.pl\z/) {                                # .pl
1083
            s/\.pl\z//;
1084
            $Pages{$_}  = "" unless defined $Pages{$_};
1085
            $Pages{$_} .= "$dir/$_.pl:";
1086
            push(@pods, "$dir/$_.pl");
1087
        } elsif (/\.pod\.txt\z/) {                          # Skip .pod.txt
1088
        } elsif (-T "$dir/$_") {                            # script(?)
1089
            local *F;
1090
            if (open(F, "$dir/$_")) {
1091
                my $line;
1092
                while (defined($line = <F>)) {
1093
                    if ($line =~ /^=(?:pod|head1)/) {
1094
                        $Pages{$_}  = "" unless defined $Pages{$_};
1095
                        $Pages{$_} .= "$dir/$_.pod:";
1096
                        last;
1097
                    }
1098
                }
1099
                close(F);
1100
            }
1101
        }
1102
    }
1103
    closedir(DIR);
1104
 
1105
    # recurse on the subdirectories if necessary
1106
    if ($recurse) {
1107
        foreach my $subdir (@subdirs) {
1108
            scan_dir("$dir/$subdir", $recurse);
1109
        }
1110
    }
1111
}
1112
 
1113
#
1114
# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1115
#  build an index.
1116
#
1117
sub scan_headings {
1118
    my($sections, @data) = @_;
1119
    my($tag, $which_head, $otitle, $listdepth, $index);
1120
 
1121
    local $Ignore = 0;
1122
 
1123
    $listdepth = 0;
1124
    $index = "";
1125
 
1126
    # scan for =head directives, note their name, and build an index
1127
    #  pointing to each of them.
1128
    foreach my $line (@data) {
1129
      if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1130
        ($tag, $which_head, $otitle) = ($1,$2,$3);
1131
 
1132
        my $title = depod( $otitle );
1133
        my $name = anchorify( $title );
1134
        $$sections{$name} = 1;
1135
        $title = process_text( \$otitle );
1136
 
1137
            while ($which_head != $listdepth) {
1138
                if ($which_head > $listdepth) {
1139
                    $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1140
                    $listdepth++;
1141
                } elsif ($which_head < $listdepth) {
1142
                    $listdepth--;
1143
                    $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1144
                }
1145
            }
1146
 
1147
            $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1148
                      "<a href=\"#" . $name . "\">" .
1149
                      $title . "</a></li>";
1150
        }
1151
    }
1152
 
1153
    # finish off the lists
1154
    while ($listdepth--) {
1155
        $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1156
    }
1157
 
1158
    # get rid of bogus lists
1159
    $index =~ s,\t*<ul>\s*</ul>\n,,g;
1160
 
1161
    return $index;
1162
}
1163
 
1164
#
1165
# scan_items - scans the pod specified by $pod for =item directives.  we
1166
#  will use this information later on in resolving C<> links.
1167
#
1168
sub scan_items {
1169
    my( $itemref, $pod, @poddata ) = @_;
1170
    my($i, $item);
1171
    local $_;
1172
 
1173
    $pod =~ s/\.pod\z//;
1174
    $pod =~ s/\.p[lm]\z//;
1175
    $pod .= ".html" if $pod;
1176
 
1177
    foreach $i (0..$#poddata) {
1178
        my $txt = depod( $poddata[$i] );
1179
        $txt =~ s~^\n~~;
1180
        # figure out what kind of item it is.
1181
        # Build string for referencing this item.
5542 dpurdie 1182
    if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list
361 dpurdie 1183
            next unless $1;
1184
            $item = $1;
1185
        } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1186
            $item = $1;
5542 dpurdie 1187
    } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list
361 dpurdie 1188
            $item = $1;
1189
        } elsif( $txt =~ /\A=head[234]\s+(.*)\Z/s ) { # plain item
1190
            $item = $1;
1191
        } else {
1192
            next;
1193
        }
1194
        my $fid = fragment_id( $item );
1195
        $$itemref{$fid} = "$pod" if $fid;
1196
    }
1197
}
1198
 
1199
sub scan_toc {
1200
    my( $tocref, $pod, @poddata ) = @_;
1201
    my($i, $item);
1202
    local $_;
1203
 
1204
    $pod =~ s/\.pod\z//;
1205
    $pod =~ s/\.p[lm]\z//;
1206
    $pod .= ".html" if $pod;
1207
 
1208
    foreach $i (0..$#poddata) {
1209
        my $txt = depod( $poddata[$i] );
1210
 
1211
        if ($txt =~ /^=for\s+htmltoc\s*(.*)/si) {# =for
1212
            $item = $1;
1213
            $$tocref{$pod} = $1 if $1;
1214
        }
1215
    }
1216
}
1217
 
1218
 
1219
#
1220
# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1221
#
1222
sub process_head {
1223
    my($tag, $heading, $hasindex) = @_;
1224
 
1225
    # figure out the level of the =head
1226
    $tag =~ /head([1-6])/;
1227
    my $level = $1;
1228
 
5542 dpurdie 1229
 
1230
        finish_list();
361 dpurdie 1231
 
1232
    print HTML "<p>\n";
1233
    if( $level == 1 && ! $Top ){
1234
      print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1235
        if $hasindex and $Backlink;
1236
      print HTML "</p>\n<hr />\n"
1237
    } else {
1238
      print HTML "</p>\n";
1239
    }
1240
 
1241
    my $name = anchorify( depod( $heading ) );
1242
    my $convert = process_text( \$heading );
1243
    print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1244
}
1245
 
1246
 
1247
#
1248
# emit_item_tag - print an =item's text
1249
# Note: The global $EmittedItem is used for inhibiting self-references.
1250
#
1251
my $EmittedItem;
1252
 
1253
sub emit_item_tag($$$){
1254
    my( $otext, $text, $compact ) = @_;
1255
    my $item = fragment_id( depod($text) , -generate);
1256
    Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile")
1257
        if !defined $item;
1258
    $EmittedItem = $item;
1259
### print STDERR "emit_item_tag=$item ($text)\n";
1260
 
1261
    print HTML '<strong>';
1262
    if ($Items_Named{$item}++) {
1263
        print HTML process_text( \$otext );
1264
    } else {
1265
        my $name = 'item_' .$item;
1266
        $name = anchorify($name);
1267
        print HTML qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>';
1268
    }
1269
    print HTML "</strong>\n";
1270
    undef( $EmittedItem );
1271
}
1272
 
5542 dpurdie 1273
sub new_listitem {
361 dpurdie 1274
    my( $tag ) = @_;
5542 dpurdie 1275
    # Open tag for definition list as we have something to put in it
1276
    if( ($tag ne 'dl') && ($ListNewTerm) ){
1277
    print HTML "<dd>\n";
1278
    $ListNewTerm = 0;
1279
    }
1280
 
361 dpurdie 1281
    if( $Items_Seen[$Listlevel]++ == 0 ){
5542 dpurdie 1282
    # start of new list
1283
    push( @Listtype, "$tag" );
1284
    print HTML "<$tag>\n";
1285
    } else {
1286
    # if this is not the first item, close the previous one
1287
    if ( $tag eq 'dl' ){
1288
        print HTML "</dd>\n" unless $ListNewTerm;
1289
    } else {
1290
        print HTML "</li>\n";
361 dpurdie 1291
    }
5542 dpurdie 1292
    }
1293
    my $opentag = $tag eq 'dl' ? 'dt' : 'li';
1294
    print HTML "<$opentag>";
361 dpurdie 1295
}
1296
 
1297
#
1298
# process_item - convert a pod item tag and convert it to HTML format.
1299
#
1300
sub process_item {
1301
    my( $otext ) = @_;
1302
 
1303
    # lots of documents start a list without doing an =over.  this is
1304
    # bad!  but, the proper thing to do seems to be to just assume
1305
    # they did do an =over.  so warn them once and then continue.
1306
    if( $Listlevel == 0 ){
1307
        warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1308
        process_over();
1309
    }
1310
 
1311
    # remove formatting instructions from the text
1312
    my $text = depod( $otext );
1313
 
1314
    # all the list variants:
1315
    if( $text =~ /\A\*/ ){ # bullet
5542 dpurdie 1316
        new_listitem( 'ul' );
361 dpurdie 1317
        if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text
1318
            my $tag = $1;
1319
            $otext =~ s/\A\*\s+//;
1320
            emit_item_tag( $otext, $tag, 1 );
5542 dpurdie 1321
            print HTML "\n";
361 dpurdie 1322
        }
1323
 
1324
    } elsif( $text =~ /\A\d+/ ){ # numbered list
5542 dpurdie 1325
        new_listitem( 'ol' );
361 dpurdie 1326
        if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text
1327
            my $tag = $1;
1328
            $otext =~ s/\A\d+\.?\s*//;
1329
            emit_item_tag( $otext, $tag, 1 );
5542 dpurdie 1330
            print HTML "\n";
361 dpurdie 1331
        }
1332
 
5542 dpurdie 1333
    } else {            # definition list
1334
        # new_listitem takes care of opening the <dt> tag
1335
        new_listitem( 'dl' );
361 dpurdie 1336
        if ($text =~ /\A(.+)\Z/s ){ # should have text
1337
            emit_item_tag( $otext, $text, 1 );
5542 dpurdie 1338
        # write the definition term and close <dt> tag
1339
        print HTML "</dt>\n";
361 dpurdie 1340
        }
5542 dpurdie 1341
        # trigger opening a <dd> tag for the actual definition; will not
1342
        # happen if next paragraph is also a definition term (=item)
1343
        $ListNewTerm = 1;
361 dpurdie 1344
    }
1345
    print HTML "\n";
1346
}
1347
 
1348
#
1349
# process_over - process a pod over tag and start a corresponding HTML list.
1350
#
1351
sub process_over {
1352
    # start a new list
1353
    $Listlevel++;
1354
    push( @Items_Seen, 0 );
1355
}
1356
 
1357
#
1358
# process_back - process a pod back tag and convert it to HTML format.
1359
#
1360
sub process_back {
1361
    if( $Listlevel == 0 ){
5542 dpurdie 1362
    warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1363
    return;
361 dpurdie 1364
    }
1365
 
5542 dpurdie 1366
    # close off the list.  note, I check to see if $Listtype[$Listlevel] is
361 dpurdie 1367
    # defined because an =item directive may have never appeared and thus
5542 dpurdie 1368
    # $Listtype[$Listlevel] may have never been initialized.
361 dpurdie 1369
    $Listlevel--;
5542 dpurdie 1370
    if( defined $Listtype[$Listlevel] ){
1371
        if ( $Listtype[$Listlevel] eq 'dl' ){
1372
            print HTML "</dd>\n" unless $ListNewTerm;
1373
        } else {
1374
            print HTML "</li>\n";
1375
        }
1376
        print HTML "</$Listtype[$Listlevel]>\n";
1377
        pop( @Listtype );
1378
        $ListNewTerm = 0;
361 dpurdie 1379
    }
1380
 
1381
    # clean up item count
1382
    pop( @Items_Seen );
1383
}
1384
 
1385
#
1386
# process_cut - process a pod cut tag, thus start ignoring pod directives.
1387
#
1388
sub process_cut {
1389
    $Ignore = 1;
1390
}
1391
 
1392
#
1393
# process_pod - process a pod tag, thus stop ignoring pod directives
1394
# until we see a corresponding cut.
1395
#
1396
sub process_pod {
1397
    # no need to set $Ignore to 0 cause the main loop did it
1398
}
1399
 
1400
#
1401
# process_for - process a =for pod tag.  if it's for html, spit
1402
# it out verbatim, if illustration, center it, otherwise ignore it.
1403
#
1404
sub process_for {
1405
    my($whom, $text) = @_;
1406
    if ( $whom =~ /^(pod2)?html$/i) {
1407
            print HTML $text;
1408
    } elsif ($whom =~ /^htmlclass$/i) {
1409
        $set_p_class = $text;
1410
 
1411
    } elsif ($whom =~ /^htmltoc$/i) {
1412
            # Not processed here - processed in scan_toc
1413
 
1414
    } elsif ($whom =~ /^illustration$/i) {
1415
        1 while chomp $text;
1416
        for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1417
          $text .= $ext, last if -r "$text$ext";
1418
        }
1419
        print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1420
    }
1421
}
1422
 
1423
#
1424
# process_begin - process a =begin pod tag.  this pushes
1425
# whom we're beginning on the begin stack.  if there's a
1426
# begin stack, we only print if it us.
1427
#
1428
sub process_begin {
1429
    my($whom, $text) = @_;
1430
    $whom = lc($whom);
1431
    push (@Begin_Stack, $whom);
1432
    if ( $whom =~ /^(pod2)?html$/) {
1433
        print HTML $text if $text;
1434
    }
1435
}
1436
 
1437
#
1438
# process_end - process a =end pod tag.  pop the
1439
# begin stack.  die if we're mismatched.
1440
#
1441
sub process_end {
1442
    my($whom, $text) = @_;
1443
    $whom = lc($whom);
1444
    if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) {
1445
        Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n")
1446
    }
1447
    pop( @Begin_Stack );
1448
}
1449
 
1450
#
1451
# process_pre - indented paragraph, made into <pre></pre>
1452
#
1453
sub process_pre {
1454
    my( $text ) = @_;
1455
    my( $rest );
1456
    return if $Ignore;
1457
 
1458
    $rest = $$text;
1459
 
1460
    # insert spaces in place of tabs
1461
    $rest =~ s#(.+)#
1462
            my $line = $1;
1463
            1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1464
            $line;
1465
        #eg;
1466
 
1467
    my @line = split /\n/, $rest;
1468
    my $comments = !grep !/^\s*(#.*)?$/, @line;
1469
 
1470
    # Try to colorize the block as Perl sample code
1471
    if (($comments || $rest =~ /[;{]/) &&
1472
        eval {require ActiveState::Scineplex})
1473
    {
1474
        my $prefix;
1475
        my $input = $rest; # Don't modify $rest in case the colorizer fails
1476
        if ($input =~ /^( +)/) {
1477
            $prefix = $1;
1478
            s/^$prefix// for @line;
1479
            $input = join("\n", @line, "");
1480
        }
1481
        my $styled = eval { ActiveState::Scineplex::Annotate($input, 'perl', outputFormat => 'html') };
1482
        if ($styled) {
1483
            # If this really looks like code, then we don't want
1484
            # to add hyperlinks to URLs embedded in strings etc.
1485
            if ($prefix) {
1486
                $$text = $prefix . join("\n$prefix", split("\n", $styled)) . "\n";
1487
            }
1488
            else {
1489
                $$text = $styled;
1490
            }
1491
            return;
1492
        }
1493
    }
1494
 
1495
    # convert some special chars to HTML escapes
1496
    $rest = html_escape($rest);
1497
 
1498
    # try and create links for all occurrences of perl.* within
1499
    # the preformatted text.
1500
    $rest =~ s{
1501
                 (\s*)(perl\w+)
1502
              }{
1503
                 if ( defined $Pages{$2} ){     # is a link
1504
                     qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1505
                 } elsif (defined $Pages{dosify($2)}) { # is a link
1506
                     qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1507
                 } else {
1508
                     "$1$2";
1509
                 }
1510
              }xeg;
1511
     $rest =~ s{
1512
                 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1513
               }{
1514
                  my $url ;
1515
                  if ( $Htmlfileurl ne '' ){
1516
                     # Here, we take advantage of the knowledge
1517
                     # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1518
                     # Since $Htmlroot eq '', we need to prepend $Htmldir
1519
                     # on the fron of the link to get the absolute path
1520
                     # of the link's target. We check for a leading '/'
1521
                     # to avoid corrupting links that are #, file:, etc.
1522
                     my $old_url = $3 ;
1523
                     $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1524
                     $url = relativize_url( "$old_url.html", $Htmlfileurl );
1525
                  } else {
1526
                     $url = "$3.html" ;
1527
                  }
1528
                  "$1$url" ;
1529
               }xeg;
1530
 
1531
    # Look for embedded URLs and make them into links.  We don't
1532
    # relativize them since they are best left as the author intended.
1533
 
1534
    my $urls = '(' . join ('|', qw{
1535
                http
1536
                telnet
1537
                mailto
1538
                news
1539
                gopher
1540
                file
1541
                wais
1542
                ftp
1543
            } )
1544
        . ')';
1545
 
1546
    my $ltrs = '\w';
1547
    my $gunk = '/#~:.?+=&%@!\-';
1548
    my $punc = '.:!?\-;';
1549
    my $any  = "${ltrs}${gunk}${punc}";
1550
 
1551
    $rest =~ s{
1552
        \b                      # start at word boundary
1553
        (                       # begin $1  {
1554
            $urls :             # need resource and a colon
1555
            (?!:)               # Ignore File::, among others.
1556
            [$any] +?           # followed by one or more of any valid
1557
                                #   character, but be conservative and
1558
                                #   take only what you need to....
1559
        )                       # end   $1  }
1560
        (?=
1561
            &quot; &gt;         # maybe pre-quoted '<a href="...">'
1562
        |                       # or:
1563
            [$punc]*            # 0 or more punctuation
1564
            (?:                 #   followed
1565
                [^$any]         #   by a non-url char
1566
            |                   #   or
1567
                $               #   end of the string
1568
            )                   #
1569
        |                       # or else
1570
            $                   #   then end of the string
1571
        )
1572
      }{<a href="$1">$1</a>}igox;
1573
 
1574
    # text should be as it is (verbatim)
1575
    $$text = $rest;
1576
}
1577
 
1578
 
1579
#
1580
# pure text processing
1581
#
1582
# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1583
# we don't want this to happen within IS
1584
#
1585
sub pure_text($){
1586
    my $text = shift();
1587
    process_puretext( $text, 1 );
1588
}
1589
 
1590
sub inIS_text($){
1591
    my $text = shift();
1592
    process_puretext( $text, 0 );
1593
}
1594
 
1595
#
1596
# process_puretext - process pure text (without pod-escapes) converting
1597
#  double-quotes and handling implicit C<> links.
1598
#
1599
sub process_puretext {
1600
    my($text, $notinIS) = @_;
1601
 
1602
    ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1603
    ## to produce some strange looking ref's. uncomment to disable:
1604
    ## $notinIS = 0;
1605
 
1606
    my(@words, $lead, $trail);
1607
 
1608
    # keep track of leading and trailing white-space
1609
    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1610
    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1611
 
1612
    # split at space/non-space boundaries
1613
    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1614
 
1615
    # process each word individually
1616
    foreach my $word (@words) {
1617
        # skip space runs
1618
        next if $word =~ /^\s*$/;
1619
        # see if we can infer a link or a function call
1620
        #
1621
        # NOTE: This is a word based search, it won't automatically
1622
        # mark "substr($var, 1, 2)" because the 1st word would be "substr($var"
1623
        # User has to enclose those with proper C<>
1624
 
1625
        if( $notinIS && $word =~
1626
            m/
1627
                ^([a-z_]{2,})                 # The function name
1628
                \(
1629
                    ([0-9][a-z]*              # Manual page(1) or page(1M)
1630
                    |[^)]*[\$\@\%][^)]+       # ($foo), (1, @foo), (%hash)
1631
                    |                         # ()
1632
                    )
1633
                \)
1634
                ([.,;]?)$                     # a possible punctuation follows
1635
            /xi
1636
        ) {
1637
            # has parenthesis so should have been a C<> ref
1638
            ## try for a pagename (perlXXX(1))?
1639
            my( $func, $args, $rest ) = ( $1, $2, $3 || '' );
1640
            if( $args =~ /^\d+$/ ){
1641
                my $url = page_sect( $word, '' );
1642
                if( defined $url ){
1643
                    $word = qq(<a href="$url" class="man">the $word manpage</a>$rest);
1644
                    next;
1645
                }
1646
            }
1647
            ## try function name for a link, append tt'ed argument list
1648
            $word = emit_C( $func, '', "($args)") . $rest;
1649
 
1650
#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1651
##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1652
##          # perl variables, should be a C<> ref
1653
##          $word = emit_C( $word );
1654
 
1655
        } elsif ($word =~ m,^\w+://\w,) {
1656
            # looks like a URL
1657
            # Don't relativize it: leave it as the author intended
1658
            $word = qq(<a href="$word">$word</a>);
1659
        } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1660
            # looks like an e-mail address
1661
            my ($w1, $w2, $w3) = ("", $word, "");
1662
            ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1663
            ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1664
            $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1665
        } else {
1666
            $word = html_escape($word) if $word =~ /["&<>]/;
1667
        }
1668
    }
1669
 
1670
    # put everything back together
1671
    return $lead . join( '', @words ) . $trail;
1672
}
1673
 
1674
 
1675
#
1676
# process_text - handles plaintext that appears in the input pod file.
1677
# there may be pod commands embedded within the text so those must be
1678
# converted to html commands.
1679
#
1680
 
1681
sub process_text1($$;$$);
1682
sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' }
1683
sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 }
1684
 
1685
sub process_text {
1686
    return if $Ignore;
1687
    my( $tref ) = @_;
1688
    my $res = process_text1( 0, $tref );
1689
    $res =~ s/\s+$//s;
1690
    $$tref = $res;
1691
}
1692
 
1693
sub process_text_rfc_links {
1694
    my $text = shift;
1695
 
1696
    # For every "RFCnnnn" or "RFC nnn", link it to the authoritative
1697
    # ource. Do not use the /i modifier here. Require "RFC" to be written in
1698
    #  in capital letters.
1699
 
1700
    $text =~ s{
1701
        (?<=[^<>[:alpha:]])           # Make sure this is not an URL already
1702
        (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits
1703
    }
1704
    {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx;
1705
 
1706
    $text;
1707
}
1708
 
1709
sub process_text1($$;$$){
1710
    my( $lev, $rstr, $func, $closing ) = @_;
1711
    my $res = '';
1712
 
1713
    unless (defined $func) {
1714
        $func = '';
1715
        $lev++;
1716
    }
1717
 
1718
    if( $func eq 'B' ){
1719
        # B<text> - boldface
1720
        $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1721
 
1722
    } elsif( $func eq 'C' ){
1723
        # C<code> - can be a ref or <code></code>
1724
        # need to extract text
1725
        my $par = go_ahead( $rstr, 'C', $closing );
1726
 
1727
        ## clean-up of the link target
1728
        my $text = depod( $par );
1729
 
1730
        ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1731
        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1732
 
1733
        $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1734
 
1735
    } elsif( $func eq 'E' ){
1736
        # E<x> - convert to character
1737
        $$rstr =~ s/^([^>]*)>//;
1738
        my $escape = $1;
1739
        $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1740
        $res = "&$escape;";
1741
 
1742
    } elsif( $func eq 'F' ){
1743
        # F<filename> - italicize
1744
        $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>';
1745
 
1746
    } elsif( $func eq 'I' ){
1747
        # I<text> - italicize
1748
        $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1749
 
1750
    } elsif( $func eq 'L' ){
1751
        # L<link> - link
1752
        ## L<text|cross-ref> => produce text, use cross-ref for linking
1753
        ## L<cross-ref> => make text from cross-ref
1754
        ## need to extract text
1755
        my $par = go_ahead( $rstr, 'L', $closing );
1756
 
1757
        # some L<>'s that shouldn't be:
1758
        # a) full-blown URL's are emitted as-is
1759
        if( $par =~ m{^\w+://}s ){
1760
            return make_URL_href( $par );
1761
        }
1762
        # b) C<...> is stripped and treated as C<>
1763
        if( $par =~ /^C<(.*)>$/ ){
1764
            my $text = depod( $1 );
1765
            return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1766
        }
1767
 
1768
        # analyze the contents
1769
        $par =~ s/\n/ /g;   # undo word-wrapped tags
1770
        my $opar = $par;
1771
        my $linktext;
1772
        if( $par =~ s{^([^|]+)\|}{} ){
1773
            $linktext = $1;
1774
        }
1775
 
1776
        # make sure sections start with a /
1777
        $par =~ s{^"}{/"};
1778
 
1779
        my( $page, $section, $ident );
1780
 
1781
        # check for link patterns
1782
        if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1783
            # we've got a name/ident (no quotes)
1784
            if (length $2) {
1785
                ( $page, $ident ) = ( $1, $2 );
1786
            } else {
1787
                ( $page, $section ) = ( $1, $2 );
1788
            }
1789
            ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1790
 
1791
        } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1792
            # even though this should be a "section", we go for ident first
1793
            ( $page, $ident ) = ( $1, $2 );
1794
            ### print STDERR "--> L<$par> to page $page, section $section\n";
1795
 
1796
        } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1797
            ( $page, $section ) = ( '', $par );
1798
            ### print STDERR "--> L<$par> to void page, section $section\n";
1799
 
1800
        } else {
1801
            ( $page, $section ) = ( $par, '' );
1802
            ### print STDERR "--> L<$par> to page $par, void section\n";
1803
        }
1804
 
1805
        # now, either $section or $ident is defined. the convoluted logic
1806
        # below tries to resolve L<> according to what the user specified.
1807
        # failing this, we try to find the next best thing...
1808
        my( $url, $ltext, $fid );
1809
 
1810
        RESOLVE: {
1811
            if( defined $ident ){
1812
                ## try to resolve $ident as an item
1813
                ( $url, $fid ) = coderef( $page, $ident );
1814
                if( $url ){
1815
                    if( ! defined( $linktext ) ){
1816
                        $linktext = $ident;
1817
                        $linktext .= " in " if $ident && $page;
1818
                        $linktext .= "the $page manpage" if $page;
1819
                    }
1820
                    ###  print STDERR "got coderef url=$url\n";
1821
                    last RESOLVE;
1822
                }
1823
                ## no luck: go for a section (auto-quoting!)
1824
                $section = $ident;
1825
            }
1826
            ## now go for a section
1827
            my $htmlsection = htmlify( $section );
1828
            $url = page_sect( $page, $htmlsection );
1829
            if( $url ){
1830
                if( ! defined( $linktext ) ){
1831
                    $linktext = $section;
1832
                    $linktext .= " in " if $section && $page;
1833
                    $linktext .= "the $page manpage" if $page;
1834
                }
1835
                ### print STDERR "got page/section url=$url\n";
1836
                last RESOLVE;
1837
            }
1838
            ## no luck: go for an ident
1839
            if( $section ){
1840
                $ident = $section;
1841
            } else {
1842
                $ident = $page;
1843
                $page  = undef();
1844
            }
1845
            ( $url, $fid ) = coderef( $page, $ident );
1846
            if( $url ){
1847
                if( ! defined( $linktext ) ){
1848
                    $linktext = $ident;
1849
                    $linktext .= " in " if $ident && $page;
1850
                    $linktext .= "the $page manpage" if $page;
1851
                }
1852
                ### print STDERR "got section=>coderef url=$url\n";
1853
                last RESOLVE;
1854
            }
1855
 
1856
            # warning; show some text.
1857
            $linktext = $opar unless defined $linktext;
1858
            warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1859
        }
1860
 
1861
        # now we have a URL or just plain code
1862
        $$rstr = $linktext . '>' . $$rstr;
1863
        if( defined( $url ) ){
1864
            $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1865
        } else {
1866
            $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1867
        }
1868
 
1869
    } elsif( $func eq 'S' ){
1870
        # S<text> - non-breaking spaces
1871
        $res = process_text1( $lev, $rstr );
1872
        $res =~ s/ /&nbsp;/g;
1873
 
1874
    } elsif( $func eq 'X' ){
1875
        # X<> - ignore
1876
        warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1877
            unless $$rstr =~ s/^[^>]*>// or $Quiet;
1878
    } elsif( $func eq 'Z' ){
1879
        # Z<> - empty
1880
        warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n"
1881
            unless $$rstr =~ s/^>// or $Quiet;
1882
 
1883
    } else {
1884
        my $term = pattern $closing;
1885
        while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1886
            # all others: either recurse into new function or
1887
            # terminate at closing angle bracket(s)
1888
            my $pt = $1;
1889
            $pt .= $2 if !$3 &&  $lev == 1;
1890
            $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1891
            return $res if !$3 && $lev > 1;
1892
            if( $3 ){
1893
                $res .= process_text1( $lev, $rstr, $3, closing $4 );
1894
            }
1895
        }
1896
        if( $lev == 1 ){
1897
            $res .= pure_text( $$rstr );
1898
        } elsif( ! $Quiet ) {
1899
            my $snippet = substr($$rstr,0,60);
1900
            warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" 
1901
 
1902
        }
1903
        $res = process_text_rfc_links($res);
1904
    }
1905
    return $res;
1906
}
1907
 
1908
#
1909
# go_ahead: extract text of an IS (can be nested)
1910
#
1911
sub go_ahead($$$){
1912
    my( $rstr, $func, $closing ) = @_;
1913
    my $res = '';
1914
    my @closing = ($closing);
1915
    while( $$rstr =~
1916
      s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){
1917
        $res .= $1;
1918
        unless( $3 ){
1919
            shift @closing;
1920
            return $res unless @closing;
1921
        } else {
1922
            unshift @closing, closing $4;
1923
        }
1924
        $res .= $2;
1925
    }
1926
    unless ($Quiet) {
1927
        my $snippet = substr($$rstr,0,60);
1928
        warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n" 
1929
    }           
1930
    return $res;
1931
}
1932
 
1933
#
1934
# emit_C - output result of C<text>
1935
#    $text is the depod-ed text
1936
#
1937
sub emit_C($;$$){
1938
    my( $text, $nocode, $args ) = @_;
1939
    $args = '' unless defined $args;
1940
    my $res;
1941
    my( $url, $fid ) = coderef( undef(), $text );
1942
 
1943
    # need HTML-safe text
1944
    my $linktext = html_escape( "$text$args" );
1945
 
1946
    if( defined( $url ) &&
1947
        (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1948
        $res = "<a href=\"$url\"><code>$linktext</code></a>";
1949
    } elsif( 0 && $nocode ){
1950
        $res = $linktext;
1951
    } else {
1952
        $res = "<code>$linktext</code>";
1953
    }
1954
    return $res;
1955
}
1956
 
1957
#
1958
# html_escape: make text safe for HTML
1959
#
1960
sub html_escape {
1961
    my $rest = $_[0];
1962
    $rest   =~ s/&/&amp;/g;
1963
    $rest   =~ s/</&lt;/g;
1964
    $rest   =~ s/>/&gt;/g;
1965
    $rest   =~ s/"/&quot;/g;
1966
    # &apos; is only in XHTML, not HTML4.  Be conservative
1967
    #$rest   =~ s/'/&apos;/g;
1968
    return $rest;
1969
}
1970
 
1971
 
1972
#
1973
# dosify - convert filenames to 8.3
1974
#
1975
sub dosify {
1976
    my($str) = @_;
1977
    return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1978
    if ($Is83) {
1979
        $str = lc $str;
1980
        $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1981
        $str =~ s/(\w+)/substr ($1,0,8)/ge;
1982
    }
1983
    return $str;
1984
}
1985
 
1986
#
1987
# page_sect - make a URL from the text of a L<>
1988
#
1989
sub page_sect($$) {
1990
    my( $page, $section ) = @_;
1991
    my( $linktext, $page83, $link);     # work strings
1992
 
1993
    # check if we know that this is a section in this page
1994
    if (!defined $Pages{$page} && defined $Sections{$page}) {
1995
        $section = $page;
1996
        $page = "";
1997
        ### print STDERR "reset page='', section=$section\n";
1998
    }
1999
 
2000
    $page83=dosify($page);
2001
    $page=$page83 if (defined $Pages{$page83});
2002
    if ($page eq "") {
2003
        $link = "#" . anchorify( $section );
2004
    } elsif ( $page =~ /::/ ) {
2005
        $page =~ s,::,/,g;
2006
        # Search page cache for an entry keyed under the html page name,
2007
        # then look to see what directory that page might be in.  NOTE:
2008
        # this will only find one page. A better solution might be to produce
2009
        # an intermediate page that is an index to all such pages.
2010
        my $page_name = $page ;
2011
        $page_name =~ s,^.*/,,s ;
2012
        if ( defined( $Pages{ $page_name } ) &&
2013
             $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
2014
           ) {
2015
            $page = $1 ;
2016
        }
2017
        else {
2018
            # NOTE: This branch assumes that all A::B pages are located in
2019
            # $Htmlroot/A/B.html . This is often incorrect, since they are
2020
            # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
2021
            # analyze the contents of %Pages and figure out where any
2022
            # cousins of A::B are, then assume that.  So, if A::B isn't found,
2023
            # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
2024
            # lib/A/B.pm. This is also limited, but it's an improvement.
2025
            # Maybe a hints file so that the links point to the correct places
2026
            # nonetheless?
2027
 
2028
        }
2029
        $link = "$Htmlroot/$page.html";
2030
        $link .= "#" . anchorify( $section ) if ($section);
2031
    } elsif (!defined $Pages{$page}) {
2032
        $link = "";
2033
    } else {
2034
        $section = anchorify( $section ) if $section ne "";
2035
        ### print STDERR "...section=$section\n";
2036
 
2037
        # if there is a directory by the name of the page, then assume that an
2038
        # appropriate section will exist in the subdirectory
2039
#       if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
2040
        if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
2041
            $link = "$Htmlroot/$1/$section.html";
2042
            ### print STDERR "...link=$link\n";
2043
 
2044
        # since there is no directory by the name of the page, the section will
2045
        # have to exist within a .html of the same name.  thus, make sure there
2046
        # is a .pod or .pm that might become that .html
2047
        } else {
2048
            $section = "#$section" if $section;
2049
            ### print STDERR "...section=$section\n";
2050
 
2051
            # check if there is a .pod with the page name.
2052
            # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
2053
            if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
2054
                $link = "$Htmlroot/$1.html$section";
2055
            } else {
2056
                $link = "";
2057
            }
2058
        }
2059
    }
2060
 
2061
    if ($link) {
2062
        # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
2063
        # implies $Htmlroot eq ''. This means that the link in question
2064
        # needs a prefix of $Htmldir if it begins with '/'. The test for
2065
        # the initial '/' is done to avoid '#'-only links, and to allow
2066
        # for other kinds of links, like file:, ftp:, etc.
2067
        my $url ;
2068
        if (  $Htmlfileurl ne '' ) {
2069
            $link = "$Htmldir$link" if $link =~ m{^/}s;
2070
            $url = relativize_url( $link, $Htmlfileurl );
2071
# print( "  b: [$link,$Htmlfileurl,$url]\n" );
2072
        }
2073
        else {
2074
            $url = $link ;
2075
        }
2076
        return $url;
2077
 
2078
    } else {
2079
        return undef();
2080
    }
2081
}
2082
 
2083
#
2084
# relativize_url - convert an absolute URL to one relative to a base URL.
2085
# Assumes both end in a filename.
2086
#
2087
sub relativize_url {
2088
    my ($dest,$source) = @_ ;
2089
 
2090
    my ($dest_volume,$dest_directory,$dest_file) =
2091
        File::Spec::Unix->splitpath( $dest ) ;
2092
    $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
2093
 
2094
    my ($source_volume,$source_directory,$source_file) =
2095
        File::Spec::Unix->splitpath( $source ) ;
2096
    $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
2097
 
2098
    my $rel_path = '' ;
2099
    if ( $dest ne '' ) {
2100
       $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
2101
    }
2102
 
2103
    if ( $rel_path ne ''                &&
2104
         substr( $rel_path, -1 ) ne '/' &&
2105
         substr( $dest_file, 0, 1 ) ne '#'
2106
        ) {
2107
        $rel_path .= "/$dest_file" ;
2108
    }
2109
    else {
2110
        $rel_path .= "$dest_file" ;
2111
    }
2112
 
2113
    return $rel_path ;
2114
}
2115
 
2116
 
2117
#
2118
# coderef - make URL from the text of a C<>
2119
#
2120
sub coderef($$){
2121
    my( $page, $item ) = @_;
2122
    my( $url );
2123
 
2124
    my $fid = fragment_id( $item );
2125
 
2126
    if( defined( $page ) && $page ne "" ){
2127
        # we have been given a $page...
2128
        $page =~ s{::}{/}g;
2129
 
2130
        Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile")
2131
            if !defined $fid;    
2132
        # Do we take it? Item could be a section!
2133
        my $base = $Items{$fid} || "";
2134
        $base =~ s{[^/]*/}{};
2135
        if( $base ne "$page.html" ){
2136
            ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
2137
            $page = undef();
2138
        }
2139
 
2140
    } else {
2141
        # no page - local items precede cached items
2142
        if( defined( $fid ) ){
2143
            if(  exists $Local_Items{$fid} ){
2144
                $page = $Local_Items{$fid};
2145
            } else {
2146
                $page = $Items{$fid};
2147
            }
2148
        }
2149
    }
2150
 
2151
    # if there was a pod file that we found earlier with an appropriate
2152
    # =item directive, then create a link to that page.
2153
    if( defined $page ){
2154
        if( $page ){
2155
            if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
2156
                $page = $1 . '.html';
2157
            }
2158
            my $link = "$Htmlroot/$page#item_" . anchorify($fid);
2159
 
2160
            # Here, we take advantage of the knowledge that $Htmlfileurl
2161
            # ne '' implies $Htmlroot eq ''.
2162
            if (  $Htmlfileurl ne '' ) {
2163
                $link = "$Htmldir$link" ;
2164
                $url = relativize_url( $link, $Htmlfileurl ) ;
2165
            } else {
2166
                $url = $link ;
2167
            }
2168
        } else {
2169
            $url = "#item_" . anchorify($fid);
2170
        }
2171
 
2172
        confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
2173
    }
2174
    return( $url, $fid );
2175
}
2176
 
2177
 
2178
 
2179
#
2180
# Adapted from Nick Ing-Simmons' PodToHtml package.
2181
sub relative_url {
2182
    my $source_file = shift ;
2183
    my $destination_file = shift;
2184
 
2185
    my $source = URI::file->new_abs($source_file);
2186
    my $uo = URI::file->new($destination_file,$source)->abs;
2187
    return $uo->rel->as_string;
2188
}
2189
 
2190
 
2191
#
2192
# finish_list - finish off any pending HTML lists.  this should be called
2193
# after the entire pod file has been read and converted.
2194
#
2195
sub finish_list {
5542 dpurdie 2196
    if( $Listlevel ){
2197
    warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
2198
    while( $Listlevel ){
2199
            process_back();
2200
        }
361 dpurdie 2201
    }
2202
}
2203
 
2204
#
2205
# htmlify - converts a pod section specification to a suitable section
2206
# specification for HTML. Note that we keep spaces and special characters
2207
# except ", ? (Netscape problem) and the hyphen (writer's problem...).
2208
#
2209
sub htmlify {
2210
    my( $heading) = @_;
2211
    $heading =~ s/(\s+)/ /g;
2212
    $heading =~ s/\s+\Z//;
2213
    $heading =~ s/\A\s+//;
2214
    # The hyphen is a disgrace to the English language.
2215
    # $heading =~ s/[-"?]//g;
2216
    $heading =~ s/["?]//g;
2217
    $heading = lc( $heading );
2218
    return $heading;
2219
}
2220
 
2221
#
2222
# similar to htmlify, but turns non-alphanumerics into underscores
2223
#
2224
sub anchorify {
2225
    my ($anchor) = @_;
2226
    $anchor = htmlify($anchor);
2227
    $anchor =~ s/\W/_/g;
2228
    return $anchor;
2229
}
2230
 
2231
#
2232
# depod - convert text by eliminating all interior sequences
2233
# Note: can be called with copy or modify semantics
2234
#
2235
my %E2c;
2236
$E2c{lt}     = '<';
2237
$E2c{gt}     = '>';
2238
$E2c{sol}    = '/';
2239
$E2c{verbar} = '|';
2240
$E2c{amp}    = '&'; # in Tk's pods
2241
 
2242
sub depod1($;$$);
2243
 
2244
sub depod($){
2245
    my $string;
2246
    if( ref( $_[0] ) ){
2247
        $string =  ${$_[0]};
2248
        ${$_[0]} = depod1( \$string );
2249
    } else {
2250
        $string =  $_[0];
2251
        depod1( \$string );
2252
    }
2253
}
2254
 
2255
sub depod1($;$$){
2256
  my( $rstr, $func, $closing ) = @_;
2257
  my $res = '';
2258
  return $res unless defined $$rstr;
2259
  if( ! defined( $func ) ){
2260
      # skip to next begin of an interior sequence
2261
      while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2262
         # recurse into its text
2263
          $res .= $1 . depod1( $rstr, $2, closing $3);
2264
      }
2265
      $res .= $$rstr;
2266
  } elsif( $func eq 'E' ){
2267
      # E<x> - convert to character
2268
      $$rstr =~ s/^([^>]*)>//;
2269
      $res .= $E2c{$1} || "";
2270
  } elsif( $func eq 'X' ){
2271
      # X<> - ignore
2272
      $$rstr =~ s/^[^>]*>//;
2273
  } elsif( $func eq 'Z' ){
2274
      # Z<> - empty
2275
      $$rstr =~ s/^>//;
2276
  } else {
2277
      # all others: either recurse into new function or
2278
      # terminate at closing angle bracket
2279
      my $term = pattern $closing;
2280
      while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2281
          $res .= $1;
2282
          last unless $3;
2283
          $res .= depod1( $rstr, $3, closing $4 );
2284
      }
2285
      ## If we're here and $2 ne '>': undelimited interior sequence.
2286
      ## Ignored, as this is called without proper indication of where we are.
2287
      ## Rely on process_text to produce diagnostics.
2288
  }
2289
  return $res;
2290
}
2291
 
2292
{
2293
    my %seen;   # static fragment record hash
2294
 
2295
sub fragment_id_readable {
2296
    my $text     = shift;
2297
    my $generate = shift;   # optional flag
2298
 
2299
    my $orig = $text;
2300
 
2301
    # leave the words for the fragment identifier,
2302
    # change everything else to underbars.
2303
    $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency.
2304
    $text =~ s/_{2,}/_/g;
2305
    $text =~ s/\A_//;
2306
    $text =~ s/_\Z//;
2307
 
2308
    unless ($text)
2309
    {
2310
        # Nothing left after removing punctuation, so leave it as is
2311
        # E.g. if option is named: "=item -#"
2312
 
2313
        $text = $orig;
2314
    }
2315
 
2316
    if ($generate) {
2317
        if ( exists $seen{$text} ) {
2318
            # This already exists, make it unique
2319
            $seen{$text}++;
2320
            $text = $text . $seen{$text};
2321
        } else {
2322
            $seen{$text} = 1;  # first time seen this fragment
2323
        }
2324
    }
2325
 
2326
    $text;
2327
}}
2328
 
2329
#
2330
# fragment_id - construct a fragment identifier from:
2331
#   a) =item text
2332
#   b) contents of C<...>
2333
#
2334
 
2335
sub fragment_id {
2336
    my $text     = shift;
2337
    my $generate = shift;   # optional flag
2338
 
2339
    $text =~ s/\s+\Z//s;
2340
    if( $text ){
2341
        # a method or function?
2342
        return $1 if $text =~ /(\w+)\s*\(/;
2343
        return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2344
 
2345
        # a variable name?
2346
        return $1 if $text =~ /^([\$\@%*]\S+)/;
2347
 
2348
        # some pattern matching operator?
2349
        return $1 if $text =~ m|^(\w+/).*/\w*$|;
2350
 
2351
        # fancy stuff... like "do { }"
2352
        return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2353
 
2354
        # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2355
        # and some funnies with ... Module ...
2356
        return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2357
        return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2358
 
2359
        return fragment_id_readable($text, $generate);
2360
    } else {
2361
        return;
2362
    }
2363
}
2364
 
2365
#
2366
# make_URL_href - generate HTML href from URL
2367
# Special treatment for CGI queries.
2368
#
2369
sub make_URL_href($){
2370
    my( $url ) = @_;
2371
    if( $url !~
2372
        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2373
        $url = "<a href=\"$url\">$url</a>";
2374
    }
2375
    return $url;
2376
}
2377
 
2378
1;