| 311 |
dpurdie |
1 |
# Pod::PlainText -- Convert POD data to formatted ASCII text.
|
|
|
2 |
# $Id: Text.pm,v 2.1 1999/09/20 11:53:33 eagle Exp $
|
|
|
3 |
#
|
|
|
4 |
# Copyright 1999-2000 by Russ Allbery <rra@stanford.edu>
|
|
|
5 |
#
|
|
|
6 |
# This program is free software; you can redistribute it and/or modify it
|
|
|
7 |
# under the same terms as Perl itself.
|
|
|
8 |
#
|
|
|
9 |
# This module is intended to be a replacement for Pod::Text, and attempts to
|
|
|
10 |
# match its output except for some specific circumstances where other
|
|
|
11 |
# decisions seemed to produce better output. It uses Pod::Parser and is
|
|
|
12 |
# designed to be very easy to subclass.
|
|
|
13 |
|
|
|
14 |
############################################################################
|
|
|
15 |
# Modules and declarations
|
|
|
16 |
############################################################################
|
|
|
17 |
|
|
|
18 |
package Pod::PlainText;
|
|
|
19 |
use strict;
|
|
|
20 |
|
|
|
21 |
require 5.005;
|
|
|
22 |
|
|
|
23 |
use Carp qw(carp croak);
|
|
|
24 |
use Pod::Select ();
|
|
|
25 |
|
|
|
26 |
use vars qw(@ISA %ESCAPES $VERSION);
|
|
|
27 |
|
|
|
28 |
# We inherit from Pod::Select instead of Pod::Parser so that we can be used
|
|
|
29 |
# by Pod::Usage.
|
|
|
30 |
@ISA = qw(Pod::Select);
|
|
|
31 |
|
|
|
32 |
$VERSION = '2.04';
|
|
|
33 |
|
|
|
34 |
BEGIN {
|
|
|
35 |
if ($] < 5.006) {
|
|
|
36 |
require Symbol;
|
|
|
37 |
import Symbol;
|
|
|
38 |
}
|
|
|
39 |
}
|
|
|
40 |
|
|
|
41 |
############################################################################
|
|
|
42 |
# Table of supported E<> escapes
|
|
|
43 |
############################################################################
|
|
|
44 |
|
|
|
45 |
# This table is taken near verbatim from Pod::PlainText in Pod::Parser,
# which got it near verbatim from the original Pod::Text.  It is therefore
# credited to Tom Christiansen, and I'm glad I didn't have to write it.  :)
#
# Maps the name written inside an E<> escape to the character it expands to.
# The accented characters are single bytes in ISO 8859-1 (Latin-1) order;
# note that the grave-accented I is \xCC/\xEC, distinct from the
# acute-accented I at \xCD/\xED.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "lchevron"  =>    "\xAB",   # left chevron (double less than)
    "rchevron"  =>    "\xBB",   # right chevron (double greater than)
);
|
|
|
120 |
|
|
|
121 |
|
|
|
122 |
############################################################################
|
|
|
123 |
# Initialization
|
|
|
124 |
############################################################################
|
|
|
125 |
|
|
|
126 |
# Object initializer.  Supplies a default for every formatting option the
# caller left undefined, resets the per-document layout state (the stack of
# saved indentations and the current left margin), and then delegates to the
# parent class initializer, whose return value is passed back.
sub initialize {
    my ($self) = @_;

    # Formatting options and the value each one falls back to.
    my %default_for = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default_for) {
        next if defined $self->{$option};
        $self->{$option} = $default_for{$option};
    }

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    return $self->SUPER::initialize;
}
|
|
|
141 |
|
|
|
142 |
|
|
|
143 |
############################################################################
|
|
|
144 |
# Core overrides
|
|
|
145 |
############################################################################
|
|
|
146 |
|
|
|
147 |
# Called for each command paragraph.  Gets the command, the associated
# paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
# the command to a method named cmd_<command>, passing the remaining
# arguments through.  =cut is handled internally by Pod::Parser.
#
# =pod is ignored, and while an excluded =begin block is open every command
# except =end is ignored.  A command for which no cmd_<command> method exists
# produces an "Unknown command paragraph" warning and is skipped rather than
# aborting the whole conversion with a method-lookup failure.
sub command {
    my $self    = shift;
    my $command = shift;
    return if $command eq 'pod';
    return if ($$self{EXCLUDE} && $command ne 'end');

    # An =item that is still pending has no body paragraph; emit its tag now
    # so it is not lost, adding a blank line when the list is being closed.
    if (defined $$self{ITEM}) {
        $self->item ("\n");
        local $_ = "\n";
        $self->output ($_) if ($command eq 'back');
    }

    my $method = 'cmd_' . $command;
    unless ($self->can ($method)) {
        carp "Unknown command paragraph: =$command";
        return;
    }
    return $self->$method (@_);
}
|
|
|
164 |
|
|
|
165 |
# Handler for a verbatim paragraph.  Receives the paragraph text (followed by
# the line number and a Pod::Paragraph object, which are not used here).
# Nothing is emitted inside an excluded block or for an all-whitespace
# paragraph; otherwise any pending =item tag is flushed and the paragraph is
# written out with every non-blank line shifted right by the current margin.
sub verbatim {
    my ($self, $text) = @_;
    return if $self->{EXCLUDE};
    $self->item if defined $self->{ITEM};
    return if $text =~ /^\s*$/;

    my $pad = ' ' x $self->{MARGIN};
    $text =~ s/^(\s*\S+)/$pad$1/gm;
    return $self->output ($text);
}
|
|
|
177 |
|
|
|
178 |
# Called for a regular text block.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Perform interpolation and output the results.
#
# Nothing is emitted while an excluded =begin block is open.  Inside a
# "=begin text" block (VERBATIM set) the paragraph is passed to output()
# untouched.  Otherwise the paragraph is interpolated, its trailing
# whitespace is reduced to a single newline, and it is either handed to
# item() as the body of a pending =item or reformatted and written out.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    if($$self{VERBATIM}) {
      $self->output($_[0]);
      return;
    }
    local $_ = shift;
    my $line = shift;

    # Perform a little magic to collapse multiple L<> references.  This is
    # here mostly for backwards-compatibility.  We'll just rewrite the whole
    # thing into actual text at this part, bypassing the whole internal
    # sequence parsing thing.
    #
    # The pattern needs at least two L</name> links joined by whitespace, an
    # optional comma and an optional "and"; a single link is left for
    # interpolate().  For example "L</foo> and L</bar>" becomes
    # "the foo and bar entries elsewhere in this document".
    s{
        (
          L<                    # A link of the form L</something>.
              /
              (
                  [:\w]+        # The item has to be a simple word...
                  (\(\))?       # ...or simple function.
              )
          >
          (
              ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
              L<
                  /
                  (
                      [:\w]+
                      (\(\))?
                  )
              >
          )+
        )
    } {
        local $_ = $1;
        s%L</([^>]+)>%$1%g;
        my @items = split /(?:,?\s+(?:and\s+)?)/;
        my $string = "the ";
        my $i;
        for ($i = 0; $i < @items; $i++) {
            $string .= $items[$i];
            $string .= ", " if @items > 2 && $i != $#items;
            $string .= " and " if ($i == $#items - 1);
        }
        $string .= " entries elsewhere in this document";
        $string;
    }gex;

    # Now actually interpolate and output the paragraph.
    $_ = $self->interpolate ($_, $line);
    s/\s*$/\n/s;
    if (defined $$self{ITEM}) {
        $self->item ($_ . "\n");
    } else {
        $self->output ($self->reformat ($_ . "\n"));
    }
}
|
|
|
238 |
|
|
|
239 |
# Called for an interior sequence.  Gets the command, argument, and a
# Pod::InteriorSequence object and is expected to return the resulting text.
# Calls seq_b, seq_c, seq_f, seq_i, and seq_l to handle those types of
# sequences, and handles S<>, E<>, X<>, and Z<> directly.
#
# Always returns a defined string:
#   X<>, Z<>        the empty string
#   E<name>         the character from %ESCAPES, or the literal "E<name>"
#                   (with a warning) when the name is unknown
#   empty content   the empty string
#   S<text>         the text with whitespace runs squeezed and every space
#                   replaced by \01
#   anything else   a warning plus the original markup, e.g. "Q<text>",
#                   mirroring what is done for an unknown E<> escape
sub interior_sequence {
    my $self    = shift;
    my $command = shift;
    local $_    = shift;
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, carping if invalid.
    if ($command eq 'E') {
        return $ESCAPES{$_} if defined $ESCAPES{$_};
        carp "Unknown escape: E<$_>";
        return "E<$_>";
    }

    # For all the other sequences, empty content produces no output.
    return '' if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s{2,}/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_) }

    # Not something we know.  Return the markup explicitly so that the
    # result of carp never ends up in the formatted text.
    carp "Unknown sequence $command<$_>";
    return "$command<$_>";
}
|
|
|
275 |
|
|
|
276 |
# Hook run on every paragraph that is part of the POD.  Returns a copy of
# the paragraph in which each run of tabs has been replaced by enough spaces
# to reach the next multiple-of-eight column, counted from the start of the
# line the run appears on.
sub preprocess_paragraph {
    my ($self, $paragraph) = @_;

    # Each pass expands the first tab run still present; the earlier text on
    # that line is tab-free by then, so its length is its display width.
    while ($paragraph =~ s{^(.*?)(\t+)}{
               my ($lead, $tabs) = ($1, $2);
               $lead . ' ' x (length ($tabs) * 8 - length ($lead) % 8)
           }me) {
        # Nothing to do here; the substitution itself is the work.
    }
    return $paragraph;
}
|
|
|
284 |
|
|
|
285 |
|
|
|
286 |
############################################################################
|
|
|
287 |
# Command paragraphs
|
|
|
288 |
############################################################################
|
|
|
289 |
|
|
|
290 |
# All command paragraphs take the paragraph and the line number.
|
|
|
291 |
|
|
|
292 |
# =head1.  Takes the heading text and the line number.  The heading is
# stripped of trailing whitespace and interpolated, then written flush left.
# In the alternate style it is framed as "==== heading ====" with a blank
# line on either side; otherwise it is followed by a newline, plus an extra
# blank line when the "loose" option is on.
sub cmd_head1 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//s;
    $heading = $self->interpolate ($heading, $line);

    if ($self->{alt}) {
        return $self->output ("\n==== $heading ====\n\n");
    }
    $heading .= "\n" if $self->{loose};
    return $self->output ($heading . "\n");
}
|
|
|
305 |
|
|
|
306 |
# =head2.  Takes the heading text and the line number.  The heading is
# stripped of trailing whitespace and interpolated.  In the alternate style
# it is framed as "== heading ==" with a blank line on either side;
# otherwise it is indented by half of the configured indent and followed by
# a newline, plus an extra blank line when the "loose" option is on.
sub cmd_head2 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//s;
    $heading = $self->interpolate ($heading, $line);

    if ($self->{alt}) {
        return $self->output ("\n== $heading ==\n\n");
    }
    $heading .= "\n" if $self->{loose};
    my $pad = ' ' x ($self->{indent} / 2);
    return $self->output ($pad . $heading . "\n");
}
|
|
|
319 |
|
|
|
320 |
# =head3 (not strictly perlpodspec compliant).  Takes the heading text and
# the line number.  The heading is stripped of trailing whitespace and
# interpolated.  In the alternate style it is written as "= heading ="
# preceded by a blank line; otherwise it is indented by the full configured
# indent and followed by a newline, plus an extra blank line when the
# "loose" option is on.
sub cmd_head3 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//s;
    $heading = $self->interpolate ($heading, $line);

    if ($self->{alt}) {
        return $self->output ("\n= $heading =\n");
    }
    $heading .= "\n" if $self->{loose};
    my $pad = ' ' x ($self->{indent});
    return $self->output ($pad . $heading . "\n");
}

# =head4 (not strictly perlpodspec compliant) is rendered exactly like
# =head3.
*cmd_head4 = \&cmd_head3;
|
|
|
337 |
|
|
|
338 |
# Start a list (=over).  Takes the paragraph that follows the command, which
# is expected to be an optionally signed integer giving the extra
# indentation.  Trailing whitespace is allowed but not required; a missing
# or non-numeric argument selects the configured default indent.  The
# current margin is saved on the INDENTS stack and then increased by the
# chosen amount.  Returns the new margin.
sub cmd_over {
    my ($self, $amount) = @_;
    unless (defined $amount && $amount =~ /^[-+]?\d+\s*$/) {
        $amount = $$self{indent};
    }
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    return $$self{MARGIN} += ($amount + 0);
}
|
|
|
346 |
|
|
|
347 |
# End a list (=back).  Restores the left margin saved by the matching =over.
# When there is nothing on the stack to restore, warns about the unmatched
# =back and falls back to the configured default indent.
sub cmd_back {
    my ($self) = @_;
    my $previous = pop @{ $self->{INDENTS} };
    if (!defined $previous) {
        carp 'Unmatched =back';
        $previous = $self->{indent};
    }
    $self->{MARGIN} = $previous;
}
|
|
|
356 |
|
|
|
357 |
# An individual list item (=item).  If an earlier item is still waiting for
# its paragraph it is flushed first.  The new tag is stripped of trailing
# whitespace, interpolated, and remembered in ITEM until the following
# paragraph (or command) causes it to be printed.
sub cmd_item {
    my ($self, $tag) = @_;
    $self->item if defined $self->{ITEM};
    $tag =~ s/\s+$//s;
    $self->{ITEM} = $self->interpolate ($tag);
}
|
|
|
365 |
|
|
|
366 |
# Begin a block for a particular translator (=begin).  The first
# whitespace-delimited word of the paragraph names the format; nothing
# happens when there is no such word.  A "text" block sets VERBATIM, which
# triggers special handling in textblock(); any other format sets EXCLUDE
# so that its contents are skipped.
sub cmd_begin {
    my ($self, $paragraph) = @_;
    return unless $paragraph =~ /^(\S+)/;
    my $flag = ($1 eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $self->{$flag} = 1;
}
|
|
|
378 |
|
|
|
379 |
# End a block for a particular translator (=end).  Clears both the EXCLUDE
# and the VERBATIM flag without looking at the format name; we assume that
# all =begin/=end pairs are properly closed.
sub cmd_end {
    my ($self) = @_;
    @{$self}{qw(EXCLUDE VERBATIM)} = (0, 0);
    return 0;
}
|
|
|
386 |
|
|
|
387 |
# One paragraph for a particular translator (=for).  Takes the paragraph and
# the line number.  Paragraphs that do not start with the word "text" are
# ignored; for the rest, the leading "text" marker (with any following
# blanks and one newline) is removed and the remainder is handled as a
# verbatim paragraph.
sub cmd_for {
    my ($self, $paragraph, $line) = @_;
    if ($paragraph =~ s/^text\b[ \t]*\n?//) {
        return $self->verbatim ($paragraph, $line);
    }
    return;
}
|
|
|
396 |
|
|
|
397 |
|
|
|
398 |
############################################################################
|
|
|
399 |
# Interior sequences
|
|
|
400 |
############################################################################
|
|
|
401 |
|
|
|
402 |
# The simple formatting sequences.  Each takes the object and the text and
# returns the text to print.  They are separate methods mostly so that
# subclasses can override them and do more complicated things.

# B<>: unchanged, or wrapped in ``...'' in the alternate style.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return '``' . $text . "''";
}

# C<>: wrapped in `...', or in ``...'' in the alternate style.
sub seq_c {
    my ($self, $text) = @_;
    my $open  = $self->{alt} ? '``' : '`';
    my $close = $self->{alt} ? "''" : "'";
    return $open . $text . $close;
}

# F<>: unchanged, or wrapped in double quotes in the alternate style.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return '"' . $text . '"';
}

# I<>: always wrapped in asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
|
|
|
408 |
|
|
|
409 |
# The complicated one: L<> links.  Since this is plain text, we can't
# actually make any real links, so all of this only decides which words to
# print.  Takes the link content and returns the replacement text:
#
#   text|target              just "text"
#   http:, https:, ftp:,     the URL itself
#     news: URLs
#   name or name(section)    "the name manpage"
#   manpage/entry            "the entry entry in the manpage manpage"
#   /entry                   "the entry entry elsewhere in this document"
#   "section"                'the section on "section"', followed by
#                            " in the manpage manpage" when one was named
sub seq_l {
    my ($self, $link) = @_;

    # Smash whitespace in case we were split across multiple lines.
    $link =~ s/\s+/ /g;

    # Explicit link text wins over everything else.
    return $1 if $link =~ /^([^|]+)\|/;

    # Leading and trailing whitespace isn't important.
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    return $link if $link =~ /^(?:https?|ftp|news):/;

    # By default the whole content is a section name.  L<manpage/> forces a
    # manpage interpretation, as does anything that looks like
    # L<manpage(section)>.
    my $manpage = '';
    my $section = $link;
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    } elsif ($link =~ m/^[-:.\w]+(?:\(\S+\))?$/) {
        $manpage = $link;
        $section = '';
    } elsif ($link =~ m{/}) {
        ($manpage, $section) = split (/\s*\/\s*/, $link, 2);
    }

    # Now build the actual output text.
    my $in_manpage = length ($manpage) ? " in the $manpage manpage" : '';

    if (!length $section) {
        return length ($manpage) ? "the $manpage manpage" : '';
    }
    if ($section =~ /^[:\w]+(?:\(\))?/) {
        return "the $section entry"
             . (length ($manpage) ? $in_manpage
                                  : ' elsewhere in this document');
    }
    $section =~ s/^\"\s*//;
    $section =~ s/\s*\"$//;
    return 'the section on "' . $section . '"' . $in_manpage;
}
|
|
|
458 |
|
|
|
459 |
|
|
|
460 |
############################################################################
|
|
|
461 |
# List handling
|
|
|
462 |
############################################################################
|
|
|
463 |
|
|
|
464 |
# Print the pending =item tag.  Called once an =item is complete, that is,
# when its paragraph has been seen or it is known not to have one; the
# paragraph (if any) is the argument.
#
# The tag is placed at the margin that was saved by the enclosing =over (or
# at the default indent when nothing was saved).  When the paragraph is
# empty or blank, or the tag plus one space is wider than the room between
# that position and the current margin, the tag goes on a line of its own,
# followed by the paragraph if it has any text.  Otherwise the paragraph is
# formatted first and the tag is written over the start of its leading
# indentation.  Warns and does nothing when no tag is pending.
sub item {
    my ($self, $text) = @_;

    my $tag = $self->{ITEM};
    if (!defined $tag) {
        carp 'item called without tag';
        return;
    }
    undef $self->{ITEM};

    my $indent = $self->{INDENTS}[-1];
    $indent = $self->{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $self->{alt};

    if ($text && $text !~ /^\s+$/
        && !($self->{MARGIN} - $indent < length ($tag) + 1)) {
        # The tag fits in the margin of the paragraph.
        my $body = $self->reformat ($text);
        $body =~ s/^ /:/ if ($self->{alt} && $indent > 0);
        my $tagspace = ' ' x length $tag;
        $body =~ s/^($space)$tagspace/$1$tag/ or carp 'Bizarre space in item';
        $self->output ($body);
    } else {
        # The tag gets its own line, formatted at the outer margin; the
        # current margin is put back before the paragraph is printed.
        my $saved_margin = $self->{MARGIN};
        $self->{MARGIN} = $indent;
        my $tag_line = $self->reformat ($tag);
        $tag_line =~ s/\n*$/\n/;
        $self->output ($tag_line);
        $self->{MARGIN} = $saved_margin;
        $self->output ($self->reformat ($text)) if $text =~ /\S/;
    }
}
|
|
|
500 |
|
|
|
501 |
|
|
|
502 |
############################################################################
|
|
|
503 |
# Output formatting
|
|
|
504 |
############################################################################
|
|
|
505 |
|
|
|
506 |
# Wrap a line, indenting by the current left margin.  We can't use
# Text::Wrap because it plays games with tabs.  We can't use formline, even
# though we'd really like to, because it screws up non-printing characters.
# So we have to do the wrapping ourselves.
#
# Takes the text and returns it broken into lines that each start with
# MARGIN spaces and hold at most (width - MARGIN) characters of text.  Lines
# are broken at whitespace where possible and in the middle of a word
# otherwise.  Trailing whitespace of the result is replaced by two newlines.
sub wrap {
    my ($self, $text) = @_;
    my $margin  = $self->{MARGIN};
    my $indent  = ' ' x $margin;
    my $columns = $self->{width} - $margin;

    my @lines;
    while (length ($text) > $columns) {
        # Prefer a break at whitespace; failing that, cut a full-width
        # chunk.  Stop if neither is possible (an embedded newline).
        last unless $text =~ s/^([^\n]{0,$columns})\s+//
                 || $text =~ s/^([^\n]{$columns})//;
        push @lines, $indent . $1 . "\n";
    }

    my $wrapped = join ('', @lines, $indent . $text);
    $wrapped =~ s/\s+$/\n\n/;
    return $wrapped;
}
|
|
|
527 |
|
|
|
528 |
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text (the result of wrap()).
#
# With the "sentence" option off, every run of whitespace becomes a single
# space.  With it on, two spaces after a sentence are preserved: a line that
# ends in a period is treated as the end of a sentence, line breaks become
# spaces, and only runs of three or more spaces are cut back, to two.
sub reformat {
    my $self = shift;
    local $_ = shift;

    if ($$self{sentence}) {
        my $sentence_gap = ' ' x 2;
        s/ +$//mg;                  # Drop spaces at the end of each line.
        s/\.\n/. \n/g;              # Period + newline gets an extra space...
        s/\n/ /g;                   # ...so joining lines leaves two there.
        s/ {3,}/$sentence_gap/g;    # Keep single and double spaces as is.
    } else {
        s/\s+/ /g;
    }
    return $self->wrap ($_);
}
|
|
|
546 |
|
|
|
547 |
# Output text to the output device.  Every \01 in the text (the placeholder
# that S<> handling puts in place of a space) is turned back into a real
# space first.  The translation is done on a private copy, so the caller's
# variable is left alone and a string literal may be passed directly.
# Returns the result of print.
sub output {
    my ($self, $text) = @_;
    $text =~ tr/\01/ /;
    return print { $self->output_handle } $text;
}
|
|
|
549 |
|
|
|
550 |
|
|
|
551 |
############################################################################
|
|
|
552 |
# Backwards compatibility
|
|
|
553 |
############################################################################
|
|
|
554 |
|
|
|
555 |
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
#
# This is a plain function, not a method: it takes optional leading flags
# ("-a" for the alternate style, "-<number>" for the output width), then an
# input file and optionally an output file handle.  Returns whatever the
# parser's parse_from_file() or parse_from_filehandle() returns, and croaks
# when the input cannot be opened in the two-argument case.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  Anything else that starts
    # with a dash is put back and ends flag processing.
    while ($_[0] =~ /^-/) {
        my $flag = shift;
        if ($flag eq '-a') { push (@args, alt => 1) }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::PlainText->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which
    # means we need to turn the first argument into a file handle.  Magic
    # open will handle the <&STDIN case automagically.
    if (defined $_[1]) {
        my $infh;
        # Before 5.6 open() needs an existing glob to work on.
        if ($] < 5.006) {
          $infh = gensym();
        }
        # NOTE(review): the two-argument open is deliberate (see above), but
        # it also means mode characters and pipes in the name are honoured;
        # do not pass untrusted file names to this function.
        unless (open ($infh, $_[0])) {
            croak ("Can't open $_[0] for reading: $!\n");
        }
        # NOTE(review): elements of @_ alias the caller's arguments, so this
        # presumably also replaces the caller's own variable when the file
        # name was passed in one -- confirm that no caller relies on it.
        $_[0] = $infh;
        return $parser->parse_from_filehandle (@_);
    } else {
        return $parser->parse_from_file (@_);
    }
}
|
|
|
594 |
|
|
|
595 |
|
|
|
596 |
############################################################################
|
|
|
597 |
# Module return value and documentation
|
|
|
598 |
############################################################################
|
|
|
599 |
|
|
|
600 |
1;
|
|
|
601 |
__END__
|
|
|
602 |
|
|
|
603 |
=head1 NAME
|
|
|
604 |
|
|
|
605 |
Pod::PlainText - Convert POD data to formatted ASCII text
|
|
|
606 |
|
|
|
607 |
=head1 SYNOPSIS
|
|
|
608 |
|
|
|
609 |
    use Pod::PlainText;
    my $parser = Pod::PlainText->new (sentence => 0, width => 78);

    # Read POD from STDIN and write to STDOUT.
    $parser->parse_from_filehandle;

    # Read POD from file.pod and write to file.txt.
    $parser->parse_from_file ('file.pod', 'file.txt');
|
|
|
617 |
|
|
|
618 |
=head1 DESCRIPTION
|
|
|
619 |
|
|
|
620 |
Pod::PlainText is a module that can convert documentation in the POD format (the
|
|
|
621 |
preferred language for documenting Perl) into formatted ASCII. It uses no
|
|
|
622 |
special formatting controls or codes whatsoever, and its output is therefore
|
|
|
623 |
suitable for nearly any device.
|
|
|
624 |
|
|
|
625 |
As a derived class from Pod::Parser, Pod::PlainText supports the same methods and
|
|
|
626 |
interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
|
|
|
627 |
new parser with C<Pod::PlainText-E<gt>new()> and then calls either
|
|
|
628 |
parse_from_filehandle() or parse_from_file().
|
|
|
629 |
|
|
|
630 |
new() can take options, in the form of key/value pairs, that control the
|
|
|
631 |
behavior of the parser. The currently recognized options are:
|
|
|
632 |
|
|
|
633 |
=over 4
|
|
|
634 |
|
|
|
635 |
=item alt
|
|
|
636 |
|
|
|
637 |
If set to a true value, selects an alternate output format that, among other
|
|
|
638 |
things, uses a different heading style and marks C<=item> entries with a
|
|
|
639 |
colon in the left margin. Defaults to false.
|
|
|
640 |
|
|
|
641 |
=item indent
|
|
|
642 |
|
|
|
643 |
The number of spaces to indent regular text, and the default indentation for
|
|
|
644 |
C<=over> blocks. Defaults to 4.
|
|
|
645 |
|
|
|
646 |
=item loose
|
|
|
647 |
|
|
|
648 |
If set to a true value, a blank line is printed after C<=headN> headings.
|
|
|
649 |
If set to false (the default), no blank line is printed after C<=headN>.
|
|
|
650 |
This is the default because it's the expected formatting for manual pages;
|
|
|
651 |
if you're formatting arbitrary text documents, setting this to true may
|
|
|
652 |
result in more pleasing output.
|
|
|
653 |
|
|
|
654 |
=item sentence
|
|
|
655 |
|
|
|
656 |
If set to a true value, Pod::PlainText will assume that each sentence ends in two
|
|
|
657 |
spaces, and will try to preserve that spacing. If set to false, all
|
|
|
658 |
consecutive whitespace in non-verbatim paragraphs is compressed into a
|
|
|
659 |
single space. Defaults to false.
|
|
|
660 |
|
|
|
661 |
=item width
|
|
|
662 |
|
|
|
663 |
The column at which to wrap text on the right-hand side. Defaults to 76.
|
|
|
664 |
|
|
|
665 |
=back
|
|
|
666 |
|
|
|
667 |
The standard Pod::Parser method parse_from_filehandle() takes up to two
|
|
|
668 |
arguments, the first being the file handle to read POD from and the second
|
|
|
669 |
being the file handle to write the formatted output to. The first defaults
|
|
|
670 |
to STDIN if not given, and the second defaults to STDOUT. The method
|
|
|
671 |
parse_from_file() is almost identical, except that its two arguments are the
|
|
|
672 |
input and output disk files instead. See L<Pod::Parser> for the specific
|
|
|
673 |
details.
|
|
|
674 |
|
|
|
675 |
=head1 DIAGNOSTICS
|
|
|
676 |
|
|
|
677 |
=over 4
|
|
|
678 |
|
|
|
679 |
=item Bizarre space in item
|
|
|
680 |
|
|
|
681 |
(W) Something has gone wrong in internal C<=item> processing. This message
|
|
|
682 |
indicates a bug in Pod::PlainText; you should never see it.
|
|
|
683 |
|
|
|
684 |
=item Can't open %s for reading: %s
|
|
|
685 |
|
|
|
686 |
(F) Pod::PlainText was invoked via the compatibility mode pod2text() interface
|
|
|
687 |
and the input file it was given could not be opened.
|
|
|
688 |
|
|
|
689 |
=item Unknown escape: %s
|
|
|
690 |
|
|
|
691 |
(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::PlainText didn't
|
|
|
692 |
know about.
|
|
|
693 |
|
|
|
694 |
=item Unknown sequence %s
|
|
|
695 |
|
|
|
696 |
(W) The POD source contained a non-standard internal sequence (something of
|
|
|
697 |
the form C<XE<lt>E<gt>>) that Pod::PlainText didn't know about.
|
|
|
698 |
|
|
|
699 |
=item Unmatched =back
|
|
|
700 |
|
|
|
701 |
(W) Pod::PlainText encountered a C<=back> command that didn't correspond to an
|
|
|
702 |
C<=over> command.
|
|
|
703 |
|
|
|
704 |
=back
|
|
|
705 |
|
|
|
706 |
=head1 RESTRICTIONS
|
|
|
707 |
|
|
|
708 |
Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
|
|
|
709 |
output, due to an internal implementation detail.
|
|
|
710 |
|
|
|
711 |
=head1 NOTES
|
|
|
712 |
|
|
|
713 |
This is a replacement for an earlier Pod::Text module written by Tom
|
|
|
714 |
Christiansen. It has a revamped interface, since it now uses Pod::Parser,
|
|
|
715 |
but an interface roughly compatible with the old Pod::Text::pod2text()
|
|
|
716 |
function is still available. Please change to the new calling convention,
|
|
|
717 |
though.
|
|
|
718 |
|
|
|
719 |
The original Pod::Text contained code to do formatting via termcap
|
|
|
720 |
sequences, although it wasn't turned on by default and it was problematic to
|
|
|
721 |
get it to work at all. This rewrite doesn't even try to do that, but a
|
|
|
722 |
subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
|
|
|
723 |
|
|
|
724 |
=head1 SEE ALSO
|
|
|
725 |
|
|
|
726 |
L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
|
|
|
727 |
pod2text(1)
|
|
|
728 |
|
|
|
729 |
=head1 AUTHOR
|
|
|
730 |
|
|
|
731 |
Please report bugs using L<http://rt.cpan.org>.
|
|
|
732 |
|
|
|
733 |
Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
|
|
|
734 |
original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
|
|
|
735 |
its conversion to Pod::Parser by Brad Appleton
|
|
|
736 |
E<lt>bradapp@enteract.comE<gt>.
|
|
|
737 |
|
|
|
738 |
=cut
|