Profile of Pod/Simple/BlackBox.pm

Filename	/usr/share/perl/5.36/Pod/Simple/BlackBox.pm
Statements	Executed 105 statements in 7.66ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
9	9	2	622µs	883µs	Pod::Simple::BlackBox::::my_qrPod::Simple::BlackBox::my_qr
9	9	9	66µs	66µs	Pod::Simple::BlackBox::::CORE:matchPod::Simple::BlackBox::CORE:match (opcode)
1	1	1	11µs	15µs	Pod::Simple::BlackBox::::BEGIN@289Pod::Simple::BlackBox::BEGIN@289
1	1	1	10µs	13µs	Pod::Simple::BlackBox::::BEGIN@1520Pod::Simple::BlackBox::BEGIN@1520
10	10	10	8µs	8µs	Pod::Simple::BlackBox::::CORE:qrPod::Simple::BlackBox::CORE:qr (opcode)
1	1	1	7µs	8µs	Pod::Simple::BlackBox::::BEGIN@21Pod::Simple::BlackBox::BEGIN@21
1	1	1	5µs	14µs	Pod::Simple::BlackBox::::BEGIN@273Pod::Simple::BlackBox::BEGIN@273
1	1	1	4µs	16µs	Pod::Simple::BlackBox::::BEGIN@67Pod::Simple::BlackBox::BEGIN@67
1	1	1	3µs	4µs	Pod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
1	1	1	3µs	19µs	Pod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
1	1	1	3µs	3µs	Pod::Simple::BlackBox::::BEGIN@55Pod::Simple::BlackBox::BEGIN@55
1	1	1	2µs	2µs	Pod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
0	0	0	0s	0s	Pod::Simple::BlackBox::_closers_for_all_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::_dump_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_line
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_second_level
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_Verbatim
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_doc_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_paragraph_buffer
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0	0	0	0s	0s	Pod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::_traverse_treelet_bit
0	0	0	0s	0s	Pod::Simple::BlackBox::_treelet_from_formatting_codes
0	0	0	0s	0s	Pod::Simple::BlackBox::_verbatim_format
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0	0	0	0s	0s	Pod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0	0	0	0s	0s	Pod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0	0	0	0s	0s	Pod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::text_content_of_treelet

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					package Pod::Simple::BlackBox;
2					#
3					# "What's in the box?" "Pain."
4					#
5					###########################################################################
6					#
7					# This is where all the scary things happen: parsing lines into
8					# paragraphs; and then into directives, verbatims, and then also
9					# turning formatting sequences into treelets.
10					#
11					# Are you really sure you want to read this code?
12					#
13					#-----------------------------------------------------------------------------
14					#
15					# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
16					# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
17					# to call the proper callbacks on the treelets.
18					#
19					# Every node in a treelet is a ['name', {attrhash}, ...children...]
20
21	2	13µs	2	9µs	# spent 8µs (7+900ns) within Pod::Simple::BlackBox::BEGIN@21 which was called: # once (7µs+900ns) by Pod::Simple::LinkSection::BEGIN@7 at line 21 use integer; # vroom! # spent 8µs making 1 call to Pod::Simple::BlackBox::BEGIN@21 # spent 900ns making 1 call to integer::import
22	2	12µs	2	4µs	# spent 4µs (3+600ns) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (3µs+600ns) by Pod::Simple::LinkSection::BEGIN@7 at line 22 use strict; # spent 4µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 600ns making 1 call to strict::import
23	2	12µs	1	2µs	# spent 2µs within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (2µs+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 23 use Carp (); # spent 2µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
24	2	89µs	2	34µs	# spent 19µs (3+16) within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (3µs+16µs) by Pod::Simple::LinkSection::BEGIN@7 at line 24 use vars qw($VERSION ); # spent 19µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 # spent 16µs making 1 call to vars::import
25	1	300ns			$VERSION = '3.43';
26					#use constant DEBUG => 7;
27
28					# spent 883µs (622+261) within Pod::Simple::BlackBox::my_qr which was called 9 times, avg 98µs/call: # once (96µs+35µs) by Pod::Text::BEGIN@26 at line 351 of Pod/Simple.pm # once (87µs+38µs) by Pod::Simple::LinkSection::BEGIN@7 at line 63 # once (77µs+25µs) by Pod::Simple::LinkSection::BEGIN@7 at line 88 # once (58µs+37µs) by Pod::Simple::LinkSection::BEGIN@7 at line 76 # once (69µs+24µs) by Pod::Simple::LinkSection::BEGIN@7 at line 67 # once (60µs+28µs) by Pod::Simple::LinkSection::BEGIN@7 at line 68 # once (63µs+24µs) by Pod::Simple::LinkSection::BEGIN@7 at line 85 # once (59µs+27µs) by Pod::Simple::LinkSection::BEGIN@7 at line 70 # once (54µs+22µs) by Pod::Simple::LinkSection::BEGIN@7 at line 92 sub my_qr ($$) {
29
30					# $1 is a pattern to compile and return. Older perls compile any
31					# syntactically valid property, even if it isn't legal. To cope with
32					# this, return an empty string unless the compiled pattern also
33					# successfully matches $2, which the caller furnishes.
34
35	9	3µs			my ($input_re, $should_match) = @_;
36					# XXX could have a third parameter $shouldnt_match for extra safety
37
38	9	16µs			my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : "";
39
40	9	154µs			my $re = eval "no warnings; $use_utf8 qr/$input_re/"; # spent 34µs executing statements in string eval # includes 8µs spent executing 1 call to 1 sub defined therein. # spent 29µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 25µs executing statements in string eval # includes 8µs spent executing 1 call to 1 sub defined therein. # spent 20µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 18µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 17µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein. # spent 16µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 15µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 15µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein.
41					#print STDERR __LINE__, ": $input_re: $@\n" if $@;
42	9	1µs			return "" if $@;
43
44	9	140µs			my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; # spent 38µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein. # spent 31µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 31µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 28µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 27µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 25µs executing statements in string eval # includes 5µs spent executing 1 call to 1 sub defined therein. # spent 24µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 21µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 21µs executing statements in string eval # includes 6µs spent executing 1 call to 1 sub defined therein.
45					#print STDERR __LINE__, ": $input_re: $@\n" if $@;
46	9	1µs			return "" if $@;
47
48					#print STDERR __LINE__, ": SUCCESS: $re\n" if $matches;
49	9	16µs			return $re if $matches;
50
51					#print STDERR __LINE__, ": $re: didn't match\n";
52					return "";
53					}
54
55					# spent 3µs within Pod::Simple::BlackBox::BEGIN@55 which was called: # once (3µs+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 58 BEGIN {
56	1	500ns			require Pod::Simple;
57	1	3µs			*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
58	1	38µs	1	3µs	} # spent 3µs making 1 call to Pod::Simple::BlackBox::BEGIN@55
59
60					# Matches a character iff the character will have a different meaning
61					# if we choose CP1252 vs UTF-8 if there is no =encoding line.
62					# This is broken for early Perls on non-ASCII platforms.
63	1	2µs	1	125µs	my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); # spent 125µs making 1 call to Pod::Simple::BlackBox::my_qr
64	1	200ns			$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re;
65
66					# Use patterns understandable by Perl 5.6, if possible
67	4	481µs	3	121µs	# spent 16µs (4+12) within Pod::Simple::BlackBox::BEGIN@67 which was called: # once (4µs+12µs) by Pod::Simple::LinkSection::BEGIN@7 at line 67 my $cs_re = do { no warnings; my_qr('\p{IsCs}', "\x{D800}") }; # spent 93µs making 1 call to Pod::Simple::BlackBox::my_qr # spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@67 # spent 12µs making 1 call to warnings::unimport
68	1	800ns	1	87µs	my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely # spent 87µs making 1 call to Pod::Simple::BlackBox::my_qr
69					# to get assigned
70	1	800ns	1	86µs	my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', # spent 86µs making 1 call to Pod::Simple::BlackBox::my_qr
71					"\x{250}");
72	1	100ns			$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re;
73
74	1	16µs			my $script_run_re = eval 'no warnings "experimental::script_run"; # spent 18µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein.
75					qr/(script_run: ^ . $ )/x';
76	1	900ns	1	95µs	my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); # spent 95µs making 1 call to Pod::Simple::BlackBox::my_qr
77	1	200ns			unless ($latin_re) {
78					# This was machine generated to be the ranges of the union of the above
79					# three properties, with things that were undefined by Unicode 4.1 filling
80					# gaps. That is the version in use when Perl advanced enough to
81					# successfully compile and execute the above pattern.
82					$latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}");
83					}
84
85	1	1µs	1	87µs	my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A"); # spent 87µs making 1 call to Pod::Simple::BlackBox::my_qr
86
87					# Latin script code points not in the first release of Unicode
88	1	800ns	1	103µs	my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); # spent 103µs making 1 call to Pod::Simple::BlackBox::my_qr
89
90					# If this perl doesn't have the Deprecated property, there's only one code
91					# point in it that we need be concerned with.
92	1	700ns	1	76µs	my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); # spent 76µs making 1 call to Pod::Simple::BlackBox::my_qr
93	1	200ns			$deprecated_re = qr/\x{149}/ unless $deprecated_re;
94
95	1	200ns			my $utf8_bom;
96	1	2µs			if (($] ge 5.007_003)) {
97	1	300ns			$utf8_bom = "\x{FEFF}";
98	1	3µs	1	700ns	utf8::encode($utf8_bom); # spent 700ns making 1 call to utf8::encode
99					} else {
100					$utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls.
101					}
102
103					# This is used so that the 'content_seen' method doesn't return true on a
104					# file that just happens to have a line that matches /^=[a-zA-z]/. Only if
105					# there is a valid =foo line will we return that content was seen.
106	1	100ns			my $seen_legal_directive = 0;
107
108					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
109
110					sub parse_line { shift->parse_lines(@_) } # alias
111
112					# - - - Turn back now! Run away! - - -
113
114					sub parse_lines { # Usage: $parser->parse_lines(@lines)
115					# an undef means end-of-stream
116					my $self = shift;
117
118					my $code_handler = $self->{'code_handler'};
119					my $cut_handler = $self->{'cut_handler'};
120					my $wl_handler = $self->{'whiteline_handler'};
121					$self->{'line_count'} \|\|= 0;
122
123					my $scratch;
124
125					DEBUG > 4 and
126					print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
127
128					DEBUG > 5 and
129					print STDERR "# About to parse lines: ",
130					join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
131
132					my $paras = ($self->{'paras'} \|\|= []);
133					# paragraph buffer. Because we need to defer processing of =over
134					# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
135					# to process this.
136
137					$self->{'pod_para_count'} \|\|= 0;
138
139					# An attempt to match the pod portions of a line. This is not fool proof,
140					# but is good enough to serve as part of the heuristic for guessing the pod
141					# encoding if not specified.
142					my $codes = join '', grep { / ^ [A-Za-z] $/x } sort keys %{$self->{accept_codes}};
143					my $pod_chars_re = qr/ ^ = [A-Za-z]+ \| [\Q$codes\E] < /x;
144
145					my $line;
146					foreach my $source_line (@_) {
147					if( $self->{'source_dead'} ) {
148					DEBUG > 4 and print STDERR "# Source is dead.\n";
149					last;
150					}
151
152					unless( defined $source_line ) {
153					DEBUG > 4 and print STDERR "# Undef-line seen.\n";
154
155					push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
156					push @$paras, $paras->[-1], $paras->[-1];
157					# So that it definitely fills the buffer.
158					$self->{'source_dead'} = 1;
159					$self->_ponder_paragraph_buffer;
160					next;
161					}
162
163
164					if( $self->{'line_count'}++ ) {
165					($line = $source_line) =~ tr/\n\r//d;
166					# If we don't have two vars, we'll end up with that there
167					# tr/// modding the (potentially read-only) original source line!
168
169					} else {
170					DEBUG > 2 and print STDERR "First line: [$source_line]\n";
171
172					if( ($line = $source_line) =~ s/^$utf8_bom//s ) {
173					DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
174					$self->_handle_encoding_line( "=encoding utf8" );
175					delete $self->{'_processed_encoding'};
176					$line =~ tr/\n\r//d;
177
178					} elsif( $line =~ s/^\xFE\xFF//s ) {
179					DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
180					$self->scream(
181					$self->{'line_count'},
182					"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
183					);
184					splice @_;
185					push @_, undef;
186					next;
187
188					# TODO: implement somehow?
189
190					} elsif( $line =~ s/^\xFF\xFE//s ) {
191					DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
192					$self->scream(
193					$self->{'line_count'},
194					"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
195					);
196					splice @_;
197					push @_, undef;
198					next;
199
200					# TODO: implement somehow?
201
202					} else {
203					DEBUG > 2 and print STDERR "First line is BOM-less.\n";
204					($line = $source_line) =~ tr/\n\r//d;
205					}
206					}
207
208					if(!$self->{'parse_characters'} && !$self->{'encoding'}
209					&& ($self->{'in_pod'} \|\| $line =~ /^=/s)
210					&& $line =~ /$non_ascii_re/
211					) {
212
213					my $encoding;
214
215					# No =encoding line, and we are at the first pod line in the input that
216					# contains a non-ascii byte, that is, one whose meaning varies depending
217					# on whether the file is encoded in UTF-8 or CP1252, which are the two
218					# possibilities permitted by the pod spec. (ASCII is assumed if the
219					# file only contains ASCII bytes.) In order to process this line, we
220					# need to figure out what encoding we will use for the file.
221					#
222					# Strictly speaking ISO 8859-1 (Latin 1) refers to the code points
223					# 160-255, but it is used here, as it often colloquially is, to refer to
224					# the complete set of code points 0-255, including ASCII (0-127), the C1
225					# controls (128-159), and strict Latin 1 (160-255).
226					#
227					# CP1252 is effectively a superset of Latin 1, because it differs only
228					# from colloquial 8859-1 in the C1 controls, which are very unlikely to
229					# actually be present in 8859-1 files, so can be used for other purposes
230					# without conflict. CP 1252 uses most of them for graphic characters.
231					#
232					# Note that all ASCII-range bytes represent their corresponding code
233					# points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other
234					# code points require multiple (non-ASCII) bytes to represent. (A
235					# separate paragraph for EBCDIC is below.) The multi-byte
236					# representation is quite structured. If we find an isolated byte that
237					# would require multiple bytes to represent in UTF-8, we know that the
238					# encoding is not UTF-8. If we find a sequence of bytes that violates
239					# the UTF-8 structure, we also can presume the encoding isn't UTF-8, and
240					# hence must be 1252.
241					#
242					# But there are ambiguous cases where we could guess wrong. If so, the
243					# user will end up having to supply an =encoding line. We use all
244					# readily available information to improve our chances of guessing
245					# right. The odds of something not being UTF-8, but still passing a
246					# UTF-8 validity test go down very rapidly with increasing length of the
247					# sequence. Therefore we look at all non-ascii sequences on the line.
248					# If any of the sequences can't be UTF-8, we quit there and choose
249					# CP1252. If all could be UTF-8, we see if any of the code points
250					# represented are unlikely to be in pod. If so, we guess CP1252. If
251					# not, we check if the line is all in the same script; if not guess
252					# CP1252; otherwise UTF-8. For perls that don't have convenient script
253					# run testing, see if there is both Latin and non-Latin. If so, CP1252,
254					# otherwise UTF-8.
255					#
256					# On EBCDIC platforms, the situation is somewhat different. In
257					# UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
258					# but so do the bytes that are for the C1 controls. Recall that these
259					# correspond to the unused portion of 8859-1 that 1252 mostly takes
260					# over. That means that there are fewer code points that are
261					# represented by multi-bytes. But, note that the these controls are
262					# very unlikely to be in pod text. So if we encounter one of them, it
263					# means that it is quite likely CP1252 and not UTF-8. The net result is
264					# the same code below is used for both platforms.
265					#
266					# XXX probably if the line has E<foo> that evaluates to illegal CP1252,
267					# then it is UTF-8. But we haven't processed E<> yet.
268
269					goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls
270
271					my $copy;
272
273	2	43µs	2	24µs	# spent 14µs (5+9) within Pod::Simple::BlackBox::BEGIN@273 which was called: # once (5µs+9µs) by Pod::Simple::LinkSection::BEGIN@7 at line 273 no warnings 'utf8'; # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@273 # spent 9µs making 1 call to warnings::unimport
274
275					if ($] ge 5.007_003) {
276					$copy = $line;
277
278					# On perls that have this function, we can use it to easily see if the
279					# sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag
280					# needed below for script run detection
281					goto set_1252 if ! utf8::decode($copy);
282					}
283					elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows
284					# code page doing here anyway?
285					goto set_utf8;
286					}
287					else { # ASCII, no decode(): do it ourselves using the fundamental
288					# characteristics of UTF-8
289	2	3.77ms	2	17µs	# spent 15µs (11+4) within Pod::Simple::BlackBox::BEGIN@289 which was called: # once (11µs+4µs) by Pod::Simple::LinkSection::BEGIN@7 at line 289 use if $] le 5.006002, 'utf8'; # spent 15µs making 1 call to Pod::Simple::BlackBox::BEGIN@289 # spent 2µs making 1 call to if::import
290
291					my $char_ord;
292					my $needed; # How many continuation bytes to gobble up
293
294					# Initialize the translated line with a dummy character that will be
295					# deleted after everything else is done. This dummy makes sure that
296					# $copy will be in UTF-8. Doing it now avoids the bugs in early perls
297					# with upgrading in the middle
298					$copy = chr(0x100);
299
300					# Parse through the line
301					for (my $i = 0; $i < length $line; $i++) {
302					my $byte = substr($line, $i, 1);
303
304					# ASCII bytes are trivially dealt with
305					if ($byte !~ $non_ascii_re) {
306					$copy .= $byte;
307					next;
308					}
309
310					my $b_ord = ord $byte;
311
312					# Now figure out what this code point would be if the input is
313					# actually in UTF-8. If, in the process, we discover that it isn't
314					# well-formed UTF-8, we guess CP1252.
315					#
316					# Start the process. If it is UTF-8, we are at the first, start
317					# byte, of a multi-byte sequence. We look at this byte to figure
318					# out how many continuation bytes are needed, and to initialize the
319					# code point accumulator with the data from this byte.
320					#
321					# Normally the minimum continuation byte is 0x80, but in certain
322					# instances the minimum is a higher number. So the code below
323					# overrides this for those instances.
324					my $min_cont = 0x80;
325
326					if ($b_ord < 0xC2) { # A start byte < C2 is malformed
327					goto set_1252;
328					}
329					elsif ($b_ord <= 0xDF) {
330					$needed = 1;
331					$char_ord = $b_ord & 0x1F;
332					}
333					elsif ($b_ord <= 0xEF) {
334					$min_cont = 0xA0 if $b_ord == 0xE0;
335					$needed = 2;
336					$char_ord = $b_ord & (0x1F >> 1);
337					}
338					elsif ($b_ord <= 0xF4) {
339					$min_cont = 0x90 if $b_ord == 0xF0;
340					$needed = 3;
341					$char_ord = $b_ord & (0x1F >> 2);
342					}
343					else { # F4 is the highest start byte for legal Unicode; higher is
344					# unlikely to be in pod.
345					goto set_1252;
346					}
347
348					# ? not enough continuation bytes available
349					goto set_1252 if $i + $needed >= length $line;
350
351					# Accumulate the ordinal of the character from the remaining
352					# (continuation) bytes.
353					while ($needed-- > 0) {
354					my $cont = substr($line, ++$i, 1);
355					$b_ord = ord $cont;
356					goto set_1252 if $b_ord < $min_cont \|\| $b_ord > 0xBF;
357
358					# In all cases, any next continuation bytes all have the same
359					# minimum legal value
360					$min_cont = 0x80;
361
362					# Accumulate this byte's contribution to the code point
363					$char_ord <<= 6;
364					$char_ord \|= ($b_ord & 0x3F);
365					}
366
367					# Here, the sequence that formed this code point was valid UTF-8,
368					# so add the completed character to the output
369					$copy .= chr $char_ord;
370					} # End of loop through line
371
372					# Delete the dummy first character
373					$copy = substr($copy, 1);
374					}
375
376					# Here, $copy is legal UTF-8.
377
378					# If it can't be legal CP1252, no need to look further. (These bytes
379					# aren't valid in CP1252.) This test could have been placed higher in
380					# the code, but it seemed wrong to set the encoding to UTF-8 without
381					# making sure that the very first instance is well-formed. But what if
382					# it isn't legal CP1252 either? We have to choose one or the other, and
383					# It seems safer to favor the single-byte encoding over the multi-byte.
384					goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/;
385
386					# The C1 controls are not likely to appear in pod
387					goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/;
388
389					# Nor are surrogates nor unassigned, nor deprecated.
390					DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re;
391					goto set_1252 if $cs_re && $copy =~ $cs_re;
392					DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re;
393					goto set_1252 if $cn_re && $copy =~ $cn_re;
394					DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re;
395					goto set_1252 if $copy =~ $deprecated_re;
396
397					# Nor are rare code points. But this is hard to determine. khw
398					# believes that IPA characters and the modifier letters are unlikely to
399					# be in pod (and certainly very unlikely to be the in the first line in
400					# the pod containing non-ASCII)
401					DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re;
402					goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re;
403
404					# The first Unicode version included essentially every Latin character
405					# in modern usage. So, a Latin character not in the first release will
406					# unlikely be in pod.
407					DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re;
408					goto set_1252 if $later_latin_re && $copy =~ $later_latin_re;
409
410					# On perls that handle script runs, if the UTF-8 interpretation yields
411					# a single script, we guess UTF-8, otherwise just having a mixture of
412					# scripts is suspicious, so guess CP1252. We first strip off, as best
413					# we can, the ASCII characters that look like they are pod directives,
414					# as these would always show as mixed with non-Latin text.
415					$copy =~ s/$pod_chars_re//g;
416
417					if ($script_run_re) {
418					goto set_utf8 if $copy =~ $script_run_re;
419					DEBUG > 8 and print STDERR __LINE__, ": not script run\n";
420					goto set_1252;
421					}
422
423					# Even without script runs, but on recent enough perls and Unicodes, we
424					# can check if there is a mixture of both Latin and non-Latin. Again,
425					# having a mixture of scripts is suspicious, so assume CP1252
426
427					# If it's all non-Latin, there is no CP1252, as that is Latin
428					# characters and punct, etc.
429					DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re;
430					goto set_utf8 if $copy !~ $latin_re;
431
432					DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re;
433					goto set_utf8 if $copy =~ $every_char_is_latin_re;
434
435					DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n";
436
437					set_1252:
438					DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n";
439					$encoding = 'CP1252';
440					goto done_set;
441
442					set_utf8:
443					DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n";
444					$encoding = 'UTF-8';
445
446					done_set:
447					$self->_handle_encoding_line( "=encoding $encoding" );
448					delete $self->{'_processed_encoding'};
449					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
450
451					my ($word) = $line =~ /(\S$non_ascii_re\S)/;
452
453					$self->whine(
454					$self->{'line_count'},
455					"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
456					);
457					}
458
459					DEBUG > 5 and print STDERR "# Parsing line: [$line]\n";
460
461					if(!$self->{'in_pod'}) {
462					if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s\|$)/s) {
463					if($1 eq 'cut') {
464					$self->scream(
465					$self->{'line_count'},
466					"=cut found outside a pod block. Skipping to next block."
467					);
468
469					## Before there were errata sections in the world, it was
470					## least-pessimal to abort processing the file. But now we can
471					## just barrel on thru (but still not start a pod block).
472					#splice @_;
473					#push @_, undef;
474
475					next;
476					} else {
477					$self->{'in_pod'} = $self->{'start_of_pod_block'}
478					= $self->{'last_was_blank'} = 1;
479					# And fall thru to the pod-mode block further down
480					}
481					} else {
482					DEBUG > 5 and print STDERR "# It's a code-line.\n";
483					$code_handler->(map $_, $line, $self->{'line_count'}, $self)
484					if $code_handler;
485					# Note: this may cause code to be processed out of order relative
486					# to pods, but in order relative to cuts.
487
488					# Note also that we haven't yet applied the transcoding to $line
489					# by time we call $code_handler!
490
491					if( $line =~ m/^#\sline\s+(\d+)\s(?:\s"([^"]+)")?\s*$/ ) {
492					# That RE is from perlsyn, section "Plain Old Comments (Not!)",
493					#$fname = $2 if defined $2;
494					#DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n";
495					DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
496					$self->{'line_count'} = $1 - 1;
497					}
498
499					next;
500					}
501					}
502
503					# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
504					# Else we're in pod mode:
505
506					# Apply any necessary transcoding:
507					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
508
509					# HERE WE CATCH =encoding EARLY!
510					if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
511					next if $self->parse_characters; # Ignore this line
512					$line = $self->_handle_encoding_line( $line );
513					}
514
515					if($line =~ m/^=cut/s) {
516					# here ends the pod block, and therefore the previous pod para
517					DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n";
518					$self->{'in_pod'} = 0;
519					# ++$self->{'pod_para_count'};
520					$self->_ponder_paragraph_buffer();
521					# by now it's safe to consider the previous paragraph as done.
522					DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n";
523					$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
524					if $cut_handler;
525
526					# TODO: add to docs: Note: this may cause cuts to be processed out
527					# of order relative to pods, but in order relative to code.
528
529					} elsif($line =~ m/^(\s*)$/s) { # it's a blank line
530					if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
531					$wl_handler->(map $_, $line, $self->{'line_count'}, $self)
532					if $wl_handler;
533					}
534
535					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
536					DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
537					push @{$paras->[-1]}, $line;
538					} # otherwise it's not interesting
539
540					if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
541					DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
542					}
543
544					$self->{'last_was_blank'} = 1;
545
546					} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
547
548					if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9])(\s+\|$)(.)/s) {
549					# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
550					my $new = [$1, {'start_line' => $self->{'line_count'}}, $3];
551					$new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " ";
552					# Note that in "=head1 foo", the WS is lost.
553					# Example: ['=head1', {'start_line' => 123}, ' foo']
554
555					++$self->{'pod_para_count'};
556
557					$self->_ponder_paragraph_buffer();
558					# by now it's safe to consider the previous paragraph as done.
559
560					push @$paras, $new; # the new incipient paragraph
561					DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
562
563					} elsif($line =~ m/^\s/s) {
564
565					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
566					DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n";
567					push @{$paras->[-1]}, $line;
568					} else {
569					++$self->{'pod_para_count'};
570					$self->_ponder_paragraph_buffer();
571					# by now it's safe to consider the previous paragraph as done.
572					DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n";
573					push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
574					}
575					} else {
576					++$self->{'pod_para_count'};
577					$self->_ponder_paragraph_buffer();
578					# by now it's safe to consider the previous paragraph as done.
579					push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
580					DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n";
581					}
582					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
583
584					} else {
585					# It's a non-blank line /continuing/ the current para
586					if(@$paras) {
587					DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n";
588					push @{$paras->[-1]}, $line;
589					} else {
590					# Unexpected case!
591					die "Continuing a paragraph but \@\$paras is empty?";
592					}
593					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
594					}
595
596					} # ends the big while loop
597
598					DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
599					return $self;
600					}
601
602					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
603
604					sub _handle_encoding_line {
605					my($self, $line) = @_;
606
607					return if $self->parse_characters;
608
609					# The point of this routine is to set $self->{'_transcoder'} as indicated.
610
611					return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
612					DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n";
613
614					my $e = $1;
615					my $orig = $e;
616					push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
617
618					my $enc_error;
619
620					# Cf. perldoc Encode and perldoc Encode::Supported
621
622					require Pod::Simple::Transcode;
623
624					if( $self->{'encoding'} ) {
625					my $norm_current = $self->{'encoding'};
626					my $norm_e = $e;
627					foreach my $that ($norm_current, $norm_e) {
628					$that = lc($that);
629					$that =~ s/[-_]//g;
630					}
631					if($norm_current eq $norm_e) {
632					DEBUG > 1 and print STDERR "The '=encoding $orig' line is ",
633					"redundant. ($norm_current eq $norm_e). Ignoring.\n";
634					$enc_error = '';
635					# But that doesn't necessarily mean that the earlier one went okay
636					} else {
637					$enc_error = "Encoding is already set to " . $self->{'encoding'};
638					DEBUG > 1 and print STDERR $enc_error;
639					}
640					} elsif (
641					# OK, let's turn on the encoding
642					do {
643					DEBUG > 1 and print STDERR " Setting encoding to $e\n";
644					$self->{'encoding'} = $e;
645					1;
646					}
647					and $e eq 'HACKRAW'
648					) {
649					DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n";
650
651					} elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
652
653					die($enc_error = "WHAT? _transcoder is already set?!")
654					if $self->{'_transcoder'}; # should never happen
655					require Pod::Simple::Transcode;
656					$self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
657					eval {
658					my @x = ('', "abc", "123");
659					$self->{'_transcoder'}->(@x);
660					};
661					$@ && die( $enc_error =
662					"Really unexpected error setting up encoding $e: $@\nAborting"
663					);
664					$self->{'detected_encoding'} = $e;
665
666					} else {
667					my @supported = Pod::Simple::Transcode::->all_encodings;
668
669					# Note unsupported, and complain
670					DEBUG and print STDERR " Encoding [$e] is unsupported.",
671					"\nSupporteds: @supported\n";
672					my $suggestion = '';
673
674					# Look for a near match:
675					my $norm = lc($e);
676					$norm =~ tr[-_][]d;
677					my $n;
678					foreach my $enc (@supported) {
679					$n = lc($enc);
680					$n =~ tr[-_][]d;
681					next unless $n eq $norm;
682					$suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
683					last;
684					}
685					my $encmodver = Pod::Simple::Transcode::->encmodver;
686					$enc_error = join '' =>
687					"This document probably does not appear as it should, because its ",
688					"\"=encoding $e\" line calls for an unsupported encoding.",
689					$suggestion, " [$encmodver\'s supported encodings are: @supported]"
690					;
691
692					$self->scream( $self->{'line_count'}, $enc_error );
693					}
694					push @{ $self->{'encoding_command_statuses'} }, $enc_error;
695					if (defined($self->{'_processed_encoding'})) {
696					# Double declaration.
697					$self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives');
698					}
699					$self->{'_processed_encoding'} = $orig;
700
701					return $line;
702					}
703
704					# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
705
706					sub _handle_encoding_second_level {
707					# By time this is called, the encoding (if well formed) will already
708					# have been acted on.
709					my($self, $para) = @_;
710					my @x = @$para;
711					my $content = join ' ', splice @x, 2;
712					$content =~ s/^\s+//s;
713					$content =~ s/\s+$//s;
714
715					DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";
716
717					if (defined($self->{'_processed_encoding'})) {
718					#if($content ne $self->{'_processed_encoding'}) {
719					# Could it happen?
720					#}
721					delete $self->{'_processed_encoding'};
722					# It's already been handled. Check for errors.
723					if(! $self->{'encoding_command_statuses'} ) {
724					DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n";
725					} elsif( $self->{'encoding_command_statuses'}[-1] ) {
726					$self->whine( $para->[1]{'start_line'},
727					sprintf "Couldn't do %s: %s",
728					$self->{'encoding_command_reqs' }[-1],
729					$self->{'encoding_command_statuses'}[-1],
730					);
731					} else {
732					DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
733					}
734
735					} else {
736					# Otherwise it's a syntax error
737					$self->whine( $para->[1]{'start_line'},
738					"Invalid =encoding syntax: $content"
739					);
740					}
741
742					return;
743					}
744
745					#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
746
747					{
748	1	200ns			my $m = -321; # magic line number
749
750					sub _gen_errata {
751					my $self = $_[0];
752					# Return 0 or more fake-o paragraphs explaining the accumulated
753					# errors on this document.
754
755					return() unless $self->{'errata'} and keys %{$self->{'errata'}};
756
757					my @out;
758
759					foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
760					push @out,
761					['=item', {'start_line' => $m}, "Around line $line:"],
762					map( ['~Para', {'start_line' => $m, '~cooked' => 1},
763					#['~Top', {'start_line' => $m},
764					$_
765					#]
766					],
767					@{$self->{'errata'}{$line}}
768					)
769					;
770					}
771
772					# TODO: report of unknown entities? unrenderable characters?
773
774					unshift @out,
775					['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
776					['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
777					"Hey! ",
778					['B', {},
779					'The above document had some coding errors, which are explained below:'
780					]
781					],
782					['=over', {'start_line' => $m, 'errata' => 1}, ''],
783					;
784
785					push @out,
786					['=back', {'start_line' => $m, 'errata' => 1}, ''],
787					;
788
789					DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n";
790
791					return @out;
792					}
793
794					}
795
796					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
797
798					##############################################################################
799					##
800					## stop reading now stop reading now stop reading now stop reading now stop
801					##
802					## HERE IT BECOMES REALLY SCARY
803					##
804					## stop reading now stop reading now stop reading now stop reading now stop
805					##
806					##############################################################################
807
808	1	300ns			sub _ponder_paragraph_buffer {
809
810					# Para-token types as found in the buffer.
811					# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
812					# =over, =back, =item
813					# and the null =pod (to be complained about if over one line)
814					#
815					# "~data" paragraphs are something we generate at this level, depending on
816					# a currently open =over region
817
818					# Events fired: Begin and end for:
819					# directivename (like head1 .. head4), item, extend,
820					# for (from =begin...=end, =for),
821					# over-bullet, over-number, over-text, over-block,
822					# item-bullet, item-number, item-text,
823					# Document,
824					# Data, Para, Verbatim
825					# B, C, longdirname (TODO -- wha?), etc. for all directives
826					#
827
828					my $self = $_[0];
829					my $paras;
830					return unless @{$paras = $self->{'paras'}};
831					my $curr_open = ($self->{'curr_open'} \|\|= []);
832
833					my $scratch;
834
835					DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n";
836
837					# We have something in our buffer. So apparently the document has started.
838					unless($self->{'doc_has_started'}) {
839					$self->{'doc_has_started'} = 1;
840
841					my $starting_contentless;
842					$starting_contentless =
843					(
844					!@$curr_open
845					and @$paras and ! grep $_->[0] ne '~end', @$paras
846					# i.e., if the paras is all ~ends
847					)
848					;
849					DEBUG and print STDERR "# Starting ",
850					$starting_contentless ? 'contentless' : 'contentful',
851					" document\n"
852					;
853
854					$self->_handle_element_start(
855					($scratch = 'Document'),
856					{
857					'start_line' => $paras->[0][1]{'start_line'},
858					$starting_contentless ? ( 'contentless' => 1 ) : (),
859					},
860					);
861					}
862
863					my($para, $para_type);
864					while(@$paras) {
865
866					# If a directive, assume it's legal; subtract below if found not to be
867					$seen_legal_directive++ if $paras->[0][0] =~ /^=/;
868
869					last if @$paras == 1
870					and ( $paras->[0][0] eq '=over'
871					or $paras->[0][0] eq '=item'
872					or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'}));
873					# Those're the three kinds of paragraphs that require lookahead.
874					# Actually, an "=item Foo" inside an <over type=text> region
875					# and any =item inside an <over type=block> region (rare)
876					# don't require any lookahead, but all others (bullets
877					# and numbers) do.
878					# The verbatim is different from the other two, because those might be
879					# like:
880					#
881					# =item
882					# ...
883					# =cut
884					# ...
885					# =item
886					#
887					# The =cut here finishes the paragraph but doesn't terminate the =over
888					# they should be in. (khw apologizes that he didn't comment at the time
889					# why the 'in_pod' works, and no longer remembers why, and doesn't think
890					# it is currently worth the effort to re-figure it out.)
891
892					# TODO: whinge about many kinds of directives in non-resolving =for regions?
893					# TODO: many? like what? =head1 etc?
894
895					$para = shift @$paras;
896					$para_type = $para->[0];
897
898					DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
899					$self->_dump_curr_open(), ")\n";
900
901					if($para_type eq '=for') {
902					next if $self->_ponder_for($para,$curr_open,$paras);
903
904					} elsif($para_type eq '=begin') {
905					next if $self->_ponder_begin($para,$curr_open,$paras);
906
907					} elsif($para_type eq '=end') {
908					next if $self->_ponder_end($para,$curr_open,$paras);
909
910					} elsif($para_type eq '~end') { # The virtual end-document signal
911					next if $self->_ponder_doc_end($para,$curr_open,$paras);
912					}
913
914
915					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
916					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
917					if(grep $_->[1]{'~ignore'}, @$curr_open) {
918					DEBUG > 1 and
919					print STDERR "Skipping $para_type paragraph because in ignore mode.\n";
920					next;
921					}
922					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
923					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
924
925					if($para_type eq '=pod') {
926					$self->_ponder_pod($para,$curr_open,$paras);
927
928					} elsif($para_type eq '=over') {
929					next if $self->_ponder_over($para,$curr_open,$paras);
930
931					} elsif($para_type eq '=back') {
932					next if $self->_ponder_back($para,$curr_open,$paras);
933
934					} else {
935
936					# All non-magical codes!!!
937
938					# Here we start using $para_type for our own twisted purposes, to
939					# mean how it should get treated, not as what the element name
940					# should be.
941
942					DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n";
943
944					my $i;
945
946					# Enforce some =headN discipline
947					if($para_type =~ m/^=head\d$/s
948					and ! $self->{'accept_heads_anywhere'}
949					and @$curr_open
950					and $curr_open->[-1][0] eq '=over'
951					) {
952					DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n";
953					$self->whine(
954					$para->[1]{'start_line'},
955					"You forgot a '=back' before '$para_type'"
956					);
957					unshift @$paras, ['=back', {}, ''], $para; # close the =over
958					next;
959					}
960
961
962					if($para_type eq '=item') {
963
964					my $over;
965					unless(@$curr_open and
966					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
967					$self->whine(
968					$para->[1]{'start_line'},
969					"'=item' outside of any '=over'"
970					);
971					unshift @$paras,
972					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
973					$para
974					;
975					next;
976					}
977
978
979					my $over_type = $over->[1]{'~type'};
980
981					if(!$over_type) {
982					# Shouldn't happen1
983					die "Typeless over in stack, starting at line "
984					. $over->[1]{'start_line'};
985
986					} elsif($over_type eq 'block') {
987					unless($curr_open->[-1][1]{'~bitched_about'}) {
988					$curr_open->[-1][1]{'~bitched_about'} = 1;
989					$self->whine(
990					$curr_open->[-1][1]{'start_line'},
991					"You can't have =items (as at line "
992					. $para->[1]{'start_line'}
993					. ") unless the first thing after the =over is an =item"
994					);
995					}
996					# Just turn it into a paragraph and reconsider it
997					$para->[0] = '~Para';
998					unshift @$paras, $para;
999					next;
1000
1001					} elsif($over_type eq 'text') {
1002					my $item_type = $self->_get_item_type($para);
1003					# That kills the content of the item if it's a number or bullet.
1004					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1005
1006					if($item_type eq 'text') {
1007					# Nothing special needs doing for 'text'
1008					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
1009					$self->whine(
1010					$para->[1]{'start_line'},
1011					"Expected text after =item, not a $item_type"
1012					);
1013					# Undo our clobbering:
1014					push @$para, $para->[1]{'~orig_content'};
1015					delete $para->[1]{'number'};
1016					# Only a PROPER item-number element is allowed
1017					# to have a number attribute.
1018					} else {
1019					die "Unhandled item type $item_type"; # should never happen
1020					}
1021
1022					# =item-text thingies don't need any assimilation, it seems.
1023
1024					} elsif($over_type eq 'number') {
1025					my $item_type = $self->_get_item_type($para);
1026					# That kills the content of the item if it's a number or bullet.
1027					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1028
1029					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1030
1031					if($item_type eq 'bullet') {
1032					# Hm, it's not numeric. Correct for this.
1033					$para->[1]{'number'} = $expected_value;
1034					$self->whine(
1035					$para->[1]{'start_line'},
1036					"Expected '=item $expected_value'"
1037					);
1038					push @$para, $para->[1]{'~orig_content'};
1039					# restore the bullet, blocking the assimilation of next para
1040
1041					} elsif($item_type eq 'text') {
1042					# Hm, it's not numeric. Correct for this.
1043					$para->[1]{'number'} = $expected_value;
1044					$self->whine(
1045					$para->[1]{'start_line'},
1046					"Expected '=item $expected_value'"
1047					);
1048					# Text content will still be there and will block next ~Para
1049
1050					} elsif($item_type ne 'number') {
1051					die "Unknown item type $item_type"; # should never happen
1052
1053					} elsif($expected_value == $para->[1]{'number'}) {
1054					DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
1055
1056					} else {
1057					DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
1058					" instead of the expected value of $expected_value\n";
1059					$self->whine(
1060					$para->[1]{'start_line'},
1061					"You have '=item " . $para->[1]{'number'} .
1062					"' instead of the expected '=item $expected_value'"
1063					);
1064					$para->[1]{'number'} = $expected_value; # correcting!!
1065					}
1066
1067					if(@$para == 2) {
1068					# For the cases where we /didn't/ push to @$para
1069					if($paras->[0][0] eq '~Para') {
1070					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1071					push @$para, splice @{shift @$paras},2;
1072					} else {
1073					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1074					push @$para, ''; # Just so it's not contentless
1075					}
1076					}
1077
1078
1079					} elsif($over_type eq 'bullet') {
1080					my $item_type = $self->_get_item_type($para);
1081					# That kills the content of the item if it's a number or bullet.
1082					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1083
1084					if($item_type eq 'bullet') {
1085					# as expected!
1086
1087					if( $para->[1]{'~_freaky_para_hack'} ) {
1088					DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
1089					push @$para, $para->[1]{'~_freaky_para_hack'};
1090					}
1091
1092					} elsif($item_type eq 'number') {
1093					$self->whine(
1094					$para->[1]{'start_line'},
1095					"Expected '=item *'"
1096					);
1097					push @$para, $para->[1]{'~orig_content'};
1098					# and block assimilation of the next paragraph
1099					delete $para->[1]{'number'};
1100					# Only a PROPER item-number element is allowed
1101					# to have a number attribute.
1102					} elsif($item_type eq 'text') {
1103					$self->whine(
1104					$para->[1]{'start_line'},
1105					"Expected '=item *'"
1106					);
1107					# But doesn't need processing. But it'll block assimilation
1108					# of the next para.
1109					} else {
1110					die "Unhandled item type $item_type"; # should never happen
1111					}
1112
1113					if(@$para == 2) {
1114					# For the cases where we /didn't/ push to @$para
1115					if($paras->[0][0] eq '~Para') {
1116					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1117					push @$para, splice @{shift @$paras},2;
1118					} else {
1119					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1120					push @$para, ''; # Just so it's not contentless
1121					}
1122					}
1123
1124					} else {
1125					die "Unhandled =over type \"$over_type\"?";
1126					# Shouldn't happen!
1127					}
1128
1129					$para_type = 'Plain';
1130					$para->[0] .= '-' . $over_type;
1131					# Whew. Now fall thru and process it.
1132
1133
1134					} elsif($para_type eq '=extend') {
1135					# Well, might as well implement it here.
1136					$self->_ponder_extend($para);
1137					next; # and skip
1138					} elsif($para_type eq '=encoding') {
1139					# Not actually acted on here, but we catch errors here.
1140					$self->_handle_encoding_second_level($para);
1141					next unless $self->keep_encoding_directive;
1142					$para_type = 'Plain';
1143					} elsif($para_type eq '~Verbatim') {
1144					$para->[0] = 'Verbatim';
1145					$para_type = '?Verbatim';
1146					} elsif($para_type eq '~Para') {
1147					$para->[0] = 'Para';
1148					$para_type = '?Plain';
1149					} elsif($para_type eq 'Data') {
1150					$para->[0] = 'Data';
1151					$para_type = '?Data';
1152					} elsif( $para_type =~ s/^=//s
1153					and defined( $para_type = $self->{'accept_directives'}{$para_type} )
1154					) {
1155					DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n";
1156					} else {
1157					# An unknown directive!
1158					$seen_legal_directive--;
1159					DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n",
1160					$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
1161					;
1162					$self->whine(
1163					$para->[1]{'start_line'},
1164					"Unknown directive: $para->[0]"
1165					);
1166
1167					# And maybe treat it as text instead of just letting it go?
1168					next;
1169					}
1170
1171					if($para_type =~ s/^\?//s) {
1172					if(! @$curr_open) { # usual case
1173					DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n";
1174					} else {
1175					my @fors = grep $_->[0] eq '=for', @$curr_open;
1176					DEBUG > 1 and print STDERR "Containing fors: ",
1177					join(',', map $_->[1]{'target'}, @fors), "\n";
1178
1179					if(! @fors) {
1180					DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";
1181
1182					#} elsif(grep $_->[1]{'~resolve'}, @fors) {
1183					#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
1184					} elsif( $fors[-1][1]{'~resolve'} ) {
1185					# Look to the immediately containing for
1186
1187					if($para_type eq 'Data') {
1188					DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
1189					$para->[0] = 'Para';
1190					$para_type = 'Plain';
1191					} else {
1192					DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
1193					}
1194					} else {
1195					DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
1196					$para->[0] = $para_type = 'Data';
1197					}
1198					}
1199					}
1200
1201					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1202					if($para_type eq 'Plain') {
1203					$self->_ponder_Plain($para);
1204					} elsif($para_type eq 'Verbatim') {
1205					$self->_ponder_Verbatim($para);
1206					} elsif($para_type eq 'Data') {
1207					$self->_ponder_Data($para);
1208					} else {
1209					die "\$para type is $para_type -- how did that happen?";
1210					# Shouldn't happen.
1211					}
1212
1213					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1214					$para->[0] =~ s/^[~=]//s;
1215
1216					DEBUG and print STDERR "\n", pretty($para), "\n";
1217
1218					# traverse the treelet (which might well be just one string scalar)
1219					$self->{'content_seen'} \|\|= 1 if $seen_legal_directive
1220					&& ! $self->{'~tried_gen_errata'};
1221					$self->_traverse_treelet_bit(@$para);
1222					}
1223					}
1224
1225					return;
1226					}
1227
1228					###########################################################################
1229					# The sub-ponderers...
1230
- -
1233					sub _ponder_for {
1234					my ($self,$para,$curr_open,$paras) = @_;
1235
1236					# Fake it out as a begin/end
1237					my $target;
1238
1239					if(grep $_->[1]{'~ignore'}, @$curr_open) {
1240					DEBUG > 1 and print STDERR "Ignoring ignorable =for\n";
1241					return 1;
1242					}
1243
1244					for(my $i = 2; $i < @$para; ++$i) {
1245					if($para->[$i] =~ s/^\s(\S+)\s//s) {
1246					$target = $1;
1247					last;
1248					}
1249					}
1250					unless(defined $target) {
1251					$self->whine(
1252					$para->[1]{'start_line'},
1253					"=for without a target?"
1254					);
1255					return 1;
1256					}
1257					DEBUG > 1 and
1258					print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";
1259
1260					$para->[0] = 'Data';
1261
1262					unshift @$paras,
1263					['=begin',
1264					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1265					$target,
1266					],
1267					$para,
1268					['=end',
1269					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1270					$target,
1271					],
1272					;
1273
1274					return 1;
1275					}
1276
1277					sub _ponder_begin {
1278					my ($self,$para,$curr_open,$paras) = @_;
1279					my $content = join ' ', splice @$para, 2;
1280					$content =~ s/^\s+//s;
1281					$content =~ s/\s+$//s;
1282					unless(length($content)) {
1283					$self->whine(
1284					$para->[1]{'start_line'},
1285					"=begin without a target?"
1286					);
1287					DEBUG and print STDERR "Ignoring targetless =begin\n";
1288					return 1;
1289					}
1290
1291					my ($target, $title) = $content =~ m/^(\S+)\s(.)$/;
1292					$para->[1]{'title'} = $title if ($title);
1293					$para->[1]{'target'} = $target; # without any ':'
1294					$content = $target; # strip off the title
1295
1296					$content =~ s/^:!/!:/s;
1297					my $neg; # whether this is a negation-match
1298					$neg = 1 if $content =~ s/^!//s;
1299					my $to_resolve; # whether to process formatting codes
1300					$to_resolve = 1 if $content =~ s/^://s;
1301
1302					my $dont_ignore; # whether this target matches us
1303
1304					foreach my $target_name (
1305					split(',', $content, -1),
1306					$neg ? () : '*'
1307					) {
1308					DEBUG > 2 and
1309					print STDERR " Considering whether =begin $content matches $target_name\n";
1310					next unless $self->{'accept_targets'}{$target_name};
1311
1312					DEBUG > 2 and
1313					print STDERR " It DOES match the acceptable target $target_name!\n";
1314					$to_resolve = 1
1315					if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
1316					$dont_ignore = 1;
1317					$para->[1]{'target_matching'} = $target_name;
1318					last; # stop looking at other target names
1319					}
1320
1321					if($neg) {
1322					if( $dont_ignore ) {
1323					$dont_ignore = '';
1324					delete $para->[1]{'target_matching'};
1325					DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n";
1326					} else {
1327					$dont_ignore = 1;
1328					$para->[1]{'target_matching'} = '!';
1329					DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n";
1330					}
1331					}
1332
1333					$para->[0] = '=for'; # Just what we happen to call these, internally
1334					$para->[1]{'~really'} \|\|= '=begin';
1335					$para->[1]{'~ignore'} = (! $dont_ignore) \|\| 0;
1336					$para->[1]{'~resolve'} = $to_resolve \|\| 0;
1337
1338					DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '',
1339					"ignore contents of this region\n";
1340					DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ",
1341					($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1342					DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n";
1343
1344					push @$curr_open, $para;
1345					if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1346					DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n";
1347					} else {
1348					$self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'};
1349					$self->_handle_element_start((my $scratch='for'), $para->[1]);
1350					}
1351
1352					return 1;
1353					}
1354
1355					sub _ponder_end {
1356					my ($self,$para,$curr_open,$paras) = @_;
1357					my $content = join ' ', splice @$para, 2;
1358					$content =~ s/^\s+//s;
1359					$content =~ s/\s+$//s;
1360					DEBUG and print STDERR "Ogling '=end $content' directive\n";
1361
1362					unless(length($content)) {
1363					$self->whine(
1364					$para->[1]{'start_line'},
1365					"'=end' without a target?" . (
1366					( @$curr_open and $curr_open->[-1][0] eq '=for' )
1367					? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1368					: ''
1369					)
1370					);
1371					DEBUG and print STDERR "Ignoring targetless =end\n";
1372					return 1;
1373					}
1374
1375					unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1376					$self->whine(
1377					$para->[1]{'start_line'},
1378					"'=end $content' is invalid. (Stack: "
1379					. $self->_dump_curr_open() . ')'
1380					);
1381					DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1382					return 1;
1383					}
1384
1385					unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1386					$self->whine(
1387					$para->[1]{'start_line'},
1388					"=end $content without matching =begin. (Stack: "
1389					. $self->_dump_curr_open() . ')'
1390					);
1391					DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1392					return 1;
1393					}
1394
1395					unless($content eq $curr_open->[-1][1]{'target'}) {
1396					$self->whine(
1397					$para->[1]{'start_line'},
1398					"=end $content doesn't match =begin "
1399					. $curr_open->[-1][1]{'target'}
1400					. ". (Stack: "
1401					. $self->_dump_curr_open() . ')'
1402					);
1403					DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1404					return 1;
1405					}
1406
1407					# Else it's okay to close...
1408					if(grep $_->[1]{'~ignore'}, @$curr_open) {
1409					DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n";
1410					# And that may be because of this to-be-closed =for region, or some
1411					# other one, but it doesn't matter.
1412					} else {
1413					$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1414					# what's that for?
1415
1416					$self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'};
1417					$self->_handle_element_end( my $scratch = 'for', $para->[1]);
1418					}
1419					DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1420					pop @$curr_open;
1421
1422					return 1;
1423					}
1424
1425					sub _ponder_doc_end {
1426					my ($self,$para,$curr_open,$paras) = @_;
1427					if(@$curr_open) { # Deal with things left open
1428					DEBUG and print STDERR "Stack is nonempty at end-document: (",
1429					$self->_dump_curr_open(), ")\n";
1430
1431					DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";
1432					unshift @$paras, $self->_closers_for_all_curr_open;
1433					# Make sure there is exactly one ~end in the parastack, at the end:
1434					@$paras = grep $_->[0] ne '~end', @$paras;
1435					push @$paras, $para, $para;
1436					# We need two -- once for the next cycle where we
1437					# generate errata, and then another to be at the end
1438					# when that loop back around to process the errata.
1439					return 1;
1440
1441					} else {
1442					DEBUG and print STDERR "Okay, stack is empty now.\n";
1443					}
1444
1445					# Try generating errata section, if applicable
1446					unless($self->{'~tried_gen_errata'}) {
1447					$self->{'~tried_gen_errata'} = 1;
1448					my @extras = $self->_gen_errata();
1449					if(@extras) {
1450					unshift @$paras, @extras;
1451					DEBUG and print STDERR "Generated errata... relooping...\n";
1452					return 1; # I.e., loop around again to process these fake-o paragraphs
1453					}
1454					}
1455
1456					splice @$paras; # Well, that's that for this paragraph buffer.
1457					DEBUG and print STDERR "Throwing end-document event.\n";
1458
1459					$self->_handle_element_end( my $scratch = 'Document' );
1460					return 1; # Hasta la byebye
1461					}
1462
1463					sub _ponder_pod {
1464					my ($self,$para,$curr_open,$paras) = @_;
1465					$self->whine(
1466					$para->[1]{'start_line'},
1467					"=pod directives shouldn't be over one line long! Ignoring all "
1468					. (@$para - 2) . " lines of content"
1469					) if @$para > 3;
1470
1471					# Content ignored unless 'pod_handler' is set
1472					if (my $pod_handler = $self->{'pod_handler'}) {
1473					my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1474					$line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1475					$pod_handler->($line, $line_num, $self);
1476					}
1477
1478					# The surrounding methods set content_seen, so let us remain consistent.
1479					# I do not know why it was not here before -- should it not be here?
1480					# $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'};
1481
1482					return;
1483					}
1484
1485					sub _ponder_over {
1486					my ($self,$para,$curr_open,$paras) = @_;
1487					return 1 unless @$paras;
1488					my $list_type;
1489
1490					if($paras->[0][0] eq '=item') { # most common case
1491					$list_type = $self->_get_initial_item_type($paras->[0]);
1492
1493					} elsif($paras->[0][0] eq '=back') {
1494					# Ignore empty lists by default
1495					if ($self->{'parse_empty_lists'}) {
1496					$list_type = 'empty';
1497					} else {
1498					shift @$paras;
1499					return 1;
1500					}
1501					} elsif($paras->[0][0] eq '~end') {
1502					$self->whine(
1503					$para->[1]{'start_line'},
1504					"=over is the last thing in the document?!"
1505					);
1506					return 1; # But feh, ignore it.
1507					} else {
1508					$list_type = 'block';
1509					}
1510					$para->[1]{'~type'} = $list_type;
1511					push @$curr_open, $para;
1512					# yes, we reuse the paragraph as a stack item
1513
1514					my $content = join ' ', splice @$para, 2;
1515					$para->[1]{'~orig_content'} = $content;
1516					my $overness;
1517					if($content =~ m/^\s*$/s) {
1518					$para->[1]{'indent'} = 4;
1519					} elsif($content =~ m/^\s((?:\d\.)?\d+)\s*$/s) {
1520	2	2.83ms	2	15µs	# spent 13µs (10+2) within Pod::Simple::BlackBox::BEGIN@1520 which was called: # once (10µs+2µs) by Pod::Simple::LinkSection::BEGIN@7 at line 1520 no integer; # spent 13µs making 1 call to Pod::Simple::BlackBox::BEGIN@1520 # spent 2µs making 1 call to integer::unimport
1521					$para->[1]{'indent'} = $1;
1522					if($1 == 0) {
1523					$self->whine(
1524					$para->[1]{'start_line'},
1525					"Can't have a 0 in =over $content"
1526					);
1527					$para->[1]{'indent'} = 4;
1528					}
1529					} else {
1530					$self->whine(
1531					$para->[1]{'start_line'},
1532					"=over should be: '=over' or '=over positive_number'"
1533					);
1534					$para->[1]{'indent'} = 4;
1535					}
1536					DEBUG > 1 and print STDERR "=over found of type $list_type\n";
1537
1538					$self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'};
1539					$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1540
1541					return;
1542					}
1543
1544					sub _ponder_back {
1545					my ($self,$para,$curr_open,$paras) = @_;
1546					# TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1547
1548					my $content = join ' ', splice @$para, 2;
1549					if($content =~ m/\S/) {
1550					$self->whine(
1551					$para->[1]{'start_line'},
1552					"=back doesn't take any parameters, but you said =back $content"
1553					);
1554					}
1555
1556					if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1557					DEBUG > 1 and print STDERR "=back happily closes matching =over\n";
1558					# Expected case: we're closing the most recently opened thing
1559					#my $over = pop @$curr_open;
1560					$self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'};
1561					$self->_handle_element_end( my $scratch =
1562					'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1563					);
1564					} else {
1565					DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (",
1566					join(', ', map $_->[0], @$curr_open), ").\n";
1567					$self->whine(
1568					$para->[1]{'start_line'},
1569					'=back without =over'
1570					);
1571					return 1; # and ignore it
1572					}
1573					}
1574
1575					sub _ponder_item {
1576					my ($self,$para,$curr_open,$paras) = @_;
1577					my $over;
1578					unless(@$curr_open and
1579					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1580					$self->whine(
1581					$para->[1]{'start_line'},
1582					"'=item' outside of any '=over'"
1583					);
1584					unshift @$paras,
1585					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1586					$para
1587					;
1588					return 1;
1589					}
1590
1591
1592					my $over_type = $over->[1]{'~type'};
1593
1594					if(!$over_type) {
1595					# Shouldn't happen1
1596					die "Typeless over in stack, starting at line "
1597					. $over->[1]{'start_line'};
1598
1599					} elsif($over_type eq 'block') {
1600					unless($curr_open->[-1][1]{'~bitched_about'}) {
1601					$curr_open->[-1][1]{'~bitched_about'} = 1;
1602					$self->whine(
1603					$curr_open->[-1][1]{'start_line'},
1604					"You can't have =items (as at line "
1605					. $para->[1]{'start_line'}
1606					. ") unless the first thing after the =over is an =item"
1607					);
1608					}
1609					# Just turn it into a paragraph and reconsider it
1610					$para->[0] = '~Para';
1611					unshift @$paras, $para;
1612					return 1;
1613
1614					} elsif($over_type eq 'text') {
1615					my $item_type = $self->_get_item_type($para);
1616					# That kills the content of the item if it's a number or bullet.
1617					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1618
1619					if($item_type eq 'text') {
1620					# Nothing special needs doing for 'text'
1621					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
1622					$self->whine(
1623					$para->[1]{'start_line'},
1624					"Expected text after =item, not a $item_type"
1625					);
1626					# Undo our clobbering:
1627					push @$para, $para->[1]{'~orig_content'};
1628					delete $para->[1]{'number'};
1629					# Only a PROPER item-number element is allowed
1630					# to have a number attribute.
1631					} else {
1632					die "Unhandled item type $item_type"; # should never happen
1633					}
1634
1635					# =item-text thingies don't need any assimilation, it seems.
1636
1637					} elsif($over_type eq 'number') {
1638					my $item_type = $self->_get_item_type($para);
1639					# That kills the content of the item if it's a number or bullet.
1640					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1641
1642					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1643
1644					if($item_type eq 'bullet') {
1645					# Hm, it's not numeric. Correct for this.
1646					$para->[1]{'number'} = $expected_value;
1647					$self->whine(
1648					$para->[1]{'start_line'},
1649					"Expected '=item $expected_value'"
1650					);
1651					push @$para, $para->[1]{'~orig_content'};
1652					# restore the bullet, blocking the assimilation of next para
1653
1654					} elsif($item_type eq 'text') {
1655					# Hm, it's not numeric. Correct for this.
1656					$para->[1]{'number'} = $expected_value;
1657					$self->whine(
1658					$para->[1]{'start_line'},
1659					"Expected '=item $expected_value'"
1660					);
1661					# Text content will still be there and will block next ~Para
1662
1663					} elsif($item_type ne 'number') {
1664					die "Unknown item type $item_type"; # should never happen
1665
1666					} elsif($expected_value == $para->[1]{'number'}) {
1667					DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
1668
1669					} else {
1670					DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
1671					" instead of the expected value of $expected_value\n";
1672					$self->whine(
1673					$para->[1]{'start_line'},
1674					"You have '=item " . $para->[1]{'number'} .
1675					"' instead of the expected '=item $expected_value'"
1676					);
1677					$para->[1]{'number'} = $expected_value; # correcting!!
1678					}
1679
1680					if(@$para == 2) {
1681					# For the cases where we /didn't/ push to @$para
1682					if($paras->[0][0] eq '~Para') {
1683					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1684					push @$para, splice @{shift @$paras},2;
1685					} else {
1686					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1687					push @$para, ''; # Just so it's not contentless
1688					}
1689					}
1690
1691
1692					} elsif($over_type eq 'bullet') {
1693					my $item_type = $self->_get_item_type($para);
1694					# That kills the content of the item if it's a number or bullet.
1695					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1696
1697					if($item_type eq 'bullet') {
1698					# as expected!
1699
1700					if( $para->[1]{'~_freaky_para_hack'} ) {
1701					DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
1702					push @$para, $para->[1]{'~_freaky_para_hack'};
1703					}
1704
1705					} elsif($item_type eq 'number') {
1706					$self->whine(
1707					$para->[1]{'start_line'},
1708					"Expected '=item *'"
1709					);
1710					push @$para, $para->[1]{'~orig_content'};
1711					# and block assimilation of the next paragraph
1712					delete $para->[1]{'number'};
1713					# Only a PROPER item-number element is allowed
1714					# to have a number attribute.
1715					} elsif($item_type eq 'text') {
1716					$self->whine(
1717					$para->[1]{'start_line'},
1718					"Expected '=item *'"
1719					);
1720					# But doesn't need processing. But it'll block assimilation
1721					# of the next para.
1722					} else {
1723					die "Unhandled item type $item_type"; # should never happen
1724					}
1725
1726					if(@$para == 2) {
1727					# For the cases where we /didn't/ push to @$para
1728					if($paras->[0][0] eq '~Para') {
1729					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1730					push @$para, splice @{shift @$paras},2;
1731					} else {
1732					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1733					push @$para, ''; # Just so it's not contentless
1734					}
1735					}
1736
1737					} else {
1738					die "Unhandled =over type \"$over_type\"?";
1739					# Shouldn't happen!
1740					}
1741					$para->[0] .= '-' . $over_type;
1742
1743					return;
1744					}
1745
1746					sub _ponder_Plain {
1747					my ($self,$para) = @_;
1748					DEBUG and print STDERR " giving plain treatment...\n";
1749					unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1750					or $para->[1]{'~cooked'}
1751					) {
1752					push @$para,
1753					@{$self->_make_treelet(
1754					join("\n", splice(@$para, 2)),
1755					$para->[1]{'start_line'}
1756					)};
1757					}
1758					# Empty paragraphs don't need a treelet for any reason I can see.
1759					# And precooked paragraphs already have a treelet.
1760					return;
1761					}
1762
1763					sub _ponder_Verbatim {
1764					my ($self,$para) = @_;
1765					DEBUG and print STDERR " giving verbatim treatment...\n";
1766
1767					$para->[1]{'xml:space'} = 'preserve';
1768
1769					unless ($self->{'_output_is_for_JustPod'}) {
1770					# Fix illegal settings for expand_verbatim_tabs()
1771					# This is because this module doesn't do input error checking, but khw
1772					# doesn't want to add yet another instance of that.
1773					$self->expand_verbatim_tabs(8)
1774					if ! defined $self->expand_verbatim_tabs()
1775					\|\| $self->expand_verbatim_tabs() =~ /\D/;
1776
1777					my $indent = $self->strip_verbatim_indent;
1778					if ($indent && ref $indent eq 'CODE') {
1779					my @shifted = (shift @{$para}, shift @{$para});
1780					$indent = $indent->($para);
1781					unshift @{$para}, @shifted;
1782					}
1783
1784					for(my $i = 2; $i < @$para; $i++) {
1785					foreach my $line ($para->[$i]) { # just for aliasing
1786					# Strip indentation.
1787					$line =~ s/^\Q$indent// if $indent;
1788					next unless $self->expand_verbatim_tabs;
1789
1790					# This is commented out because of github issue #85, and the
1791					# current maintainers don't know why it was there in the first
1792					# place.
1793					#&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1794					while( $line =~
1795					# Sort of adapted from Text::Tabs.
1796					s/^([^\t]*)(\t+)/$1.(" " x ((length($2)
1797					* $self->expand_verbatim_tabs)
1798					-(length($1)&7)))/e
1799					) {}
1800
1801					# TODO: whinge about (or otherwise treat) unindented or overlong lines
1802
1803					}
1804					}
1805					}
1806
1807					# Now the VerbatimFormatted hoodoo...
1808					if( $self->{'accept_codes'} and
1809					$self->{'accept_codes'}{'VerbatimFormatted'}
1810					) {
1811					while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1812					# Kill any number of terminal newlines
1813					$self->_verbatim_format($para);
1814					} elsif ($self->{'codes_in_verbatim'}) {
1815					push @$para,
1816					@{$self->_make_treelet(
1817					join("\n", splice(@$para, 2)),
1818					$para->[1]{'start_line'}, $para->[1]{'xml:space'}
1819					)};
1820					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1821					} else {
1822					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1823					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1824					}
1825					return;
1826					}
1827
1828					sub _ponder_Data {
1829					my ($self,$para) = @_;
1830					DEBUG and print STDERR " giving data treatment...\n";
1831					$para->[1]{'xml:space'} = 'preserve';
1832					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1833					return;
1834					}
1835
- -
1839					###########################################################################
1840
1841					sub _traverse_treelet_bit { # for use only by the routine above
1842					my($self, $name) = splice @_,0,2;
1843
1844					my $scratch;
1845					$self->_handle_element_start(($scratch=$name), shift @_);
1846
1847					while (@_) {
1848					my $x = shift;
1849					if (ref($x)) {
1850					&_traverse_treelet_bit($self, @$x);
1851					} else {
1852					$x .= shift while @_ && !ref($_[0]);
1853					$self->_handle_text($x);
1854					}
1855					}
1856
1857					$self->_handle_element_end($scratch=$name);
1858					return;
1859					}
1860
1861					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1862
1863					sub _closers_for_all_curr_open {
1864					my $self = $_[0];
1865					my @closers;
1866					foreach my $still_open (@{ $self->{'curr_open'} \|\| return }) {
1867					my @copy = @$still_open;
1868					$copy[1] = {%{ $copy[1] }};
1869					#$copy[1]{'start_line'} = -1;
1870					if($copy[0] eq '=for') {
1871					$copy[0] = '=end';
1872					} elsif($copy[0] eq '=over') {
1873					$self->whine(
1874					$still_open->[1]{start_line} ,
1875					"=over without closing =back"
1876					);
1877
1878					$copy[0] = '=back';
1879					} else {
1880					die "I don't know how to auto-close an open $copy[0] region";
1881					}
1882
1883					unless( @copy > 2 ) {
1884					push @copy, $copy[1]{'target'};
1885					$copy[-1] = '' unless defined $copy[-1];
1886					# since =over's don't have targets
1887					}
1888
1889					$copy[1]{'fake-closer'} = 1;
1890
1891					DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n";
1892					unshift @closers, \@copy;
1893					}
1894					return @closers;
1895					}
1896
1897					#--------------------------------------------------------------------------
1898
1899					sub _verbatim_format {
1900					my($it, $p) = @_;
1901
1902					my $formatting;
1903
1904					for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1905					DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n";
1906					$p->[$i] .= "\n";
1907					# Unlike with simple Verbatim blocks, we don't end up just doing
1908					# a join("\n", ...) on the contents, so we have to append a
1909					# newline to every line, and then nix the last one later.
1910					}
1911
1912					if( DEBUG > 4 ) {
1913					print STDERR "<<\n";
1914					for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1915					print STDERR "_verbatim_format $i: $p->[$i]";
1916					}
1917					print STDERR ">>\n";
1918					}
1919
1920					for(my $i = $#$p; $i > 2; $i--) {
1921					# work backwards over the lines, except the first (#2)
1922
1923					#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1924					# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1925					# look at a formatty line preceding a nonformatty one
1926					DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n";
1927					if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1928					DEBUG > 5 and print STDERR " It's a formatty line. ",
1929					"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1930
1931					if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1932					DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n";
1933					next;
1934					} else {
1935					DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n";
1936					}
1937					} else {
1938					DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n";
1939					next;
1940					}
1941
1942					# A formatty line has to have #: in the first two columns, and uses
1943					# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1944					# Example:
1945					# What do you want? i like pie. [or whatever]
1946					# #:^^^^^^^^^^^^^^^^^ /////////////
1947
1948
1949					DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1950
1951					$formatting = ' ' . $1;
1952					$formatting =~ s/\s+$//s; # nix trailing whitespace
1953					unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1954					splice @$p,$i,1; # remove this line
1955					$i--; # don't consider next line
1956					next;
1957					}
1958
1959					if( length($formatting) >= length($p->[$i-1]) ) {
1960					$formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1961					} else {
1962					$formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1963					}
1964					# Make $formatting and the previous line be exactly the same length,
1965					# with $formatting having a " " as the last character.
1966
1967					DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1968
1969
1970					my @new_line;
1971					while( $formatting =~ m{\G(( +)\|(\^+)\|(\/+)\|(\%+))}g ) {
1972					#print STDERR "Format matches $1\n";
1973
1974					if($2) {
1975					#print STDERR "SKIPPING <$2>\n";
1976					push @new_line,
1977					substr($p->[$i-1], pos($formatting)-length($1), length($1));
1978					} else {
1979					#print STDERR "SNARING $+\n";
1980					push @new_line, [
1981					(
1982					$3 ? 'VerbatimB' :
1983					$4 ? 'VerbatimI' :
1984					$5 ? 'VerbatimBI' : die("Should never get called")
1985					), {},
1986					substr($p->[$i-1], pos($formatting)-length($1), length($1))
1987					];
1988					#print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1989					}
1990					}
1991					my @nixed =
1992					splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1993					DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";
1994
1995					DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
1996					scalar(@new_line), "):",
1997					map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1998					$i--; # So the next line we scrutinize is the line before the one
1999					# that we just went and formatted
2000					}
2001
2002					$p->[0] = 'VerbatimFormatted';
2003
2004					# Collapse adjacent text nodes, just for kicks.
2005					for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
2006					if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
2007					DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
2008					$p->[$i] .= splice @$p, $i+1, 1; # merge
2009					--$i; # and back up
2010					}
2011					}
2012
2013					# Now look for the last text token, and remove the terminal newline
2014					for( my $i = $#$p; $i >= 2; $i-- ) {
2015					# work backwards over the tokens, even the first
2016					if( !ref($p->[$i]) ) {
2017					if($p->[$i] =~ s/\n$//s) {
2018					DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
2019					} else {
2020					DEBUG > 5 and print STDERR
2021					"No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
2022					}
2023					last; # we only want the next one
2024					}
2025					}
2026
2027					return;
2028					}
2029
2030
2031					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2032
2033
2034					sub _treelet_from_formatting_codes {
2035					# Given a paragraph, returns a treelet. Full of scary tokenizing code.
2036					# Like [ '~Top', {'start_line' => $start_line},
2037					# "I like ",
2038					# [ 'B', {}, "pie" ],
2039					# "!"
2040					# ]
2041					# This illustrates the general format of a treelet. It is an array:
2042					# [0] is a scalar indicating its type. In the example above, the
2043					# types are '~Top' and 'B'
2044					# [1] is a hash of various flags about it, possibly empty
2045					# [2] - [N] are an ordered list of the subcomponents of the treelet.
2046					# Scalars are literal text, refs are sub-treelets, to
2047					# arbitrary levels. Stringifying a treelet will recursively
2048					# stringify the sub-treelets, concatentating everything
2049					# together to form the exact text of the treelet.
2050
2051					my($self, $para, $start_line, $preserve_space) = @_;
2052
2053					my $treelet = ['~Top', {'start_line' => $start_line},];
2054
2055					unless ($preserve_space \|\| $self->{'preserve_whitespace'}) {
2056					$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
2057					$para =~ s/ $//;
2058					$para =~ s/^ //;
2059					}
2060
2061					# Only apparent problem the above code is that N<< >> turns into
2062					# N<< >>. But then, word wrapping does that too! So don't do that!
2063
2064
2065					# As a Start-code is encountered, the number of opening bracket '<'
2066					# characters minus 1 is pushed onto @stack (so 0 means a single bracket,
2067					# etc). When closing brackets are found in the text, at least this number
2068					# (plus the 1) will be required to mean the Start-code is terminated. When
2069					# those are found, @stack is popped.
2070					my @stack;
2071
2072					my @lineage = ($treelet);
2073					my $raw = ''; # raw content of L<> fcode before splitting/processing
2074					# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
2075					# into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's
2076					# the 'collapse and trim all whitespace first' lines just above.
2077					my $inL = 0;
2078
2079					DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
2080
2081					# Here begins our frightening tokenizer RE. The following regex matches
2082					# text in four main parts:
2083					#
2084					# * Start-codes. The first alternative matches C< or C<<, the latter
2085					# followed by some whitespace. $1 will hold the entire start code
2086					# (including any space following a multiple-angle-bracket delimiter),
2087					# and $2 will hold only the additional brackets past the first in a
2088					# multiple-bracket delimiter. length($2) + 1 will be the number of
2089					# closing brackets we have to find.
2090					#
2091					# * Closing brackets. Match some amount of whitespace followed by
2092					# multiple close brackets. The logic to see if this closes anything
2093					# is down below. Note that in order to parse C<< >> correctly, we
2094					# have to use look-behind (?<=\s\s), since the match of the starting
2095					# code will have consumed the whitespace.
2096					#
2097					# * A single closing bracket, to close a simple code like C<>.
2098					#
2099					# * Something that isn't a start or end code. We have to be careful
2100					# about accepting whitespace, since perlpodspec says that any whitespace
2101					# before a multiple-bracket closing delimiter should be ignored.
2102					#
2103					while($para =~
2104					m/\G
2105					(?:
2106					# Match starting codes, including the whitespace following a
2107					# multiple-delimiter start code. $1 gets the whole start code and
2108					# $2 gets all but one of the <s in the multiple-bracket case.
2109					([A-Z]<(?:(<+)\s+)?)
2110					\|
2111					# Match multiple-bracket end codes. $3 gets the whitespace that
2112					# should be discarded before an end bracket but kept in other cases
2113					# and $4 gets the end brackets themselves. ($3 can be empty if the
2114					# construct is empty, like C<< >>, and all the white-space has been
2115					# gobbled up already, considered to be space after the opening
2116					# bracket. In this case we use look-behind to verify that there are
2117					# at least 2 spaces in a row before the ">".)
2118					(\s+\|(?<=\s\s))(>{2,})
2119					\|
2120					(\s?>) # $5: simple end-codes
2121					\|
2122					( # $6: stuff containing no start-codes or end-codes
2123					(?:
2124					[^A-Z\s>]
2125					\|
2126					(?:
2127					[A-Z](?!<)
2128					)
2129					\|
2130					# whitespace is ok, but we don't want to eat the whitespace before
2131					# a multiple-bracket end code.
2132					# NOTE: we may still have problems with e.g. S<< >>
2133					(?:
2134					\s(?!\s*>{2,})
2135					)
2136					)+
2137					)
2138					)
2139					/xgo
2140					) {
2141					DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
2142					if(defined $1) {
2143					my $bracket_count; # How many '<<<' in a row this has. Needed for
2144					# Pod::Simple::JustPod
2145					if(defined $2) {
2146					DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
2147					$bracket_count = length($2) + 1;
2148					push @stack, $bracket_count; # length of the necessary complex
2149					# end-code string
2150					} else {
2151					DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
2152					push @stack, 0; # signal that we're looking for simple
2153					$bracket_count = 1;
2154					}
2155					my $code = substr($1,0,1);
2156					if ('L' eq $code) {
2157					if ($inL) {
2158					$raw .= $1;
2159					$self->scream( $start_line,
2160					'Nested L<> are illegal. Pretending inner one is '
2161					. 'X<...> so can continue looking for other errors.');
2162					$code = "X";
2163					}
2164					else {
2165					$raw = ""; # reset raw content accumulator
2166					$inL = @stack;
2167					}
2168					} else {
2169					$raw .= $1 if $inL;
2170					}
2171					push @lineage, [ $code, {}, ]; # new node object
2172
2173					# Tell Pod::Simple::JustPod how many brackets there were, but to save
2174					# space, not in the most usual case of there was just 1. It can be
2175					# inferred by the absence of this element. Similarly, if there is more
2176					# than one bracket, extract the white space between the final bracket
2177					# and the real beginning of the interior. Save that if it isn't just a
2178					# single space
2179					if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) {
2180					$lineage[-1][1]{'~bracket_count'} = $bracket_count;
2181					my $lspacer = substr($1, 1 + $bracket_count);
2182					$lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " ";
2183					}
2184					push @{ $lineage[-2] }, $lineage[-1];
2185					} elsif(defined $4) {
2186					DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
2187					# This is where it gets messy...
2188					if(! @stack) {
2189					# We saw " >>>>" but needed nothing. This is ALL just stuff then.
2190					DEBUG > 4 and print STDERR " But it's really just stuff.\n";
2191					push @{ $lineage[-1] }, $3, $4;
2192					next;
2193					} elsif(!$stack[-1]) {
2194					# We saw " >>>>" but needed only ">". Back pos up.
2195					DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n";
2196					push @{ $lineage[-1] }, $3; # That was a for-real space, too.
2197					pos($para) = pos($para) - length($4) + 1;
2198					} elsif($stack[-1] == length($4)) {
2199					# We found " >>>>", and it was exactly what we needed. Commonest case.
2200					DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n";
2201					} elsif($stack[-1] < length($4)) {
2202					# We saw " >>>>" but needed only " >>". Back pos up.
2203					DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n";
2204					pos($para) = pos($para) - length($4) + $stack[-1];
2205					} else {
2206					# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
2207					DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n";
2208					push @{ $lineage[-1] }, $3, $4;
2209					next;
2210					}
2211					#print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
2212
2213					if ($3 ne " " && $self->{'_output_is_for_JustPod'}) {
2214					if ($3 ne "") {
2215					$lineage[-1][1]{'~rspacer'} = $3;
2216					}
2217					elsif ($lineage[-1][1]{'~lspacer'} eq " ") {
2218
2219					# Here we had something like C<< >> which was a false positive
2220					delete $lineage[-1][1]{'~lspacer'};
2221					}
2222					else {
2223					$lineage[-1][1]{'~rspacer'}
2224					= substr($lineage[-1][1]{'~lspacer'}, -1, 1);
2225					chop $lineage[-1][1]{'~lspacer'};
2226					}
2227					}
2228
2229					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
2230					# Keep the element from being childless
2231
2232					if ($inL == @stack) {
2233					$lineage[-1][1]{'raw'} = $raw;
2234					$inL = 0;
2235					}
2236
2237					pop @stack;
2238					pop @lineage;
2239
2240					$raw .= $3.$4 if $inL;
2241
2242					} elsif(defined $5) {
2243					DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";
2244
2245					if(@stack and ! $stack[-1]) {
2246					# We're indeed expecting a simple end-code
2247					DEBUG > 4 and print STDERR " It's indeed an end-code.\n";
2248
2249					if(length($5) == 2) { # There was a space there: " >"
2250					push @{ $lineage[-1] }, ' ';
2251					} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
2252					push @{ $lineage[-1] }, ''; # keep it from being really childless
2253					}
2254
2255					if ($inL == @stack) {
2256					$lineage[-1][1]{'raw'} = $raw;
2257					$inL = 0;
2258					}
2259
2260					pop @stack;
2261					pop @lineage;
2262					} else {
2263					DEBUG > 4 and print STDERR " It's just stuff.\n";
2264					push @{ $lineage[-1] }, $5;
2265					}
2266
2267					$raw .= $5 if $inL;
2268
2269					} elsif(defined $6) {
2270					DEBUG > 3 and print STDERR "Found stuff \"$6\"\n";
2271					push @{ $lineage[-1] }, $6;
2272					$raw .= $6 if $inL;
2273					# XXX does not capture multiplace whitespaces -- 'raw' ends up with
2274					# at most 1 leading/trailing whitespace, why not all of it?
2275					# Answer, because we deliberately trimmed it above
2276
2277					} else {
2278					# should never ever ever ever happen
2279					DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n";
2280					die "SPORK 512512!";
2281					}
2282					}
2283
2284					if(@stack) { # Uhoh, some sequences weren't closed.
2285					my $x= "...";
2286					while(@stack) {
2287					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
2288					# Hmmmmm!
2289
2290					my $code = (pop @lineage)->[0];
2291					my $ender_length = pop @stack;
2292					if($ender_length) {
2293					--$ender_length;
2294					$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
2295					} else {
2296					$x = $code . "<$x>";
2297					}
2298					}
2299					DEBUG > 1 and print STDERR "Unterminated $x sequence\n";
2300					$self->whine($start_line,
2301					"Unterminated $x sequence",
2302					);
2303					}
2304
2305					return $treelet;
2306					}
2307
2308					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2309
2310					sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
2311					return stringify_lol($_[1]);
2312					}
2313
2314					sub stringify_lol { # function: stringify_lol($lol)
2315					my $string_form = '';
2316					_stringify_lol( $_[0] => \$string_form );
2317					return $string_form;
2318					}
2319
2320					sub _stringify_lol { # the real recursor
2321					my($lol, $to) = @_;
2322					for(my $i = 2; $i < @$lol; ++$i) {
2323					if( ref($lol->[$i] \|\| '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
2324					_stringify_lol( $lol->[$i], $to); # recurse!
2325					} else {
2326					$$to .= $lol->[$i];
2327					}
2328					}
2329					return;
2330					}
2331
2332					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2333
2334					sub _dump_curr_open { # return a string representation of the stack
2335					my $curr_open = $_[0]{'curr_open'};
2336
2337					return '[empty]' unless @$curr_open;
2338					return join '; ',
2339					map {;
2340					($_->[0] eq '=for')
2341					? ( ($_->[1]{'~really'} \|\| '=over')
2342					. ' ' . $_->[1]{'target'})
2343					: $_->[0]
2344					}
2345					@$curr_open
2346					;
2347					}
2348
2349					###########################################################################
2350	1	5µs			my %pretty_form = (
2351					"\a" => '\a', # ding!
2352					"\b" => '\b', # BS
2353					"\e" => '\e', # ESC
2354					"\f" => '\f', # FF
2355					"\t" => '\t', # tab
2356					"\cm" => '\cm',
2357					"\cj" => '\cj',
2358					"\n" => '\n', # probably overrides one of either \cm or \cj
2359					'"' => '\"',
2360					'\\' => '\\\\',
2361					'$' => '\\$',
2362					'@' => '\\@',
2363					'%' => '\\%',
2364					'#' => '\\#',
2365					);
2366
2367					sub pretty { # adopted from Class::Classless
2368					# Not the most brilliant routine, but passable.
2369					# Don't give it a cyclic data structure!
2370					my @stuff = @_; # copy
2371					my $x;
2372					my $out =
2373					# join ",\n" .
2374					join ", ",
2375					map {;
2376					if(!defined($_)) {
2377					"undef";
2378					} elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
2379					$x = "[ " . pretty(@$_) . " ]" ;
2380					$x;
2381					} elsif(ref($_) eq 'SCALAR') {
2382					$x = "\\" . pretty($$_) ;
2383					$x;
2384					} elsif(ref($_) eq 'HASH') {
2385					my $hr = $_;
2386					$x = "{" . join(", ",
2387					map(pretty($_) . '=>' . pretty($hr->{$_}),
2388					sort keys %$hr ) ) . "}" ;
2389					$x;
2390					} elsif(!length($_)) { q{''} # empty string
2391					} elsif(
2392					$_ eq '0' # very common case
2393					or(
2394					m/^-?(?:[123456789]\d*\|0)(?:\.\d+)?$/s
2395					and $_ ne '-0' # the strange case that RE lets thru
2396					)
2397					) { $_;
2398					} else {
2399					# Yes, explicitly name every character desired. There are shorcuts one
2400					# could make, but I (Karl Williamson) was afraid that some Perl
2401					# releases would have bugs in some of them. For example [A-Z] works
2402					# even on EBCDIC platforms to match exactly the 26 uppercase English
2403					# letters, but I don't know if it has always worked without bugs. It
2404					# seemed safest just to list the characters.
2405					# s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
2406					s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~])>
2407					<$pretty_form{$1} \|\| '\\x{'.sprintf("%x", ord($1)).'}'>eg;
2408					#<$pretty_form{$1} \|\| '\\x'.(unpack("H2",$1))>eg;
2409					qq{"$_"};
2410					}
2411					} @stuff;
2412					# $out =~ s/\n */ /g if length($out) < 75;
2413					return $out;
2414					}
2415
2416					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2417
2418					# A rather unsubtle method of blowing away all the state information
2419					# from a parser object so it can be reused. Provided as a utility for
2420					# backward compatibility in Pod::Man, etc. but not recommended for
2421					# general use.
2422
2423					sub reinit {
2424					my $self = shift;
2425					foreach (qw(source_dead source_filename doc_has_started
2426					start_of_pod_block content_seen last_was_blank paras curr_open
2427					line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen
2428					Title)) {
2429
2430					delete $self->{$_};
2431					}
2432					}
2433
2434					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2435	1	12µs			1;
2436

					# spent 66µs within Pod::Simple::BlackBox::CORE:match which was called 9 times, avg 7µs/call: # once (20µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 37)[Pod/Simple/BlackBox.pm:44] # once (10µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 32)[Pod/Simple/BlackBox.pm:44] # once (10µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 34)[Pod/Simple/BlackBox.pm:44] # once (7µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 41)[Pod/Simple/BlackBox.pm:44] # once (7µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 39)[Pod/Simple/BlackBox.pm:44] # once (6µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 43)[Pod/Simple/BlackBox.pm:44] # once (5µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 30)[Pod/Simple/BlackBox.pm:44] # once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 28)[Pod/Simple/BlackBox.pm:44] # once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 45)[Pod/Simple/BlackBox.pm:44] sub Pod::Simple::BlackBox::CORE:match; # opcode
					# spent 8µs within Pod::Simple::BlackBox::CORE:qr which was called 10 times, avg 840ns/call: # once (2µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 44)[Pod/Simple/BlackBox.pm:40] # once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 27)[Pod/Simple/BlackBox.pm:40] # once (700ns+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 2 of (eval 35)[Pod/Simple/BlackBox.pm:74] # once (700ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 31)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 42)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 36)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 29)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 40)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 38)[Pod/Simple/BlackBox.pm:40] # once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 33)[Pod/Simple/BlackBox.pm:40] sub Pod::Simple::BlackBox::CORE:qr; # opcode