| Filename | /usr/share/perl/5.36/Pod/Simple/BlackBox.pm |
| Statements | Executed 105 statements in 7.66ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 9 | 9 | 2 | 622µs | 883µs | Pod::Simple::BlackBox::my_qr |
| 9 | 9 | 9 | 66µs | 66µs | Pod::Simple::BlackBox::CORE:match (opcode) |
| 1 | 1 | 1 | 11µs | 15µs | Pod::Simple::BlackBox::BEGIN@289 |
| 1 | 1 | 1 | 10µs | 13µs | Pod::Simple::BlackBox::BEGIN@1520 |
| 10 | 10 | 10 | 8µs | 8µs | Pod::Simple::BlackBox::CORE:qr (opcode) |
| 1 | 1 | 1 | 7µs | 8µs | Pod::Simple::BlackBox::BEGIN@21 |
| 1 | 1 | 1 | 5µs | 14µs | Pod::Simple::BlackBox::BEGIN@273 |
| 1 | 1 | 1 | 4µs | 16µs | Pod::Simple::BlackBox::BEGIN@67 |
| 1 | 1 | 1 | 3µs | 4µs | Pod::Simple::BlackBox::BEGIN@22 |
| 1 | 1 | 1 | 3µs | 19µs | Pod::Simple::BlackBox::BEGIN@24 |
| 1 | 1 | 1 | 3µs | 3µs | Pod::Simple::BlackBox::BEGIN@55 |
| 1 | 1 | 1 | 2µs | 2µs | Pod::Simple::BlackBox::BEGIN@23 |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_closers_for_all_curr_open |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_dump_curr_open |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_gen_errata |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_handle_encoding_line |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_handle_encoding_second_level |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Data |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Plain |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Verbatim |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_back |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_begin |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_doc_end |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_end |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_for |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_item |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_over |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_paragraph_buffer |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_pod |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_stringify_lol |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_traverse_treelet_bit |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_treelet_from_formatting_codes |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_verbatim_format |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::parse_line |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::parse_lines |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::pretty |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::reinit |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::stringify_lol |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::text_content_of_treelet |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package Pod::Simple::BlackBox; | ||||
| 2 | # | ||||
| 3 | # "What's in the box?" "Pain." | ||||
| 4 | # | ||||
| 5 | ########################################################################### | ||||
| 6 | # | ||||
| 7 | # This is where all the scary things happen: parsing lines into | ||||
| 8 | # paragraphs; and then into directives, verbatims, and then also | ||||
| 9 | # turning formatting sequences into treelets. | ||||
| 10 | # | ||||
| 11 | # Are you really sure you want to read this code? | ||||
| 12 | # | ||||
| 13 | #----------------------------------------------------------------------------- | ||||
| 14 | # | ||||
| 15 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | ||||
| 16 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | ||||
| 17 | # to call the proper callbacks on the treelets. | ||||
| 18 | # | ||||
| 19 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | ||||
| 20 | |||||
| 21 | 2 | 13µs | 2 | 9µs | # spent 8µs (7+900ns) within Pod::Simple::BlackBox::BEGIN@21 which was called:
# once (7µs+900ns) by Pod::Simple::LinkSection::BEGIN@7 at line 21 # spent 8µs making 1 call to Pod::Simple::BlackBox::BEGIN@21
# spent 900ns making 1 call to integer::import |
| 22 | 2 | 12µs | 2 | 4µs | # spent 4µs (3+600ns) within Pod::Simple::BlackBox::BEGIN@22 which was called:
# once (3µs+600ns) by Pod::Simple::LinkSection::BEGIN@7 at line 22 # spent 4µs making 1 call to Pod::Simple::BlackBox::BEGIN@22
# spent 600ns making 1 call to strict::import |
| 23 | 2 | 12µs | 1 | 2µs | # spent 2µs within Pod::Simple::BlackBox::BEGIN@23 which was called:
# once (2µs+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 23 # spent 2µs making 1 call to Pod::Simple::BlackBox::BEGIN@23 |
| 24 | 2 | 89µs | 2 | 34µs | # spent 19µs (3+16) within Pod::Simple::BlackBox::BEGIN@24 which was called:
# once (3µs+16µs) by Pod::Simple::LinkSection::BEGIN@7 at line 24 # spent 19µs making 1 call to Pod::Simple::BlackBox::BEGIN@24
# spent 16µs making 1 call to vars::import |
| 25 | 1 | 300ns | $VERSION = '3.43'; | ||
| 26 | #use constant DEBUG => 7; | ||||
| 27 | |||||
| 28 | # spent 883µs (622+261) within Pod::Simple::BlackBox::my_qr which was called 9 times, avg 98µs/call:
# once (96µs+35µs) by Pod::Text::BEGIN@26 at line 351 of Pod/Simple.pm
# once (87µs+38µs) by Pod::Simple::LinkSection::BEGIN@7 at line 63
# once (77µs+25µs) by Pod::Simple::LinkSection::BEGIN@7 at line 88
# once (58µs+37µs) by Pod::Simple::LinkSection::BEGIN@7 at line 76
# once (69µs+24µs) by Pod::Simple::LinkSection::BEGIN@7 at line 67
# once (60µs+28µs) by Pod::Simple::LinkSection::BEGIN@7 at line 68
# once (63µs+24µs) by Pod::Simple::LinkSection::BEGIN@7 at line 85
# once (59µs+27µs) by Pod::Simple::LinkSection::BEGIN@7 at line 70
# once (54µs+22µs) by Pod::Simple::LinkSection::BEGIN@7 at line 92 | ||||
| 29 | |||||
| 30 | # $1 is a pattern to compile and return. Older perls compile any | ||||
| 31 | # syntactically valid property, even if it isn't legal. To cope with | ||||
| 32 | # this, return an empty string unless the compiled pattern also | ||||
| 33 | # successfully matches $2, which the caller furnishes. | ||||
| 34 | |||||
| 35 | 9 | 3µs | my ($input_re, $should_match) = @_; | ||
| 36 | # XXX could have a third parameter $shouldnt_match for extra safety | ||||
| 37 | |||||
| 38 | 9 | 16µs | my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : ""; | ||
| 39 | |||||
| 40 | 9 | 154µs | my $re = eval "no warnings; $use_utf8 qr/$input_re/"; # spent 34µs executing statements in string eval # includes 8µs spent executing 1 call to 1 sub defined therein. # spent 29µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 25µs executing statements in string eval # includes 8µs spent executing 1 call to 1 sub defined therein. # spent 20µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 18µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 17µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein. # spent 16µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 15µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 15µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein. | ||
| 41 | #print STDERR __LINE__, ": $input_re: $@\n" if $@; | ||||
| 42 | 9 | 1µs | return "" if $@; | ||
| 43 | |||||
| 44 | 9 | 140µs | my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; # spent 38µs executing statements in string eval # includes 3µs spent executing 1 call to 1 sub defined therein. # spent 31µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 31µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 28µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 27µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 25µs executing statements in string eval # includes 5µs spent executing 1 call to 1 sub defined therein. # spent 24µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 21µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. # spent 21µs executing statements in string eval # includes 6µs spent executing 1 call to 1 sub defined therein. | ||
| 45 | #print STDERR __LINE__, ": $input_re: $@\n" if $@; | ||||
| 46 | 9 | 1µs | return "" if $@; | ||
| 47 | |||||
| 48 | #print STDERR __LINE__, ": SUCCESS: $re\n" if $matches; | ||||
| 49 | 9 | 16µs | return $re if $matches; | ||
| 50 | |||||
| 51 | #print STDERR __LINE__, ": $re: didn't match\n"; | ||||
| 52 | return ""; | ||||
| 53 | } | ||||
| 54 | |||||
| 55 | # spent 3µs within Pod::Simple::BlackBox::BEGIN@55 which was called:
# once (3µs+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 58 | ||||
| 56 | 1 | 500ns | require Pod::Simple; | ||
| 57 | 1 | 3µs | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | ||
| 58 | 1 | 38µs | 1 | 3µs | } # spent 3µs making 1 call to Pod::Simple::BlackBox::BEGIN@55 |
| 59 | |||||
| 60 | # Matches a character iff the character will have a different meaning | ||||
| 61 | # if we choose CP1252 vs UTF-8 if there is no =encoding line. | ||||
| 62 | # This is broken for early Perls on non-ASCII platforms. | ||||
| 63 | 1 | 2µs | 1 | 125µs | my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); # spent 125µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 64 | 1 | 200ns | $non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re; | ||
| 65 | |||||
| 66 | # Use patterns understandable by Perl 5.6, if possible | ||||
| 67 | 4 | 481µs | 3 | 121µs | # spent 16µs (4+12) within Pod::Simple::BlackBox::BEGIN@67 which was called:
# once (4µs+12µs) by Pod::Simple::LinkSection::BEGIN@7 at line 67 # spent 93µs making 1 call to Pod::Simple::BlackBox::my_qr
# spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@67
# spent 12µs making 1 call to warnings::unimport |
| 68 | 1 | 800ns | 1 | 87µs | my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely # spent 87µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 69 | # to get assigned | ||||
| 70 | 1 | 800ns | 1 | 86µs | my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', # spent 86µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 71 | "\x{250}"); | ||||
| 72 | 1 | 100ns | $rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re; | ||
| 73 | |||||
| 74 | 1 | 16µs | my $script_run_re = eval 'no warnings "experimental::script_run"; # spent 18µs executing statements in string eval # includes 4µs spent executing 1 call to 1 sub defined therein. | ||
| 75 | qr/(*script_run: ^ .* $ )/x'; | ||||
| 76 | 1 | 900ns | 1 | 95µs | my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); # spent 95µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 77 | 1 | 200ns | unless ($latin_re) { | ||
| 78 | # This was machine generated to be the ranges of the union of the above | ||||
| 79 | # three properties, with things that were undefined by Unicode 4.1 filling | ||||
| 80 | # gaps. That is the version in use when Perl advanced enough to | ||||
| 81 | # successfully compile and execute the above pattern. | ||||
| 82 | $latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}"); | ||||
| 83 | } | ||||
| 84 | |||||
| 85 | 1 | 1µs | 1 | 87µs | my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A"); # spent 87µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 86 | |||||
| 87 | # Latin script code points not in the first release of Unicode | ||||
| 88 | 1 | 800ns | 1 | 103µs | my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); # spent 103µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 89 | |||||
| 90 | # If this perl doesn't have the Deprecated property, there's only one code | ||||
| 91 | # point in it that we need be concerned with. | ||||
| 92 | 1 | 700ns | 1 | 76µs | my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); # spent 76µs making 1 call to Pod::Simple::BlackBox::my_qr |
| 93 | 1 | 200ns | $deprecated_re = qr/\x{149}/ unless $deprecated_re; | ||
| 94 | |||||
| 95 | 1 | 200ns | my $utf8_bom; | ||
| 96 | 1 | 2µs | if (($] ge 5.007_003)) { | ||
| 97 | 1 | 300ns | $utf8_bom = "\x{FEFF}"; | ||
| 98 | 1 | 3µs | 1 | 700ns | utf8::encode($utf8_bom); # spent 700ns making 1 call to utf8::encode |
| 99 | } else { | ||||
| 100 | $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. | ||||
| 101 | } | ||||
| 102 | |||||
| 103 | # This is used so that the 'content_seen' method doesn't return true on a | ||||
| 104 | # file that just happens to have a line that matches /^=[a-zA-z]/. Only if | ||||
| 105 | # there is a valid =foo line will we return that content was seen. | ||||
| 106 | 1 | 100ns | my $seen_legal_directive = 0; | ||
| 107 | |||||
| 108 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 109 | |||||
| 110 | sub parse_line { shift->parse_lines(@_) } # alias | ||||
| 111 | |||||
| 112 | # - - - Turn back now! Run away! - - - | ||||
| 113 | |||||
| 114 | sub parse_lines { # Usage: $parser->parse_lines(@lines) | ||||
| 115 | # an undef means end-of-stream | ||||
| 116 | my $self = shift; | ||||
| 117 | |||||
| 118 | my $code_handler = $self->{'code_handler'}; | ||||
| 119 | my $cut_handler = $self->{'cut_handler'}; | ||||
| 120 | my $wl_handler = $self->{'whiteline_handler'}; | ||||
| 121 | $self->{'line_count'} ||= 0; | ||||
| 122 | |||||
| 123 | my $scratch; | ||||
| 124 | |||||
| 125 | DEBUG > 4 and | ||||
| 126 | print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; | ||||
| 127 | |||||
| 128 | DEBUG > 5 and | ||||
| 129 | print STDERR "# About to parse lines: ", | ||||
| 130 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; | ||||
| 131 | |||||
| 132 | my $paras = ($self->{'paras'} ||= []); | ||||
| 133 | # paragraph buffer. Because we need to defer processing of =over | ||||
| 134 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer | ||||
| 135 | # to process this. | ||||
| 136 | |||||
| 137 | $self->{'pod_para_count'} ||= 0; | ||||
| 138 | |||||
| 139 | # An attempt to match the pod portions of a line. This is not fool proof, | ||||
| 140 | # but is good enough to serve as part of the heuristic for guessing the pod | ||||
| 141 | # encoding if not specified. | ||||
| 142 | my $codes = join '', grep { / ^ [A-Za-z] $/x } sort keys %{$self->{accept_codes}}; | ||||
| 143 | my $pod_chars_re = qr/ ^ = [A-Za-z]+ | [\Q$codes\E] < /x; | ||||
| 144 | |||||
| 145 | my $line; | ||||
| 146 | foreach my $source_line (@_) { | ||||
| 147 | if( $self->{'source_dead'} ) { | ||||
| 148 | DEBUG > 4 and print STDERR "# Source is dead.\n"; | ||||
| 149 | last; | ||||
| 150 | } | ||||
| 151 | |||||
| 152 | unless( defined $source_line ) { | ||||
| 153 | DEBUG > 4 and print STDERR "# Undef-line seen.\n"; | ||||
| 154 | |||||
| 155 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; | ||||
| 156 | push @$paras, $paras->[-1], $paras->[-1]; | ||||
| 157 | # So that it definitely fills the buffer. | ||||
| 158 | $self->{'source_dead'} = 1; | ||||
| 159 | $self->_ponder_paragraph_buffer; | ||||
| 160 | next; | ||||
| 161 | } | ||||
| 162 | |||||
| 163 | |||||
| 164 | if( $self->{'line_count'}++ ) { | ||||
| 165 | ($line = $source_line) =~ tr/\n\r//d; | ||||
| 166 | # If we don't have two vars, we'll end up with that there | ||||
| 167 | # tr/// modding the (potentially read-only) original source line! | ||||
| 168 | |||||
| 169 | } else { | ||||
| 170 | DEBUG > 2 and print STDERR "First line: [$source_line]\n"; | ||||
| 171 | |||||
| 172 | if( ($line = $source_line) =~ s/^$utf8_bom//s ) { | ||||
| 173 | DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; | ||||
| 174 | $self->_handle_encoding_line( "=encoding utf8" ); | ||||
| 175 | delete $self->{'_processed_encoding'}; | ||||
| 176 | $line =~ tr/\n\r//d; | ||||
| 177 | |||||
| 178 | } elsif( $line =~ s/^\xFE\xFF//s ) { | ||||
| 179 | DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
| 180 | $self->scream( | ||||
| 181 | $self->{'line_count'}, | ||||
| 182 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
| 183 | ); | ||||
| 184 | splice @_; | ||||
| 185 | push @_, undef; | ||||
| 186 | next; | ||||
| 187 | |||||
| 188 | # TODO: implement somehow? | ||||
| 189 | |||||
| 190 | } elsif( $line =~ s/^\xFF\xFE//s ) { | ||||
| 191 | DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
| 192 | $self->scream( | ||||
| 193 | $self->{'line_count'}, | ||||
| 194 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
| 195 | ); | ||||
| 196 | splice @_; | ||||
| 197 | push @_, undef; | ||||
| 198 | next; | ||||
| 199 | |||||
| 200 | # TODO: implement somehow? | ||||
| 201 | |||||
| 202 | } else { | ||||
| 203 | DEBUG > 2 and print STDERR "First line is BOM-less.\n"; | ||||
| 204 | ($line = $source_line) =~ tr/\n\r//d; | ||||
| 205 | } | ||||
| 206 | } | ||||
| 207 | |||||
| 208 | if(!$self->{'parse_characters'} && !$self->{'encoding'} | ||||
| 209 | && ($self->{'in_pod'} || $line =~ /^=/s) | ||||
| 210 | && $line =~ /$non_ascii_re/ | ||||
| 211 | ) { | ||||
| 212 | |||||
| 213 | my $encoding; | ||||
| 214 | |||||
| 215 | # No =encoding line, and we are at the first pod line in the input that | ||||
| 216 | # contains a non-ascii byte, that is, one whose meaning varies depending | ||||
| 217 | # on whether the file is encoded in UTF-8 or CP1252, which are the two | ||||
| 218 | # possibilities permitted by the pod spec. (ASCII is assumed if the | ||||
| 219 | # file only contains ASCII bytes.) In order to process this line, we | ||||
| 220 | # need to figure out what encoding we will use for the file. | ||||
| 221 | # | ||||
| 222 | # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points | ||||
| 223 | # 160-255, but it is used here, as it often colloquially is, to refer to | ||||
| 224 | # the complete set of code points 0-255, including ASCII (0-127), the C1 | ||||
| 225 | # controls (128-159), and strict Latin 1 (160-255). | ||||
| 226 | # | ||||
| 227 | # CP1252 is effectively a superset of Latin 1, because it differs only | ||||
| 228 | # from colloquial 8859-1 in the C1 controls, which are very unlikely to | ||||
| 229 | # actually be present in 8859-1 files, so can be used for other purposes | ||||
| 230 | # without conflict. CP 1252 uses most of them for graphic characters. | ||||
| 231 | # | ||||
| 232 | # Note that all ASCII-range bytes represent their corresponding code | ||||
| 233 | # points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other | ||||
| 234 | # code points require multiple (non-ASCII) bytes to represent. (A | ||||
| 235 | # separate paragraph for EBCDIC is below.) The multi-byte | ||||
| 236 | # representation is quite structured. If we find an isolated byte that | ||||
| 237 | # would require multiple bytes to represent in UTF-8, we know that the | ||||
| 238 | # encoding is not UTF-8. If we find a sequence of bytes that violates | ||||
| 239 | # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and | ||||
| 240 | # hence must be 1252. | ||||
| 241 | # | ||||
| 242 | # But there are ambiguous cases where we could guess wrong. If so, the | ||||
| 243 | # user will end up having to supply an =encoding line. We use all | ||||
| 244 | # readily available information to improve our chances of guessing | ||||
| 245 | # right. The odds of something not being UTF-8, but still passing a | ||||
| 246 | # UTF-8 validity test go down very rapidly with increasing length of the | ||||
| 247 | # sequence. Therefore we look at all non-ascii sequences on the line. | ||||
| 248 | # If any of the sequences can't be UTF-8, we quit there and choose | ||||
| 249 | # CP1252. If all could be UTF-8, we see if any of the code points | ||||
| 250 | # represented are unlikely to be in pod. If so, we guess CP1252. If | ||||
| 251 | # not, we check if the line is all in the same script; if not guess | ||||
| 252 | # CP1252; otherwise UTF-8. For perls that don't have convenient script | ||||
| 253 | # run testing, see if there is both Latin and non-Latin. If so, CP1252, | ||||
| 254 | # otherwise UTF-8. | ||||
| 255 | # | ||||
| 256 | # On EBCDIC platforms, the situation is somewhat different. In | ||||
| 257 | # UTF-EBCDIC, not only do ASCII-range bytes represent their code points, | ||||
| 258 | # but so do the bytes that are for the C1 controls. Recall that these | ||||
| 259 | # correspond to the unused portion of 8859-1 that 1252 mostly takes | ||||
| 260 | # over. That means that there are fewer code points that are | ||||
| 261 | # represented by multi-bytes. But, note that the these controls are | ||||
| 262 | # very unlikely to be in pod text. So if we encounter one of them, it | ||||
| 263 | # means that it is quite likely CP1252 and not UTF-8. The net result is | ||||
| 264 | # the same code below is used for both platforms. | ||||
| 265 | # | ||||
| 266 | # XXX probably if the line has E<foo> that evaluates to illegal CP1252, | ||||
| 267 | # then it is UTF-8. But we haven't processed E<> yet. | ||||
| 268 | |||||
| 269 | goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls | ||||
| 270 | |||||
| 271 | my $copy; | ||||
| 272 | |||||
| 273 | 2 | 43µs | 2 | 24µs | # spent 14µs (5+9) within Pod::Simple::BlackBox::BEGIN@273 which was called:
# once (5µs+9µs) by Pod::Simple::LinkSection::BEGIN@7 at line 273 # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@273
# spent 9µs making 1 call to warnings::unimport |
| 274 | |||||
| 275 | if ($] ge 5.007_003) { | ||||
| 276 | $copy = $line; | ||||
| 277 | |||||
| 278 | # On perls that have this function, we can use it to easily see if the | ||||
| 279 | # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag | ||||
| 280 | # needed below for script run detection | ||||
| 281 | goto set_1252 if ! utf8::decode($copy); | ||||
| 282 | } | ||||
| 283 | elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows | ||||
| 284 | # code page doing here anyway? | ||||
| 285 | goto set_utf8; | ||||
| 286 | } | ||||
| 287 | else { # ASCII, no decode(): do it ourselves using the fundamental | ||||
| 288 | # characteristics of UTF-8 | ||||
| 289 | 2 | 3.77ms | 2 | 17µs | # spent 15µs (11+4) within Pod::Simple::BlackBox::BEGIN@289 which was called:
# once (11µs+4µs) by Pod::Simple::LinkSection::BEGIN@7 at line 289 # spent 15µs making 1 call to Pod::Simple::BlackBox::BEGIN@289
# spent 2µs making 1 call to if::import |
| 290 | |||||
| 291 | my $char_ord; | ||||
| 292 | my $needed; # How many continuation bytes to gobble up | ||||
| 293 | |||||
| 294 | # Initialize the translated line with a dummy character that will be | ||||
| 295 | # deleted after everything else is done. This dummy makes sure that | ||||
| 296 | # $copy will be in UTF-8. Doing it now avoids the bugs in early perls | ||||
| 297 | # with upgrading in the middle | ||||
| 298 | $copy = chr(0x100); | ||||
| 299 | |||||
| 300 | # Parse through the line | ||||
| 301 | for (my $i = 0; $i < length $line; $i++) { | ||||
| 302 | my $byte = substr($line, $i, 1); | ||||
| 303 | |||||
| 304 | # ASCII bytes are trivially dealt with | ||||
| 305 | if ($byte !~ $non_ascii_re) { | ||||
| 306 | $copy .= $byte; | ||||
| 307 | next; | ||||
| 308 | } | ||||
| 309 | |||||
| 310 | my $b_ord = ord $byte; | ||||
| 311 | |||||
| 312 | # Now figure out what this code point would be if the input is | ||||
| 313 | # actually in UTF-8. If, in the process, we discover that it isn't | ||||
| 314 | # well-formed UTF-8, we guess CP1252. | ||||
| 315 | # | ||||
| 316 | # Start the process. If it is UTF-8, we are at the first, start | ||||
| 317 | # byte, of a multi-byte sequence. We look at this byte to figure | ||||
| 318 | # out how many continuation bytes are needed, and to initialize the | ||||
| 319 | # code point accumulator with the data from this byte. | ||||
| 320 | # | ||||
| 321 | # Normally the minimum continuation byte is 0x80, but in certain | ||||
| 322 | # instances the minimum is a higher number. So the code below | ||||
| 323 | # overrides this for those instances. | ||||
| 324 | my $min_cont = 0x80; | ||||
| 325 | |||||
| 326 | if ($b_ord < 0xC2) { # A start byte < C2 is malformed | ||||
| 327 | goto set_1252; | ||||
| 328 | } | ||||
| 329 | elsif ($b_ord <= 0xDF) { | ||||
| 330 | $needed = 1; | ||||
| 331 | $char_ord = $b_ord & 0x1F; | ||||
| 332 | } | ||||
| 333 | elsif ($b_ord <= 0xEF) { | ||||
| 334 | $min_cont = 0xA0 if $b_ord == 0xE0; | ||||
| 335 | $needed = 2; | ||||
| 336 | $char_ord = $b_ord & (0x1F >> 1); | ||||
| 337 | } | ||||
| 338 | elsif ($b_ord <= 0xF4) { | ||||
| 339 | $min_cont = 0x90 if $b_ord == 0xF0; | ||||
| 340 | $needed = 3; | ||||
| 341 | $char_ord = $b_ord & (0x1F >> 2); | ||||
| 342 | } | ||||
| 343 | else { # F4 is the highest start byte for legal Unicode; higher is | ||||
| 344 | # unlikely to be in pod. | ||||
| 345 | goto set_1252; | ||||
| 346 | } | ||||
| 347 | |||||
| 348 | # ? not enough continuation bytes available | ||||
| 349 | goto set_1252 if $i + $needed >= length $line; | ||||
| 350 | |||||
| 351 | # Accumulate the ordinal of the character from the remaining | ||||
| 352 | # (continuation) bytes. | ||||
| 353 | while ($needed-- > 0) { | ||||
| 354 | my $cont = substr($line, ++$i, 1); | ||||
| 355 | $b_ord = ord $cont; | ||||
| 356 | goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF; | ||||
| 357 | |||||
| 358 | # In all cases, any next continuation bytes all have the same | ||||
| 359 | # minimum legal value | ||||
| 360 | $min_cont = 0x80; | ||||
| 361 | |||||
| 362 | # Accumulate this byte's contribution to the code point | ||||
| 363 | $char_ord <<= 6; | ||||
| 364 | $char_ord |= ($b_ord & 0x3F); | ||||
| 365 | } | ||||
| 366 | |||||
| 367 | # Here, the sequence that formed this code point was valid UTF-8, | ||||
| 368 | # so add the completed character to the output | ||||
| 369 | $copy .= chr $char_ord; | ||||
| 370 | } # End of loop through line | ||||
| 371 | |||||
| 372 | # Delete the dummy first character | ||||
| 373 | $copy = substr($copy, 1); | ||||
| 374 | } | ||||
| 375 | |||||
| 376 | # Here, $copy is legal UTF-8. | ||||
| 377 | |||||
| 378 | # If it can't be legal CP1252, no need to look further. (These bytes | ||||
| 379 | # aren't valid in CP1252.) This test could have been placed higher in | ||||
| 380 | # the code, but it seemed wrong to set the encoding to UTF-8 without | ||||
| 381 | # making sure that the very first instance is well-formed. But what if | ||||
| 382 | # it isn't legal CP1252 either? We have to choose one or the other, and | ||||
| 383 | # It seems safer to favor the single-byte encoding over the multi-byte. | ||||
| 384 | goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/; | ||||
| 385 | |||||
| 386 | # The C1 controls are not likely to appear in pod | ||||
| 387 | goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/; | ||||
| 388 | |||||
| 389 | # Nor are surrogates nor unassigned, nor deprecated. | ||||
| 390 | DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re; | ||||
| 391 | goto set_1252 if $cs_re && $copy =~ $cs_re; | ||||
| 392 | DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re; | ||||
| 393 | goto set_1252 if $cn_re && $copy =~ $cn_re; | ||||
| 394 | DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re; | ||||
| 395 | goto set_1252 if $copy =~ $deprecated_re; | ||||
| 396 | |||||
| 397 | # Nor are rare code points. But this is hard to determine. khw | ||||
| 398 | # believes that IPA characters and the modifier letters are unlikely to | ||||
| 399 | # be in pod (and certainly very unlikely to be the in the first line in | ||||
| 400 | # the pod containing non-ASCII) | ||||
| 401 | DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re; | ||||
| 402 | goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re; | ||||
| 403 | |||||
| 404 | # The first Unicode version included essentially every Latin character | ||||
| 405 | # in modern usage. So, a Latin character not in the first release will | ||||
| 406 | # unlikely be in pod. | ||||
| 407 | DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re; | ||||
| 408 | goto set_1252 if $later_latin_re && $copy =~ $later_latin_re; | ||||
| 409 | |||||
| 410 | # On perls that handle script runs, if the UTF-8 interpretation yields | ||||
| 411 | # a single script, we guess UTF-8, otherwise just having a mixture of | ||||
| 412 | # scripts is suspicious, so guess CP1252. We first strip off, as best | ||||
| 413 | # we can, the ASCII characters that look like they are pod directives, | ||||
| 414 | # as these would always show as mixed with non-Latin text. | ||||
| 415 | $copy =~ s/$pod_chars_re//g; | ||||
| 416 | |||||
| 417 | if ($script_run_re) { | ||||
| 418 | goto set_utf8 if $copy =~ $script_run_re; | ||||
| 419 | DEBUG > 8 and print STDERR __LINE__, ": not script run\n"; | ||||
| 420 | goto set_1252; | ||||
| 421 | } | ||||
| 422 | |||||
| 423 | # Even without script runs, but on recent enough perls and Unicodes, we | ||||
| 424 | # can check if there is a mixture of both Latin and non-Latin. Again, | ||||
| 425 | # having a mixture of scripts is suspicious, so assume CP1252 | ||||
| 426 | |||||
| 427 | # If it's all non-Latin, there is no CP1252, as that is Latin | ||||
| 428 | # characters and punct, etc. | ||||
| 429 | DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re; | ||||
| 430 | goto set_utf8 if $copy !~ $latin_re; | ||||
| 431 | |||||
| 432 | DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re; | ||||
| 433 | goto set_utf8 if $copy =~ $every_char_is_latin_re; | ||||
| 434 | |||||
| 435 | DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n"; | ||||
| 436 | |||||
| 437 | set_1252: | ||||
| 438 | DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n"; | ||||
| 439 | $encoding = 'CP1252'; | ||||
| 440 | goto done_set; | ||||
| 441 | |||||
| 442 | set_utf8: | ||||
| 443 | DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n"; | ||||
| 444 | $encoding = 'UTF-8'; | ||||
| 445 | |||||
| 446 | done_set: | ||||
| 447 | $self->_handle_encoding_line( "=encoding $encoding" ); | ||||
| 448 | delete $self->{'_processed_encoding'}; | ||||
| 449 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
| 450 | |||||
| 451 | my ($word) = $line =~ /(\S*$non_ascii_re\S*)/; | ||||
| 452 | |||||
| 453 | $self->whine( | ||||
| 454 | $self->{'line_count'}, | ||||
| 455 | "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" | ||||
| 456 | ); | ||||
| 457 | } | ||||
| 458 | |||||
| 459 | DEBUG > 5 and print STDERR "# Parsing line: [$line]\n"; | ||||
| 460 | |||||
| 461 | if(!$self->{'in_pod'}) { | ||||
| 462 | if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) { | ||||
| 463 | if($1 eq 'cut') { | ||||
| 464 | $self->scream( | ||||
| 465 | $self->{'line_count'}, | ||||
| 466 | "=cut found outside a pod block. Skipping to next block." | ||||
| 467 | ); | ||||
| 468 | |||||
| 469 | ## Before there were errata sections in the world, it was | ||||
| 470 | ## least-pessimal to abort processing the file. But now we can | ||||
| 471 | ## just barrel on thru (but still not start a pod block). | ||||
| 472 | #splice @_; | ||||
| 473 | #push @_, undef; | ||||
| 474 | |||||
| 475 | next; | ||||
| 476 | } else { | ||||
| 477 | $self->{'in_pod'} = $self->{'start_of_pod_block'} | ||||
| 478 | = $self->{'last_was_blank'} = 1; | ||||
| 479 | # And fall thru to the pod-mode block further down | ||||
| 480 | } | ||||
| 481 | } else { | ||||
| 482 | DEBUG > 5 and print STDERR "# It's a code-line.\n"; | ||||
| 483 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 484 | if $code_handler; | ||||
| 485 | # Note: this may cause code to be processed out of order relative | ||||
| 486 | # to pods, but in order relative to cuts. | ||||
| 487 | |||||
| 488 | # Note also that we haven't yet applied the transcoding to $line | ||||
| 489 | # by time we call $code_handler! | ||||
| 490 | |||||
| 491 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { | ||||
| 492 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", | ||||
| 493 | #$fname = $2 if defined $2; | ||||
| 494 | #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n"; | ||||
| 495 | DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; | ||||
| 496 | $self->{'line_count'} = $1 - 1; | ||||
| 497 | } | ||||
| 498 | |||||
| 499 | next; | ||||
| 500 | } | ||||
| 501 | } | ||||
| 502 | |||||
| 503 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||||
| 504 | # Else we're in pod mode: | ||||
| 505 | |||||
| 506 | # Apply any necessary transcoding: | ||||
| 507 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
| 508 | |||||
| 509 | # HERE WE CATCH =encoding EARLY! | ||||
| 510 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { | ||||
| 511 | next if $self->parse_characters; # Ignore this line | ||||
| 512 | $line = $self->_handle_encoding_line( $line ); | ||||
| 513 | } | ||||
| 514 | |||||
| 515 | if($line =~ m/^=cut/s) { | ||||
| 516 | # here ends the pod block, and therefore the previous pod para | ||||
| 517 | DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n"; | ||||
| 518 | $self->{'in_pod'} = 0; | ||||
| 519 | # ++$self->{'pod_para_count'}; | ||||
| 520 | $self->_ponder_paragraph_buffer(); | ||||
| 521 | # by now it's safe to consider the previous paragraph as done. | ||||
| 522 | DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n"; | ||||
| 523 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 524 | if $cut_handler; | ||||
| 525 | |||||
| 526 | # TODO: add to docs: Note: this may cause cuts to be processed out | ||||
| 527 | # of order relative to pods, but in order relative to code. | ||||
| 528 | |||||
| 529 | } elsif($line =~ m/^(\s*)$/s) { # it's a blank line | ||||
| 530 | if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line | ||||
| 531 | $wl_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 532 | if $wl_handler; | ||||
| 533 | } | ||||
| 534 | |||||
| 535 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
| 536 | DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; | ||||
| 537 | push @{$paras->[-1]}, $line; | ||||
| 538 | } # otherwise it's not interesting | ||||
| 539 | |||||
| 540 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { | ||||
| 541 | DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; | ||||
| 542 | } | ||||
| 543 | |||||
| 544 | $self->{'last_was_blank'} = 1; | ||||
| 545 | |||||
| 546 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... | ||||
| 547 | |||||
| 548 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) { | ||||
| 549 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS | ||||
| 550 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $3]; | ||||
| 551 | $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " "; | ||||
| 552 | # Note that in "=head1 foo", the WS is lost. | ||||
| 553 | # Example: ['=head1', {'start_line' => 123}, ' foo'] | ||||
| 554 | |||||
| 555 | ++$self->{'pod_para_count'}; | ||||
| 556 | |||||
| 557 | $self->_ponder_paragraph_buffer(); | ||||
| 558 | # by now it's safe to consider the previous paragraph as done. | ||||
| 559 | |||||
| 560 | push @$paras, $new; # the new incipient paragraph | ||||
| 561 | DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; | ||||
| 562 | |||||
| 563 | } elsif($line =~ m/^\s/s) { | ||||
| 564 | |||||
| 565 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
| 566 | DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n"; | ||||
| 567 | push @{$paras->[-1]}, $line; | ||||
| 568 | } else { | ||||
| 569 | ++$self->{'pod_para_count'}; | ||||
| 570 | $self->_ponder_paragraph_buffer(); | ||||
| 571 | # by now it's safe to consider the previous paragraph as done. | ||||
| 572 | DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n"; | ||||
| 573 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; | ||||
| 574 | } | ||||
| 575 | } else { | ||||
| 576 | ++$self->{'pod_para_count'}; | ||||
| 577 | $self->_ponder_paragraph_buffer(); | ||||
| 578 | # by now it's safe to consider the previous paragraph as done. | ||||
| 579 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; | ||||
| 580 | DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n"; | ||||
| 581 | } | ||||
| 582 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
| 583 | |||||
| 584 | } else { | ||||
| 585 | # It's a non-blank line /continuing/ the current para | ||||
| 586 | if(@$paras) { | ||||
| 587 | DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n"; | ||||
| 588 | push @{$paras->[-1]}, $line; | ||||
| 589 | } else { | ||||
| 590 | # Unexpected case! | ||||
| 591 | die "Continuing a paragraph but \@\$paras is empty?"; | ||||
| 592 | } | ||||
| 593 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
| 594 | } | ||||
| 595 | |||||
| 596 | } # ends the big while loop | ||||
| 597 | |||||
| 598 | DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); | ||||
| 599 | return $self; | ||||
| 600 | } | ||||
| 601 | |||||
| 602 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 603 | |||||
| 604 | sub _handle_encoding_line { | ||||
| 605 | my($self, $line) = @_; | ||||
| 606 | |||||
| 607 | return if $self->parse_characters; | ||||
| 608 | |||||
| 609 | # The point of this routine is to set $self->{'_transcoder'} as indicated. | ||||
| 610 | |||||
| 611 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; | ||||
| 612 | DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n"; | ||||
| 613 | |||||
| 614 | my $e = $1; | ||||
| 615 | my $orig = $e; | ||||
| 616 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; | ||||
| 617 | |||||
| 618 | my $enc_error; | ||||
| 619 | |||||
| 620 | # Cf. perldoc Encode and perldoc Encode::Supported | ||||
| 621 | |||||
| 622 | require Pod::Simple::Transcode; | ||||
| 623 | |||||
| 624 | if( $self->{'encoding'} ) { | ||||
| 625 | my $norm_current = $self->{'encoding'}; | ||||
| 626 | my $norm_e = $e; | ||||
| 627 | foreach my $that ($norm_current, $norm_e) { | ||||
| 628 | $that = lc($that); | ||||
| 629 | $that =~ s/[-_]//g; | ||||
| 630 | } | ||||
| 631 | if($norm_current eq $norm_e) { | ||||
| 632 | DEBUG > 1 and print STDERR "The '=encoding $orig' line is ", | ||||
| 633 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; | ||||
| 634 | $enc_error = ''; | ||||
| 635 | # But that doesn't necessarily mean that the earlier one went okay | ||||
| 636 | } else { | ||||
| 637 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; | ||||
| 638 | DEBUG > 1 and print STDERR $enc_error; | ||||
| 639 | } | ||||
| 640 | } elsif ( | ||||
| 641 | # OK, let's turn on the encoding | ||||
| 642 | do { | ||||
| 643 | DEBUG > 1 and print STDERR " Setting encoding to $e\n"; | ||||
| 644 | $self->{'encoding'} = $e; | ||||
| 645 | 1; | ||||
| 646 | } | ||||
| 647 | and $e eq 'HACKRAW' | ||||
| 648 | ) { | ||||
| 649 | DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n"; | ||||
| 650 | |||||
| 651 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { | ||||
| 652 | |||||
| 653 | die($enc_error = "WHAT? _transcoder is already set?!") | ||||
| 654 | if $self->{'_transcoder'}; # should never happen | ||||
| 655 | require Pod::Simple::Transcode; | ||||
| 656 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); | ||||
| 657 | eval { | ||||
| 658 | my @x = ('', "abc", "123"); | ||||
| 659 | $self->{'_transcoder'}->(@x); | ||||
| 660 | }; | ||||
| 661 | $@ && die( $enc_error = | ||||
| 662 | "Really unexpected error setting up encoding $e: $@\nAborting" | ||||
| 663 | ); | ||||
| 664 | $self->{'detected_encoding'} = $e; | ||||
| 665 | |||||
| 666 | } else { | ||||
| 667 | my @supported = Pod::Simple::Transcode::->all_encodings; | ||||
| 668 | |||||
| 669 | # Note unsupported, and complain | ||||
| 670 | DEBUG and print STDERR " Encoding [$e] is unsupported.", | ||||
| 671 | "\nSupporteds: @supported\n"; | ||||
| 672 | my $suggestion = ''; | ||||
| 673 | |||||
| 674 | # Look for a near match: | ||||
| 675 | my $norm = lc($e); | ||||
| 676 | $norm =~ tr[-_][]d; | ||||
| 677 | my $n; | ||||
| 678 | foreach my $enc (@supported) { | ||||
| 679 | $n = lc($enc); | ||||
| 680 | $n =~ tr[-_][]d; | ||||
| 681 | next unless $n eq $norm; | ||||
| 682 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; | ||||
| 683 | last; | ||||
| 684 | } | ||||
| 685 | my $encmodver = Pod::Simple::Transcode::->encmodver; | ||||
| 686 | $enc_error = join '' => | ||||
| 687 | "This document probably does not appear as it should, because its ", | ||||
| 688 | "\"=encoding $e\" line calls for an unsupported encoding.", | ||||
| 689 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" | ||||
| 690 | ; | ||||
| 691 | |||||
| 692 | $self->scream( $self->{'line_count'}, $enc_error ); | ||||
| 693 | } | ||||
| 694 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; | ||||
| 695 | if (defined($self->{'_processed_encoding'})) { | ||||
| 696 | # Double declaration. | ||||
| 697 | $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); | ||||
| 698 | } | ||||
| 699 | $self->{'_processed_encoding'} = $orig; | ||||
| 700 | |||||
| 701 | return $line; | ||||
| 702 | } | ||||
| 703 | |||||
| 704 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 705 | |||||
| 706 | sub _handle_encoding_second_level { | ||||
| 707 | # By time this is called, the encoding (if well formed) will already | ||||
| 708 | # have been acted on. | ||||
| 709 | my($self, $para) = @_; | ||||
| 710 | my @x = @$para; | ||||
| 711 | my $content = join ' ', splice @x, 2; | ||||
| 712 | $content =~ s/^\s+//s; | ||||
| 713 | $content =~ s/\s+$//s; | ||||
| 714 | |||||
| 715 | DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; | ||||
| 716 | |||||
| 717 | if (defined($self->{'_processed_encoding'})) { | ||||
| 718 | #if($content ne $self->{'_processed_encoding'}) { | ||||
| 719 | # Could it happen? | ||||
| 720 | #} | ||||
| 721 | delete $self->{'_processed_encoding'}; | ||||
| 722 | # It's already been handled. Check for errors. | ||||
| 723 | if(! $self->{'encoding_command_statuses'} ) { | ||||
| 724 | DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n"; | ||||
| 725 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { | ||||
| 726 | $self->whine( $para->[1]{'start_line'}, | ||||
| 727 | sprintf "Couldn't do %s: %s", | ||||
| 728 | $self->{'encoding_command_reqs' }[-1], | ||||
| 729 | $self->{'encoding_command_statuses'}[-1], | ||||
| 730 | ); | ||||
| 731 | } else { | ||||
| 732 | DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; | ||||
| 733 | } | ||||
| 734 | |||||
| 735 | } else { | ||||
| 736 | # Otherwise it's a syntax error | ||||
| 737 | $self->whine( $para->[1]{'start_line'}, | ||||
| 738 | "Invalid =encoding syntax: $content" | ||||
| 739 | ); | ||||
| 740 | } | ||||
| 741 | |||||
| 742 | return; | ||||
| 743 | } | ||||
| 744 | |||||
| 745 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` | ||||
| 746 | |||||
| 747 | { | ||||
| 748 | 1 | 200ns | my $m = -321; # magic line number | ||
| 749 | |||||
| 750 | sub _gen_errata { | ||||
| 751 | my $self = $_[0]; | ||||
| 752 | # Return 0 or more fake-o paragraphs explaining the accumulated | ||||
| 753 | # errors on this document. | ||||
| 754 | |||||
| 755 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; | ||||
| 756 | |||||
| 757 | my @out; | ||||
| 758 | |||||
| 759 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { | ||||
| 760 | push @out, | ||||
| 761 | ['=item', {'start_line' => $m}, "Around line $line:"], | ||||
| 762 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, | ||||
| 763 | #['~Top', {'start_line' => $m}, | ||||
| 764 | $_ | ||||
| 765 | #] | ||||
| 766 | ], | ||||
| 767 | @{$self->{'errata'}{$line}} | ||||
| 768 | ) | ||||
| 769 | ; | ||||
| 770 | } | ||||
| 771 | |||||
| 772 | # TODO: report of unknown entities? unrenderable characters? | ||||
| 773 | |||||
| 774 | unshift @out, | ||||
| 775 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], | ||||
| 776 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, | ||||
| 777 | "Hey! ", | ||||
| 778 | ['B', {}, | ||||
| 779 | 'The above document had some coding errors, which are explained below:' | ||||
| 780 | ] | ||||
| 781 | ], | ||||
| 782 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], | ||||
| 783 | ; | ||||
| 784 | |||||
| 785 | push @out, | ||||
| 786 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], | ||||
| 787 | ; | ||||
| 788 | |||||
| 789 | DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n"; | ||||
| 790 | |||||
| 791 | return @out; | ||||
| 792 | } | ||||
| 793 | |||||
| 794 | } | ||||
| 795 | |||||
| 796 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 797 | |||||
| 798 | ############################################################################## | ||||
| 799 | ## | ||||
| 800 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
| 801 | ## | ||||
| 802 | ## HERE IT BECOMES REALLY SCARY | ||||
| 803 | ## | ||||
| 804 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
| 805 | ## | ||||
| 806 | ############################################################################## | ||||
| 807 | |||||
| 808 | 1 | 300ns | sub _ponder_paragraph_buffer { | ||
| 809 | |||||
| 810 | # Para-token types as found in the buffer. | ||||
| 811 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, | ||||
| 812 | # =over, =back, =item | ||||
| 813 | # and the null =pod (to be complained about if over one line) | ||||
| 814 | # | ||||
| 815 | # "~data" paragraphs are something we generate at this level, depending on | ||||
| 816 | # a currently open =over region | ||||
| 817 | |||||
| 818 | # Events fired: Begin and end for: | ||||
| 819 | # directivename (like head1 .. head4), item, extend, | ||||
| 820 | # for (from =begin...=end, =for), | ||||
| 821 | # over-bullet, over-number, over-text, over-block, | ||||
| 822 | # item-bullet, item-number, item-text, | ||||
| 823 | # Document, | ||||
| 824 | # Data, Para, Verbatim | ||||
| 825 | # B, C, longdirname (TODO -- wha?), etc. for all directives | ||||
| 826 | # | ||||
| 827 | |||||
| 828 | my $self = $_[0]; | ||||
| 829 | my $paras; | ||||
| 830 | return unless @{$paras = $self->{'paras'}}; | ||||
| 831 | my $curr_open = ($self->{'curr_open'} ||= []); | ||||
| 832 | |||||
| 833 | my $scratch; | ||||
| 834 | |||||
| 835 | DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n"; | ||||
| 836 | |||||
| 837 | # We have something in our buffer. So apparently the document has started. | ||||
| 838 | unless($self->{'doc_has_started'}) { | ||||
| 839 | $self->{'doc_has_started'} = 1; | ||||
| 840 | |||||
| 841 | my $starting_contentless; | ||||
| 842 | $starting_contentless = | ||||
| 843 | ( | ||||
| 844 | !@$curr_open | ||||
| 845 | and @$paras and ! grep $_->[0] ne '~end', @$paras | ||||
| 846 | # i.e., if the paras is all ~ends | ||||
| 847 | ) | ||||
| 848 | ; | ||||
| 849 | DEBUG and print STDERR "# Starting ", | ||||
| 850 | $starting_contentless ? 'contentless' : 'contentful', | ||||
| 851 | " document\n" | ||||
| 852 | ; | ||||
| 853 | |||||
| 854 | $self->_handle_element_start( | ||||
| 855 | ($scratch = 'Document'), | ||||
| 856 | { | ||||
| 857 | 'start_line' => $paras->[0][1]{'start_line'}, | ||||
| 858 | $starting_contentless ? ( 'contentless' => 1 ) : (), | ||||
| 859 | }, | ||||
| 860 | ); | ||||
| 861 | } | ||||
| 862 | |||||
| 863 | my($para, $para_type); | ||||
| 864 | while(@$paras) { | ||||
| 865 | |||||
| 866 | # If a directive, assume it's legal; subtract below if found not to be | ||||
| 867 | $seen_legal_directive++ if $paras->[0][0] =~ /^=/; | ||||
| 868 | |||||
| 869 | last if @$paras == 1 | ||||
| 870 | and ( $paras->[0][0] eq '=over' | ||||
| 871 | or $paras->[0][0] eq '=item' | ||||
| 872 | or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'})); | ||||
| 873 | # Those're the three kinds of paragraphs that require lookahead. | ||||
| 874 | # Actually, an "=item Foo" inside an <over type=text> region | ||||
| 875 | # and any =item inside an <over type=block> region (rare) | ||||
| 876 | # don't require any lookahead, but all others (bullets | ||||
| 877 | # and numbers) do. | ||||
| 878 | # The verbatim is different from the other two, because those might be | ||||
| 879 | # like: | ||||
| 880 | # | ||||
| 881 | # =item | ||||
| 882 | # ... | ||||
| 883 | # =cut | ||||
| 884 | # ... | ||||
| 885 | # =item | ||||
| 886 | # | ||||
| 887 | # The =cut here finishes the paragraph but doesn't terminate the =over | ||||
| 888 | # they should be in. (khw apologizes that he didn't comment at the time | ||||
| 889 | # why the 'in_pod' works, and no longer remembers why, and doesn't think | ||||
| 890 | # it is currently worth the effort to re-figure it out.) | ||||
| 891 | |||||
| 892 | # TODO: whinge about many kinds of directives in non-resolving =for regions? | ||||
| 893 | # TODO: many? like what? =head1 etc? | ||||
| 894 | |||||
| 895 | $para = shift @$paras; | ||||
| 896 | $para_type = $para->[0]; | ||||
| 897 | |||||
| 898 | DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", | ||||
| 899 | $self->_dump_curr_open(), ")\n"; | ||||
| 900 | |||||
| 901 | if($para_type eq '=for') { | ||||
| 902 | next if $self->_ponder_for($para,$curr_open,$paras); | ||||
| 903 | |||||
| 904 | } elsif($para_type eq '=begin') { | ||||
| 905 | next if $self->_ponder_begin($para,$curr_open,$paras); | ||||
| 906 | |||||
| 907 | } elsif($para_type eq '=end') { | ||||
| 908 | next if $self->_ponder_end($para,$curr_open,$paras); | ||||
| 909 | |||||
| 910 | } elsif($para_type eq '~end') { # The virtual end-document signal | ||||
| 911 | next if $self->_ponder_doc_end($para,$curr_open,$paras); | ||||
| 912 | } | ||||
| 913 | |||||
| 914 | |||||
| 915 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 916 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 917 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 918 | DEBUG > 1 and | ||||
| 919 | print STDERR "Skipping $para_type paragraph because in ignore mode.\n"; | ||||
| 920 | next; | ||||
| 921 | } | ||||
| 922 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 923 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 924 | |||||
| 925 | if($para_type eq '=pod') { | ||||
| 926 | $self->_ponder_pod($para,$curr_open,$paras); | ||||
| 927 | |||||
| 928 | } elsif($para_type eq '=over') { | ||||
| 929 | next if $self->_ponder_over($para,$curr_open,$paras); | ||||
| 930 | |||||
| 931 | } elsif($para_type eq '=back') { | ||||
| 932 | next if $self->_ponder_back($para,$curr_open,$paras); | ||||
| 933 | |||||
| 934 | } else { | ||||
| 935 | |||||
| 936 | # All non-magical codes!!! | ||||
| 937 | |||||
| 938 | # Here we start using $para_type for our own twisted purposes, to | ||||
| 939 | # mean how it should get treated, not as what the element name | ||||
| 940 | # should be. | ||||
| 941 | |||||
| 942 | DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n"; | ||||
| 943 | |||||
| 944 | my $i; | ||||
| 945 | |||||
| 946 | # Enforce some =headN discipline | ||||
| 947 | if($para_type =~ m/^=head\d$/s | ||||
| 948 | and ! $self->{'accept_heads_anywhere'} | ||||
| 949 | and @$curr_open | ||||
| 950 | and $curr_open->[-1][0] eq '=over' | ||||
| 951 | ) { | ||||
| 952 | DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n"; | ||||
| 953 | $self->whine( | ||||
| 954 | $para->[1]{'start_line'}, | ||||
| 955 | "You forgot a '=back' before '$para_type'" | ||||
| 956 | ); | ||||
| 957 | unshift @$paras, ['=back', {}, ''], $para; # close the =over | ||||
| 958 | next; | ||||
| 959 | } | ||||
| 960 | |||||
| 961 | |||||
| 962 | if($para_type eq '=item') { | ||||
| 963 | |||||
| 964 | my $over; | ||||
| 965 | unless(@$curr_open and | ||||
| 966 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
| 967 | $self->whine( | ||||
| 968 | $para->[1]{'start_line'}, | ||||
| 969 | "'=item' outside of any '=over'" | ||||
| 970 | ); | ||||
| 971 | unshift @$paras, | ||||
| 972 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
| 973 | $para | ||||
| 974 | ; | ||||
| 975 | next; | ||||
| 976 | } | ||||
| 977 | |||||
| 978 | |||||
| 979 | my $over_type = $over->[1]{'~type'}; | ||||
| 980 | |||||
| 981 | if(!$over_type) { | ||||
| 982 | # Shouldn't happen1 | ||||
| 983 | die "Typeless over in stack, starting at line " | ||||
| 984 | . $over->[1]{'start_line'}; | ||||
| 985 | |||||
| 986 | } elsif($over_type eq 'block') { | ||||
| 987 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
| 988 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
| 989 | $self->whine( | ||||
| 990 | $curr_open->[-1][1]{'start_line'}, | ||||
| 991 | "You can't have =items (as at line " | ||||
| 992 | . $para->[1]{'start_line'} | ||||
| 993 | . ") unless the first thing after the =over is an =item" | ||||
| 994 | ); | ||||
| 995 | } | ||||
| 996 | # Just turn it into a paragraph and reconsider it | ||||
| 997 | $para->[0] = '~Para'; | ||||
| 998 | unshift @$paras, $para; | ||||
| 999 | next; | ||||
| 1000 | |||||
| 1001 | } elsif($over_type eq 'text') { | ||||
| 1002 | my $item_type = $self->_get_item_type($para); | ||||
| 1003 | # That kills the content of the item if it's a number or bullet. | ||||
| 1004 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1005 | |||||
| 1006 | if($item_type eq 'text') { | ||||
| 1007 | # Nothing special needs doing for 'text' | ||||
| 1008 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
| 1009 | $self->whine( | ||||
| 1010 | $para->[1]{'start_line'}, | ||||
| 1011 | "Expected text after =item, not a $item_type" | ||||
| 1012 | ); | ||||
| 1013 | # Undo our clobbering: | ||||
| 1014 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1015 | delete $para->[1]{'number'}; | ||||
| 1016 | # Only a PROPER item-number element is allowed | ||||
| 1017 | # to have a number attribute. | ||||
| 1018 | } else { | ||||
| 1019 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1020 | } | ||||
| 1021 | |||||
| 1022 | # =item-text thingies don't need any assimilation, it seems. | ||||
| 1023 | |||||
| 1024 | } elsif($over_type eq 'number') { | ||||
| 1025 | my $item_type = $self->_get_item_type($para); | ||||
| 1026 | # That kills the content of the item if it's a number or bullet. | ||||
| 1027 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1028 | |||||
| 1029 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
| 1030 | |||||
| 1031 | if($item_type eq 'bullet') { | ||||
| 1032 | # Hm, it's not numeric. Correct for this. | ||||
| 1033 | $para->[1]{'number'} = $expected_value; | ||||
| 1034 | $self->whine( | ||||
| 1035 | $para->[1]{'start_line'}, | ||||
| 1036 | "Expected '=item $expected_value'" | ||||
| 1037 | ); | ||||
| 1038 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1039 | # restore the bullet, blocking the assimilation of next para | ||||
| 1040 | |||||
| 1041 | } elsif($item_type eq 'text') { | ||||
| 1042 | # Hm, it's not numeric. Correct for this. | ||||
| 1043 | $para->[1]{'number'} = $expected_value; | ||||
| 1044 | $self->whine( | ||||
| 1045 | $para->[1]{'start_line'}, | ||||
| 1046 | "Expected '=item $expected_value'" | ||||
| 1047 | ); | ||||
| 1048 | # Text content will still be there and will block next ~Para | ||||
| 1049 | |||||
| 1050 | } elsif($item_type ne 'number') { | ||||
| 1051 | die "Unknown item type $item_type"; # should never happen | ||||
| 1052 | |||||
| 1053 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
| 1054 | DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; | ||||
| 1055 | |||||
| 1056 | } else { | ||||
| 1057 | DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, | ||||
| 1058 | " instead of the expected value of $expected_value\n"; | ||||
| 1059 | $self->whine( | ||||
| 1060 | $para->[1]{'start_line'}, | ||||
| 1061 | "You have '=item " . $para->[1]{'number'} . | ||||
| 1062 | "' instead of the expected '=item $expected_value'" | ||||
| 1063 | ); | ||||
| 1064 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
| 1065 | } | ||||
| 1066 | |||||
| 1067 | if(@$para == 2) { | ||||
| 1068 | # For the cases where we /didn't/ push to @$para | ||||
| 1069 | if($paras->[0][0] eq '~Para') { | ||||
| 1070 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1071 | push @$para, splice @{shift @$paras},2; | ||||
| 1072 | } else { | ||||
| 1073 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1074 | push @$para, ''; # Just so it's not contentless | ||||
| 1075 | } | ||||
| 1076 | } | ||||
| 1077 | |||||
| 1078 | |||||
| 1079 | } elsif($over_type eq 'bullet') { | ||||
| 1080 | my $item_type = $self->_get_item_type($para); | ||||
| 1081 | # That kills the content of the item if it's a number or bullet. | ||||
| 1082 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1083 | |||||
| 1084 | if($item_type eq 'bullet') { | ||||
| 1085 | # as expected! | ||||
| 1086 | |||||
| 1087 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
| 1088 | DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
| 1089 | push @$para, $para->[1]{'~_freaky_para_hack'}; | ||||
| 1090 | } | ||||
| 1091 | |||||
| 1092 | } elsif($item_type eq 'number') { | ||||
| 1093 | $self->whine( | ||||
| 1094 | $para->[1]{'start_line'}, | ||||
| 1095 | "Expected '=item *'" | ||||
| 1096 | ); | ||||
| 1097 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1098 | # and block assimilation of the next paragraph | ||||
| 1099 | delete $para->[1]{'number'}; | ||||
| 1100 | # Only a PROPER item-number element is allowed | ||||
| 1101 | # to have a number attribute. | ||||
| 1102 | } elsif($item_type eq 'text') { | ||||
| 1103 | $self->whine( | ||||
| 1104 | $para->[1]{'start_line'}, | ||||
| 1105 | "Expected '=item *'" | ||||
| 1106 | ); | ||||
| 1107 | # But doesn't need processing. But it'll block assimilation | ||||
| 1108 | # of the next para. | ||||
| 1109 | } else { | ||||
| 1110 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1111 | } | ||||
| 1112 | |||||
| 1113 | if(@$para == 2) { | ||||
| 1114 | # For the cases where we /didn't/ push to @$para | ||||
| 1115 | if($paras->[0][0] eq '~Para') { | ||||
| 1116 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1117 | push @$para, splice @{shift @$paras},2; | ||||
| 1118 | } else { | ||||
| 1119 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1120 | push @$para, ''; # Just so it's not contentless | ||||
| 1121 | } | ||||
| 1122 | } | ||||
| 1123 | |||||
| 1124 | } else { | ||||
| 1125 | die "Unhandled =over type \"$over_type\"?"; | ||||
| 1126 | # Shouldn't happen! | ||||
| 1127 | } | ||||
| 1128 | |||||
| 1129 | $para_type = 'Plain'; | ||||
| 1130 | $para->[0] .= '-' . $over_type; | ||||
| 1131 | # Whew. Now fall thru and process it. | ||||
| 1132 | |||||
| 1133 | |||||
| 1134 | } elsif($para_type eq '=extend') { | ||||
| 1135 | # Well, might as well implement it here. | ||||
| 1136 | $self->_ponder_extend($para); | ||||
| 1137 | next; # and skip | ||||
| 1138 | } elsif($para_type eq '=encoding') { | ||||
| 1139 | # Not actually acted on here, but we catch errors here. | ||||
| 1140 | $self->_handle_encoding_second_level($para); | ||||
| 1141 | next unless $self->keep_encoding_directive; | ||||
| 1142 | $para_type = 'Plain'; | ||||
| 1143 | } elsif($para_type eq '~Verbatim') { | ||||
| 1144 | $para->[0] = 'Verbatim'; | ||||
| 1145 | $para_type = '?Verbatim'; | ||||
| 1146 | } elsif($para_type eq '~Para') { | ||||
| 1147 | $para->[0] = 'Para'; | ||||
| 1148 | $para_type = '?Plain'; | ||||
| 1149 | } elsif($para_type eq 'Data') { | ||||
| 1150 | $para->[0] = 'Data'; | ||||
| 1151 | $para_type = '?Data'; | ||||
| 1152 | } elsif( $para_type =~ s/^=//s | ||||
| 1153 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) | ||||
| 1154 | ) { | ||||
| 1155 | DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; | ||||
| 1156 | } else { | ||||
| 1157 | # An unknown directive! | ||||
| 1158 | $seen_legal_directive--; | ||||
| 1159 | DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", | ||||
| 1160 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) | ||||
| 1161 | ; | ||||
| 1162 | $self->whine( | ||||
| 1163 | $para->[1]{'start_line'}, | ||||
| 1164 | "Unknown directive: $para->[0]" | ||||
| 1165 | ); | ||||
| 1166 | |||||
| 1167 | # And maybe treat it as text instead of just letting it go? | ||||
| 1168 | next; | ||||
| 1169 | } | ||||
| 1170 | |||||
| 1171 | if($para_type =~ s/^\?//s) { | ||||
| 1172 | if(! @$curr_open) { # usual case | ||||
| 1173 | DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n"; | ||||
| 1174 | } else { | ||||
| 1175 | my @fors = grep $_->[0] eq '=for', @$curr_open; | ||||
| 1176 | DEBUG > 1 and print STDERR "Containing fors: ", | ||||
| 1177 | join(',', map $_->[1]{'target'}, @fors), "\n"; | ||||
| 1178 | |||||
| 1179 | if(! @fors) { | ||||
| 1180 | DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; | ||||
| 1181 | |||||
| 1182 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { | ||||
| 1183 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { | ||||
| 1184 | } elsif( $fors[-1][1]{'~resolve'} ) { | ||||
| 1185 | # Look to the immediately containing for | ||||
| 1186 | |||||
| 1187 | if($para_type eq 'Data') { | ||||
| 1188 | DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
| 1189 | $para->[0] = 'Para'; | ||||
| 1190 | $para_type = 'Plain'; | ||||
| 1191 | } else { | ||||
| 1192 | DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
| 1193 | } | ||||
| 1194 | } else { | ||||
| 1195 | DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; | ||||
| 1196 | $para->[0] = $para_type = 'Data'; | ||||
| 1197 | } | ||||
| 1198 | } | ||||
| 1199 | } | ||||
| 1200 | |||||
| 1201 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| 1202 | if($para_type eq 'Plain') { | ||||
| 1203 | $self->_ponder_Plain($para); | ||||
| 1204 | } elsif($para_type eq 'Verbatim') { | ||||
| 1205 | $self->_ponder_Verbatim($para); | ||||
| 1206 | } elsif($para_type eq 'Data') { | ||||
| 1207 | $self->_ponder_Data($para); | ||||
| 1208 | } else { | ||||
| 1209 | die "\$para type is $para_type -- how did that happen?"; | ||||
| 1210 | # Shouldn't happen. | ||||
| 1211 | } | ||||
| 1212 | |||||
| 1213 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| 1214 | $para->[0] =~ s/^[~=]//s; | ||||
| 1215 | |||||
| 1216 | DEBUG and print STDERR "\n", pretty($para), "\n"; | ||||
| 1217 | |||||
| 1218 | # traverse the treelet (which might well be just one string scalar) | ||||
| 1219 | $self->{'content_seen'} ||= 1 if $seen_legal_directive | ||||
| 1220 | && ! $self->{'~tried_gen_errata'}; | ||||
| 1221 | $self->_traverse_treelet_bit(@$para); | ||||
| 1222 | } | ||||
| 1223 | } | ||||
| 1224 | |||||
| 1225 | return; | ||||
| 1226 | } | ||||
| 1227 | |||||
| 1228 | ########################################################################### | ||||
| 1229 | # The sub-ponderers... | ||||
| 1230 | |||||
| - - | |||||
| 1233 | sub _ponder_for { | ||||
| 1234 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1235 | |||||
| 1236 | # Fake it out as a begin/end | ||||
| 1237 | my $target; | ||||
| 1238 | |||||
| 1239 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 1240 | DEBUG > 1 and print STDERR "Ignoring ignorable =for\n"; | ||||
| 1241 | return 1; | ||||
| 1242 | } | ||||
| 1243 | |||||
| 1244 | for(my $i = 2; $i < @$para; ++$i) { | ||||
| 1245 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { | ||||
| 1246 | $target = $1; | ||||
| 1247 | last; | ||||
| 1248 | } | ||||
| 1249 | } | ||||
| 1250 | unless(defined $target) { | ||||
| 1251 | $self->whine( | ||||
| 1252 | $para->[1]{'start_line'}, | ||||
| 1253 | "=for without a target?" | ||||
| 1254 | ); | ||||
| 1255 | return 1; | ||||
| 1256 | } | ||||
| 1257 | DEBUG > 1 and | ||||
| 1258 | print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; | ||||
| 1259 | |||||
| 1260 | $para->[0] = 'Data'; | ||||
| 1261 | |||||
| 1262 | unshift @$paras, | ||||
| 1263 | ['=begin', | ||||
| 1264 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
| 1265 | $target, | ||||
| 1266 | ], | ||||
| 1267 | $para, | ||||
| 1268 | ['=end', | ||||
| 1269 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
| 1270 | $target, | ||||
| 1271 | ], | ||||
| 1272 | ; | ||||
| 1273 | |||||
| 1274 | return 1; | ||||
| 1275 | } | ||||
| 1276 | |||||
| 1277 | sub _ponder_begin { | ||||
| 1278 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1279 | my $content = join ' ', splice @$para, 2; | ||||
| 1280 | $content =~ s/^\s+//s; | ||||
| 1281 | $content =~ s/\s+$//s; | ||||
| 1282 | unless(length($content)) { | ||||
| 1283 | $self->whine( | ||||
| 1284 | $para->[1]{'start_line'}, | ||||
| 1285 | "=begin without a target?" | ||||
| 1286 | ); | ||||
| 1287 | DEBUG and print STDERR "Ignoring targetless =begin\n"; | ||||
| 1288 | return 1; | ||||
| 1289 | } | ||||
| 1290 | |||||
| 1291 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; | ||||
| 1292 | $para->[1]{'title'} = $title if ($title); | ||||
| 1293 | $para->[1]{'target'} = $target; # without any ':' | ||||
| 1294 | $content = $target; # strip off the title | ||||
| 1295 | |||||
| 1296 | $content =~ s/^:!/!:/s; | ||||
| 1297 | my $neg; # whether this is a negation-match | ||||
| 1298 | $neg = 1 if $content =~ s/^!//s; | ||||
| 1299 | my $to_resolve; # whether to process formatting codes | ||||
| 1300 | $to_resolve = 1 if $content =~ s/^://s; | ||||
| 1301 | |||||
| 1302 | my $dont_ignore; # whether this target matches us | ||||
| 1303 | |||||
| 1304 | foreach my $target_name ( | ||||
| 1305 | split(',', $content, -1), | ||||
| 1306 | $neg ? () : '*' | ||||
| 1307 | ) { | ||||
| 1308 | DEBUG > 2 and | ||||
| 1309 | print STDERR " Considering whether =begin $content matches $target_name\n"; | ||||
| 1310 | next unless $self->{'accept_targets'}{$target_name}; | ||||
| 1311 | |||||
| 1312 | DEBUG > 2 and | ||||
| 1313 | print STDERR " It DOES match the acceptable target $target_name!\n"; | ||||
| 1314 | $to_resolve = 1 | ||||
| 1315 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; | ||||
| 1316 | $dont_ignore = 1; | ||||
| 1317 | $para->[1]{'target_matching'} = $target_name; | ||||
| 1318 | last; # stop looking at other target names | ||||
| 1319 | } | ||||
| 1320 | |||||
| 1321 | if($neg) { | ||||
| 1322 | if( $dont_ignore ) { | ||||
| 1323 | $dont_ignore = ''; | ||||
| 1324 | delete $para->[1]{'target_matching'}; | ||||
| 1325 | DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n"; | ||||
| 1326 | } else { | ||||
| 1327 | $dont_ignore = 1; | ||||
| 1328 | $para->[1]{'target_matching'} = '!'; | ||||
| 1329 | DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n"; | ||||
| 1330 | } | ||||
| 1331 | } | ||||
| 1332 | |||||
| 1333 | $para->[0] = '=for'; # Just what we happen to call these, internally | ||||
| 1334 | $para->[1]{'~really'} ||= '=begin'; | ||||
| 1335 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; | ||||
| 1336 | $para->[1]{'~resolve'} = $to_resolve || 0; | ||||
| 1337 | |||||
| 1338 | DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '', | ||||
| 1339 | "ignore contents of this region\n"; | ||||
| 1340 | DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ", | ||||
| 1341 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; | ||||
| 1342 | DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n"; | ||||
| 1343 | |||||
| 1344 | push @$curr_open, $para; | ||||
| 1345 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 1346 | DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; | ||||
| 1347 | } else { | ||||
| 1348 | $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; | ||||
| 1349 | $self->_handle_element_start((my $scratch='for'), $para->[1]); | ||||
| 1350 | } | ||||
| 1351 | |||||
| 1352 | return 1; | ||||
| 1353 | } | ||||
| 1354 | |||||
| 1355 | sub _ponder_end { | ||||
| 1356 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1357 | my $content = join ' ', splice @$para, 2; | ||||
| 1358 | $content =~ s/^\s+//s; | ||||
| 1359 | $content =~ s/\s+$//s; | ||||
| 1360 | DEBUG and print STDERR "Ogling '=end $content' directive\n"; | ||||
| 1361 | |||||
| 1362 | unless(length($content)) { | ||||
| 1363 | $self->whine( | ||||
| 1364 | $para->[1]{'start_line'}, | ||||
| 1365 | "'=end' without a target?" . ( | ||||
| 1366 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) | ||||
| 1367 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) | ||||
| 1368 | : '' | ||||
| 1369 | ) | ||||
| 1370 | ); | ||||
| 1371 | DEBUG and print STDERR "Ignoring targetless =end\n"; | ||||
| 1372 | return 1; | ||||
| 1373 | } | ||||
| 1374 | |||||
| 1375 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word | ||||
| 1376 | $self->whine( | ||||
| 1377 | $para->[1]{'start_line'}, | ||||
| 1378 | "'=end $content' is invalid. (Stack: " | ||||
| 1379 | . $self->_dump_curr_open() . ')' | ||||
| 1380 | ); | ||||
| 1381 | DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; | ||||
| 1382 | return 1; | ||||
| 1383 | } | ||||
| 1384 | |||||
| 1385 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { | ||||
| 1386 | $self->whine( | ||||
| 1387 | $para->[1]{'start_line'}, | ||||
| 1388 | "=end $content without matching =begin. (Stack: " | ||||
| 1389 | . $self->_dump_curr_open() . ')' | ||||
| 1390 | ); | ||||
| 1391 | DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; | ||||
| 1392 | return 1; | ||||
| 1393 | } | ||||
| 1394 | |||||
| 1395 | unless($content eq $curr_open->[-1][1]{'target'}) { | ||||
| 1396 | $self->whine( | ||||
| 1397 | $para->[1]{'start_line'}, | ||||
| 1398 | "=end $content doesn't match =begin " | ||||
| 1399 | . $curr_open->[-1][1]{'target'} | ||||
| 1400 | . ". (Stack: " | ||||
| 1401 | . $self->_dump_curr_open() . ')' | ||||
| 1402 | ); | ||||
| 1403 | DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; | ||||
| 1404 | return 1; | ||||
| 1405 | } | ||||
| 1406 | |||||
| 1407 | # Else it's okay to close... | ||||
| 1408 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 1409 | DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n"; | ||||
| 1410 | # And that may be because of this to-be-closed =for region, or some | ||||
| 1411 | # other one, but it doesn't matter. | ||||
| 1412 | } else { | ||||
| 1413 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; | ||||
| 1414 | # what's that for? | ||||
| 1415 | |||||
| 1416 | $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; | ||||
| 1417 | $self->_handle_element_end( my $scratch = 'for', $para->[1]); | ||||
| 1418 | } | ||||
| 1419 | DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; | ||||
| 1420 | pop @$curr_open; | ||||
| 1421 | |||||
| 1422 | return 1; | ||||
| 1423 | } | ||||
| 1424 | |||||
| 1425 | sub _ponder_doc_end { | ||||
| 1426 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1427 | if(@$curr_open) { # Deal with things left open | ||||
| 1428 | DEBUG and print STDERR "Stack is nonempty at end-document: (", | ||||
| 1429 | $self->_dump_curr_open(), ")\n"; | ||||
| 1430 | |||||
| 1431 | DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; | ||||
| 1432 | unshift @$paras, $self->_closers_for_all_curr_open; | ||||
| 1433 | # Make sure there is exactly one ~end in the parastack, at the end: | ||||
| 1434 | @$paras = grep $_->[0] ne '~end', @$paras; | ||||
| 1435 | push @$paras, $para, $para; | ||||
| 1436 | # We need two -- once for the next cycle where we | ||||
| 1437 | # generate errata, and then another to be at the end | ||||
| 1438 | # when that loop back around to process the errata. | ||||
| 1439 | return 1; | ||||
| 1440 | |||||
| 1441 | } else { | ||||
| 1442 | DEBUG and print STDERR "Okay, stack is empty now.\n"; | ||||
| 1443 | } | ||||
| 1444 | |||||
| 1445 | # Try generating errata section, if applicable | ||||
| 1446 | unless($self->{'~tried_gen_errata'}) { | ||||
| 1447 | $self->{'~tried_gen_errata'} = 1; | ||||
| 1448 | my @extras = $self->_gen_errata(); | ||||
| 1449 | if(@extras) { | ||||
| 1450 | unshift @$paras, @extras; | ||||
| 1451 | DEBUG and print STDERR "Generated errata... relooping...\n"; | ||||
| 1452 | return 1; # I.e., loop around again to process these fake-o paragraphs | ||||
| 1453 | } | ||||
| 1454 | } | ||||
| 1455 | |||||
| 1456 | splice @$paras; # Well, that's that for this paragraph buffer. | ||||
| 1457 | DEBUG and print STDERR "Throwing end-document event.\n"; | ||||
| 1458 | |||||
| 1459 | $self->_handle_element_end( my $scratch = 'Document' ); | ||||
| 1460 | return 1; # Hasta la byebye | ||||
| 1461 | } | ||||
| 1462 | |||||
| 1463 | sub _ponder_pod { | ||||
| 1464 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1465 | $self->whine( | ||||
| 1466 | $para->[1]{'start_line'}, | ||||
| 1467 | "=pod directives shouldn't be over one line long! Ignoring all " | ||||
| 1468 | . (@$para - 2) . " lines of content" | ||||
| 1469 | ) if @$para > 3; | ||||
| 1470 | |||||
| 1471 | # Content ignored unless 'pod_handler' is set | ||||
| 1472 | if (my $pod_handler = $self->{'pod_handler'}) { | ||||
| 1473 | my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; | ||||
| 1474 | $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output | ||||
| 1475 | $pod_handler->($line, $line_num, $self); | ||||
| 1476 | } | ||||
| 1477 | |||||
| 1478 | # The surrounding methods set content_seen, so let us remain consistent. | ||||
| 1479 | # I do not know why it was not here before -- should it not be here? | ||||
| 1480 | # $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; | ||||
| 1481 | |||||
| 1482 | return; | ||||
| 1483 | } | ||||
| 1484 | |||||
| 1485 | sub _ponder_over { | ||||
| 1486 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1487 | return 1 unless @$paras; | ||||
| 1488 | my $list_type; | ||||
| 1489 | |||||
| 1490 | if($paras->[0][0] eq '=item') { # most common case | ||||
| 1491 | $list_type = $self->_get_initial_item_type($paras->[0]); | ||||
| 1492 | |||||
| 1493 | } elsif($paras->[0][0] eq '=back') { | ||||
| 1494 | # Ignore empty lists by default | ||||
| 1495 | if ($self->{'parse_empty_lists'}) { | ||||
| 1496 | $list_type = 'empty'; | ||||
| 1497 | } else { | ||||
| 1498 | shift @$paras; | ||||
| 1499 | return 1; | ||||
| 1500 | } | ||||
| 1501 | } elsif($paras->[0][0] eq '~end') { | ||||
| 1502 | $self->whine( | ||||
| 1503 | $para->[1]{'start_line'}, | ||||
| 1504 | "=over is the last thing in the document?!" | ||||
| 1505 | ); | ||||
| 1506 | return 1; # But feh, ignore it. | ||||
| 1507 | } else { | ||||
| 1508 | $list_type = 'block'; | ||||
| 1509 | } | ||||
| 1510 | $para->[1]{'~type'} = $list_type; | ||||
| 1511 | push @$curr_open, $para; | ||||
| 1512 | # yes, we reuse the paragraph as a stack item | ||||
| 1513 | |||||
| 1514 | my $content = join ' ', splice @$para, 2; | ||||
| 1515 | $para->[1]{'~orig_content'} = $content; | ||||
| 1516 | my $overness; | ||||
| 1517 | if($content =~ m/^\s*$/s) { | ||||
| 1518 | $para->[1]{'indent'} = 4; | ||||
| 1519 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { | ||||
| 1520 | 2 | 2.83ms | 2 | 15µs | # spent 13µs (10+2) within Pod::Simple::BlackBox::BEGIN@1520 which was called:
# once (10µs+2µs) by Pod::Simple::LinkSection::BEGIN@7 at line 1520 # spent 13µs making 1 call to Pod::Simple::BlackBox::BEGIN@1520
# spent 2µs making 1 call to integer::unimport |
| 1521 | $para->[1]{'indent'} = $1; | ||||
| 1522 | if($1 == 0) { | ||||
| 1523 | $self->whine( | ||||
| 1524 | $para->[1]{'start_line'}, | ||||
| 1525 | "Can't have a 0 in =over $content" | ||||
| 1526 | ); | ||||
| 1527 | $para->[1]{'indent'} = 4; | ||||
| 1528 | } | ||||
| 1529 | } else { | ||||
| 1530 | $self->whine( | ||||
| 1531 | $para->[1]{'start_line'}, | ||||
| 1532 | "=over should be: '=over' or '=over positive_number'" | ||||
| 1533 | ); | ||||
| 1534 | $para->[1]{'indent'} = 4; | ||||
| 1535 | } | ||||
| 1536 | DEBUG > 1 and print STDERR "=over found of type $list_type\n"; | ||||
| 1537 | |||||
| 1538 | $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; | ||||
| 1539 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); | ||||
| 1540 | |||||
| 1541 | return; | ||||
| 1542 | } | ||||
| 1543 | |||||
| 1544 | sub _ponder_back { | ||||
| 1545 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1546 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? | ||||
| 1547 | |||||
| 1548 | my $content = join ' ', splice @$para, 2; | ||||
| 1549 | if($content =~ m/\S/) { | ||||
| 1550 | $self->whine( | ||||
| 1551 | $para->[1]{'start_line'}, | ||||
| 1552 | "=back doesn't take any parameters, but you said =back $content" | ||||
| 1553 | ); | ||||
| 1554 | } | ||||
| 1555 | |||||
| 1556 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { | ||||
| 1557 | DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; | ||||
| 1558 | # Expected case: we're closing the most recently opened thing | ||||
| 1559 | #my $over = pop @$curr_open; | ||||
| 1560 | $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; | ||||
| 1561 | $self->_handle_element_end( my $scratch = | ||||
| 1562 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] | ||||
| 1563 | ); | ||||
| 1564 | } else { | ||||
| 1565 | DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (", | ||||
| 1566 | join(', ', map $_->[0], @$curr_open), ").\n"; | ||||
| 1567 | $self->whine( | ||||
| 1568 | $para->[1]{'start_line'}, | ||||
| 1569 | '=back without =over' | ||||
| 1570 | ); | ||||
| 1571 | return 1; # and ignore it | ||||
| 1572 | } | ||||
| 1573 | } | ||||
| 1574 | |||||
| 1575 | sub _ponder_item { | ||||
| 1576 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1577 | my $over; | ||||
| 1578 | unless(@$curr_open and | ||||
| 1579 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
| 1580 | $self->whine( | ||||
| 1581 | $para->[1]{'start_line'}, | ||||
| 1582 | "'=item' outside of any '=over'" | ||||
| 1583 | ); | ||||
| 1584 | unshift @$paras, | ||||
| 1585 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
| 1586 | $para | ||||
| 1587 | ; | ||||
| 1588 | return 1; | ||||
| 1589 | } | ||||
| 1590 | |||||
| 1591 | |||||
| 1592 | my $over_type = $over->[1]{'~type'}; | ||||
| 1593 | |||||
| 1594 | if(!$over_type) { | ||||
| 1595 | # Shouldn't happen1 | ||||
| 1596 | die "Typeless over in stack, starting at line " | ||||
| 1597 | . $over->[1]{'start_line'}; | ||||
| 1598 | |||||
| 1599 | } elsif($over_type eq 'block') { | ||||
| 1600 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
| 1601 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
| 1602 | $self->whine( | ||||
| 1603 | $curr_open->[-1][1]{'start_line'}, | ||||
| 1604 | "You can't have =items (as at line " | ||||
| 1605 | . $para->[1]{'start_line'} | ||||
| 1606 | . ") unless the first thing after the =over is an =item" | ||||
| 1607 | ); | ||||
| 1608 | } | ||||
| 1609 | # Just turn it into a paragraph and reconsider it | ||||
| 1610 | $para->[0] = '~Para'; | ||||
| 1611 | unshift @$paras, $para; | ||||
| 1612 | return 1; | ||||
| 1613 | |||||
| 1614 | } elsif($over_type eq 'text') { | ||||
| 1615 | my $item_type = $self->_get_item_type($para); | ||||
| 1616 | # That kills the content of the item if it's a number or bullet. | ||||
| 1617 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1618 | |||||
| 1619 | if($item_type eq 'text') { | ||||
| 1620 | # Nothing special needs doing for 'text' | ||||
| 1621 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
| 1622 | $self->whine( | ||||
| 1623 | $para->[1]{'start_line'}, | ||||
| 1624 | "Expected text after =item, not a $item_type" | ||||
| 1625 | ); | ||||
| 1626 | # Undo our clobbering: | ||||
| 1627 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1628 | delete $para->[1]{'number'}; | ||||
| 1629 | # Only a PROPER item-number element is allowed | ||||
| 1630 | # to have a number attribute. | ||||
| 1631 | } else { | ||||
| 1632 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1633 | } | ||||
| 1634 | |||||
| 1635 | # =item-text thingies don't need any assimilation, it seems. | ||||
| 1636 | |||||
| 1637 | } elsif($over_type eq 'number') { | ||||
| 1638 | my $item_type = $self->_get_item_type($para); | ||||
| 1639 | # That kills the content of the item if it's a number or bullet. | ||||
| 1640 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1641 | |||||
| 1642 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
| 1643 | |||||
| 1644 | if($item_type eq 'bullet') { | ||||
| 1645 | # Hm, it's not numeric. Correct for this. | ||||
| 1646 | $para->[1]{'number'} = $expected_value; | ||||
| 1647 | $self->whine( | ||||
| 1648 | $para->[1]{'start_line'}, | ||||
| 1649 | "Expected '=item $expected_value'" | ||||
| 1650 | ); | ||||
| 1651 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1652 | # restore the bullet, blocking the assimilation of next para | ||||
| 1653 | |||||
| 1654 | } elsif($item_type eq 'text') { | ||||
| 1655 | # Hm, it's not numeric. Correct for this. | ||||
| 1656 | $para->[1]{'number'} = $expected_value; | ||||
| 1657 | $self->whine( | ||||
| 1658 | $para->[1]{'start_line'}, | ||||
| 1659 | "Expected '=item $expected_value'" | ||||
| 1660 | ); | ||||
| 1661 | # Text content will still be there and will block next ~Para | ||||
| 1662 | |||||
| 1663 | } elsif($item_type ne 'number') { | ||||
| 1664 | die "Unknown item type $item_type"; # should never happen | ||||
| 1665 | |||||
| 1666 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
| 1667 | DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; | ||||
| 1668 | |||||
| 1669 | } else { | ||||
| 1670 | DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, | ||||
| 1671 | " instead of the expected value of $expected_value\n"; | ||||
| 1672 | $self->whine( | ||||
| 1673 | $para->[1]{'start_line'}, | ||||
| 1674 | "You have '=item " . $para->[1]{'number'} . | ||||
| 1675 | "' instead of the expected '=item $expected_value'" | ||||
| 1676 | ); | ||||
| 1677 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
| 1678 | } | ||||
| 1679 | |||||
| 1680 | if(@$para == 2) { | ||||
| 1681 | # For the cases where we /didn't/ push to @$para | ||||
| 1682 | if($paras->[0][0] eq '~Para') { | ||||
| 1683 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1684 | push @$para, splice @{shift @$paras},2; | ||||
| 1685 | } else { | ||||
| 1686 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1687 | push @$para, ''; # Just so it's not contentless | ||||
| 1688 | } | ||||
| 1689 | } | ||||
| 1690 | |||||
| 1691 | |||||
| 1692 | } elsif($over_type eq 'bullet') { | ||||
| 1693 | my $item_type = $self->_get_item_type($para); | ||||
| 1694 | # That kills the content of the item if it's a number or bullet. | ||||
| 1695 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1696 | |||||
| 1697 | if($item_type eq 'bullet') { | ||||
| 1698 | # as expected! | ||||
| 1699 | |||||
| 1700 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
| 1701 | DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
| 1702 | push @$para, $para->[1]{'~_freaky_para_hack'}; | ||||
| 1703 | } | ||||
| 1704 | |||||
| 1705 | } elsif($item_type eq 'number') { | ||||
| 1706 | $self->whine( | ||||
| 1707 | $para->[1]{'start_line'}, | ||||
| 1708 | "Expected '=item *'" | ||||
| 1709 | ); | ||||
| 1710 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1711 | # and block assimilation of the next paragraph | ||||
| 1712 | delete $para->[1]{'number'}; | ||||
| 1713 | # Only a PROPER item-number element is allowed | ||||
| 1714 | # to have a number attribute. | ||||
| 1715 | } elsif($item_type eq 'text') { | ||||
| 1716 | $self->whine( | ||||
| 1717 | $para->[1]{'start_line'}, | ||||
| 1718 | "Expected '=item *'" | ||||
| 1719 | ); | ||||
| 1720 | # But doesn't need processing. But it'll block assimilation | ||||
| 1721 | # of the next para. | ||||
| 1722 | } else { | ||||
| 1723 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1724 | } | ||||
| 1725 | |||||
| 1726 | if(@$para == 2) { | ||||
| 1727 | # For the cases where we /didn't/ push to @$para | ||||
| 1728 | if($paras->[0][0] eq '~Para') { | ||||
| 1729 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1730 | push @$para, splice @{shift @$paras},2; | ||||
| 1731 | } else { | ||||
| 1732 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1733 | push @$para, ''; # Just so it's not contentless | ||||
| 1734 | } | ||||
| 1735 | } | ||||
| 1736 | |||||
| 1737 | } else { | ||||
| 1738 | die "Unhandled =over type \"$over_type\"?"; | ||||
| 1739 | # Shouldn't happen! | ||||
| 1740 | } | ||||
| 1741 | $para->[0] .= '-' . $over_type; | ||||
| 1742 | |||||
| 1743 | return; | ||||
| 1744 | } | ||||
| 1745 | |||||
| 1746 | sub _ponder_Plain { | ||||
| 1747 | my ($self,$para) = @_; | ||||
| 1748 | DEBUG and print STDERR " giving plain treatment...\n"; | ||||
| 1749 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) | ||||
| 1750 | or $para->[1]{'~cooked'} | ||||
| 1751 | ) { | ||||
| 1752 | push @$para, | ||||
| 1753 | @{$self->_make_treelet( | ||||
| 1754 | join("\n", splice(@$para, 2)), | ||||
| 1755 | $para->[1]{'start_line'} | ||||
| 1756 | )}; | ||||
| 1757 | } | ||||
| 1758 | # Empty paragraphs don't need a treelet for any reason I can see. | ||||
| 1759 | # And precooked paragraphs already have a treelet. | ||||
| 1760 | return; | ||||
| 1761 | } | ||||
| 1762 | |||||
| 1763 | sub _ponder_Verbatim { | ||||
| 1764 | my ($self,$para) = @_; | ||||
| 1765 | DEBUG and print STDERR " giving verbatim treatment...\n"; | ||||
| 1766 | |||||
| 1767 | $para->[1]{'xml:space'} = 'preserve'; | ||||
| 1768 | |||||
| 1769 | unless ($self->{'_output_is_for_JustPod'}) { | ||||
| 1770 | # Fix illegal settings for expand_verbatim_tabs() | ||||
| 1771 | # This is because this module doesn't do input error checking, but khw | ||||
| 1772 | # doesn't want to add yet another instance of that. | ||||
| 1773 | $self->expand_verbatim_tabs(8) | ||||
| 1774 | if ! defined $self->expand_verbatim_tabs() | ||||
| 1775 | || $self->expand_verbatim_tabs() =~ /\D/; | ||||
| 1776 | |||||
| 1777 | my $indent = $self->strip_verbatim_indent; | ||||
| 1778 | if ($indent && ref $indent eq 'CODE') { | ||||
| 1779 | my @shifted = (shift @{$para}, shift @{$para}); | ||||
| 1780 | $indent = $indent->($para); | ||||
| 1781 | unshift @{$para}, @shifted; | ||||
| 1782 | } | ||||
| 1783 | |||||
| 1784 | for(my $i = 2; $i < @$para; $i++) { | ||||
| 1785 | foreach my $line ($para->[$i]) { # just for aliasing | ||||
| 1786 | # Strip indentation. | ||||
| 1787 | $line =~ s/^\Q$indent// if $indent; | ||||
| 1788 | next unless $self->expand_verbatim_tabs; | ||||
| 1789 | |||||
| 1790 | # This is commented out because of github issue #85, and the | ||||
| 1791 | # current maintainers don't know why it was there in the first | ||||
| 1792 | # place. | ||||
| 1793 | #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); | ||||
| 1794 | while( $line =~ | ||||
| 1795 | # Sort of adapted from Text::Tabs. | ||||
| 1796 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2) | ||||
| 1797 | * $self->expand_verbatim_tabs) | ||||
| 1798 | -(length($1)&7)))/e | ||||
| 1799 | ) {} | ||||
| 1800 | |||||
| 1801 | # TODO: whinge about (or otherwise treat) unindented or overlong lines | ||||
| 1802 | |||||
| 1803 | } | ||||
| 1804 | } | ||||
| 1805 | } | ||||
| 1806 | |||||
| 1807 | # Now the VerbatimFormatted hoodoo... | ||||
| 1808 | if( $self->{'accept_codes'} and | ||||
| 1809 | $self->{'accept_codes'}{'VerbatimFormatted'} | ||||
| 1810 | ) { | ||||
| 1811 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } | ||||
| 1812 | # Kill any number of terminal newlines | ||||
| 1813 | $self->_verbatim_format($para); | ||||
| 1814 | } elsif ($self->{'codes_in_verbatim'}) { | ||||
| 1815 | push @$para, | ||||
| 1816 | @{$self->_make_treelet( | ||||
| 1817 | join("\n", splice(@$para, 2)), | ||||
| 1818 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} | ||||
| 1819 | )}; | ||||
| 1820 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
| 1821 | } else { | ||||
| 1822 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
| 1823 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
| 1824 | } | ||||
| 1825 | return; | ||||
| 1826 | } | ||||
| 1827 | |||||
| 1828 | sub _ponder_Data { | ||||
| 1829 | my ($self,$para) = @_; | ||||
| 1830 | DEBUG and print STDERR " giving data treatment...\n"; | ||||
| 1831 | $para->[1]{'xml:space'} = 'preserve'; | ||||
| 1832 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
| 1833 | return; | ||||
| 1834 | } | ||||
| 1835 | |||||
| - - | |||||
| 1839 | ########################################################################### | ||||
| 1840 | |||||
| 1841 | sub _traverse_treelet_bit { # for use only by the routine above | ||||
| 1842 | my($self, $name) = splice @_,0,2; | ||||
| 1843 | |||||
| 1844 | my $scratch; | ||||
| 1845 | $self->_handle_element_start(($scratch=$name), shift @_); | ||||
| 1846 | |||||
| 1847 | while (@_) { | ||||
| 1848 | my $x = shift; | ||||
| 1849 | if (ref($x)) { | ||||
| 1850 | &_traverse_treelet_bit($self, @$x); | ||||
| 1851 | } else { | ||||
| 1852 | $x .= shift while @_ && !ref($_[0]); | ||||
| 1853 | $self->_handle_text($x); | ||||
| 1854 | } | ||||
| 1855 | } | ||||
| 1856 | |||||
| 1857 | $self->_handle_element_end($scratch=$name); | ||||
| 1858 | return; | ||||
| 1859 | } | ||||
| 1860 | |||||
| 1861 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1862 | |||||
| 1863 | sub _closers_for_all_curr_open { | ||||
| 1864 | my $self = $_[0]; | ||||
| 1865 | my @closers; | ||||
| 1866 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { | ||||
| 1867 | my @copy = @$still_open; | ||||
| 1868 | $copy[1] = {%{ $copy[1] }}; | ||||
| 1869 | #$copy[1]{'start_line'} = -1; | ||||
| 1870 | if($copy[0] eq '=for') { | ||||
| 1871 | $copy[0] = '=end'; | ||||
| 1872 | } elsif($copy[0] eq '=over') { | ||||
| 1873 | $self->whine( | ||||
| 1874 | $still_open->[1]{start_line} , | ||||
| 1875 | "=over without closing =back" | ||||
| 1876 | ); | ||||
| 1877 | |||||
| 1878 | $copy[0] = '=back'; | ||||
| 1879 | } else { | ||||
| 1880 | die "I don't know how to auto-close an open $copy[0] region"; | ||||
| 1881 | } | ||||
| 1882 | |||||
| 1883 | unless( @copy > 2 ) { | ||||
| 1884 | push @copy, $copy[1]{'target'}; | ||||
| 1885 | $copy[-1] = '' unless defined $copy[-1]; | ||||
| 1886 | # since =over's don't have targets | ||||
| 1887 | } | ||||
| 1888 | |||||
| 1889 | $copy[1]{'fake-closer'} = 1; | ||||
| 1890 | |||||
| 1891 | DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n"; | ||||
| 1892 | unshift @closers, \@copy; | ||||
| 1893 | } | ||||
| 1894 | return @closers; | ||||
| 1895 | } | ||||
| 1896 | |||||
| 1897 | #-------------------------------------------------------------------------- | ||||
| 1898 | |||||
| 1899 | sub _verbatim_format { | ||||
| 1900 | my($it, $p) = @_; | ||||
| 1901 | |||||
| 1902 | my $formatting; | ||||
| 1903 | |||||
| 1904 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines | ||||
| 1905 | DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n"; | ||||
| 1906 | $p->[$i] .= "\n"; | ||||
| 1907 | # Unlike with simple Verbatim blocks, we don't end up just doing | ||||
| 1908 | # a join("\n", ...) on the contents, so we have to append a | ||||
| 1909 | # newline to every line, and then nix the last one later. | ||||
| 1910 | } | ||||
| 1911 | |||||
| 1912 | if( DEBUG > 4 ) { | ||||
| 1913 | print STDERR "<<\n"; | ||||
| 1914 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines | ||||
| 1915 | print STDERR "_verbatim_format $i: $p->[$i]"; | ||||
| 1916 | } | ||||
| 1917 | print STDERR ">>\n"; | ||||
| 1918 | } | ||||
| 1919 | |||||
| 1920 | for(my $i = $#$p; $i > 2; $i--) { | ||||
| 1921 | # work backwards over the lines, except the first (#2) | ||||
| 1922 | |||||
| 1923 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s | ||||
| 1924 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; | ||||
| 1925 | # look at a formatty line preceding a nonformatty one | ||||
| 1926 | DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n"; | ||||
| 1927 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { | ||||
| 1928 | DEBUG > 5 and print STDERR " It's a formatty line. ", | ||||
| 1929 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; | ||||
| 1930 | |||||
| 1931 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { | ||||
| 1932 | DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; | ||||
| 1933 | next; | ||||
| 1934 | } else { | ||||
| 1935 | DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n"; | ||||
| 1936 | } | ||||
| 1937 | } else { | ||||
| 1938 | DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n"; | ||||
| 1939 | next; | ||||
| 1940 | } | ||||
| 1941 | |||||
| 1942 | # A formatty line has to have #: in the first two columns, and uses | ||||
| 1943 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. | ||||
| 1944 | # Example: | ||||
| 1945 | # What do you want? i like pie. [or whatever] | ||||
| 1946 | # #:^^^^^^^^^^^^^^^^^ ///////////// | ||||
| 1947 | |||||
| 1948 | |||||
| 1949 | DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; | ||||
| 1950 | |||||
| 1951 | $formatting = ' ' . $1; | ||||
| 1952 | $formatting =~ s/\s+$//s; # nix trailing whitespace | ||||
| 1953 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op | ||||
| 1954 | splice @$p,$i,1; # remove this line | ||||
| 1955 | $i--; # don't consider next line | ||||
| 1956 | next; | ||||
| 1957 | } | ||||
| 1958 | |||||
| 1959 | if( length($formatting) >= length($p->[$i-1]) ) { | ||||
| 1960 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; | ||||
| 1961 | } else { | ||||
| 1962 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); | ||||
| 1963 | } | ||||
| 1964 | # Make $formatting and the previous line be exactly the same length, | ||||
| 1965 | # with $formatting having a " " as the last character. | ||||
| 1966 | |||||
| 1967 | DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; | ||||
| 1968 | |||||
| 1969 | |||||
| 1970 | my @new_line; | ||||
| 1971 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { | ||||
| 1972 | #print STDERR "Format matches $1\n"; | ||||
| 1973 | |||||
| 1974 | if($2) { | ||||
| 1975 | #print STDERR "SKIPPING <$2>\n"; | ||||
| 1976 | push @new_line, | ||||
| 1977 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); | ||||
| 1978 | } else { | ||||
| 1979 | #print STDERR "SNARING $+\n"; | ||||
| 1980 | push @new_line, [ | ||||
| 1981 | ( | ||||
| 1982 | $3 ? 'VerbatimB' : | ||||
| 1983 | $4 ? 'VerbatimI' : | ||||
| 1984 | $5 ? 'VerbatimBI' : die("Should never get called") | ||||
| 1985 | ), {}, | ||||
| 1986 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) | ||||
| 1987 | ]; | ||||
| 1988 | #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; | ||||
| 1989 | } | ||||
| 1990 | } | ||||
| 1991 | my @nixed = | ||||
| 1992 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line | ||||
| 1993 | DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; | ||||
| 1994 | |||||
| 1995 | DEBUG > 6 and print STDERR "New version of the above line is these tokens (", | ||||
| 1996 | scalar(@new_line), "):", | ||||
| 1997 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; | ||||
| 1998 | $i--; # So the next line we scrutinize is the line before the one | ||||
| 1999 | # that we just went and formatted | ||||
| 2000 | } | ||||
| 2001 | |||||
| 2002 | $p->[0] = 'VerbatimFormatted'; | ||||
| 2003 | |||||
| 2004 | # Collapse adjacent text nodes, just for kicks. | ||||
| 2005 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last | ||||
| 2006 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { | ||||
| 2007 | DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; | ||||
| 2008 | $p->[$i] .= splice @$p, $i+1, 1; # merge | ||||
| 2009 | --$i; # and back up | ||||
| 2010 | } | ||||
| 2011 | } | ||||
| 2012 | |||||
| 2013 | # Now look for the last text token, and remove the terminal newline | ||||
| 2014 | for( my $i = $#$p; $i >= 2; $i-- ) { | ||||
| 2015 | # work backwards over the tokens, even the first | ||||
| 2016 | if( !ref($p->[$i]) ) { | ||||
| 2017 | if($p->[$i] =~ s/\n$//s) { | ||||
| 2018 | DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; | ||||
| 2019 | } else { | ||||
| 2020 | DEBUG > 5 and print STDERR | ||||
| 2021 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; | ||||
| 2022 | } | ||||
| 2023 | last; # we only want the next one | ||||
| 2024 | } | ||||
| 2025 | } | ||||
| 2026 | |||||
| 2027 | return; | ||||
| 2028 | } | ||||
| 2029 | |||||
| 2030 | |||||
| 2031 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 2032 | |||||
| 2033 | |||||
| 2034 | sub _treelet_from_formatting_codes { | ||||
| 2035 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. | ||||
| 2036 | # Like [ '~Top', {'start_line' => $start_line}, | ||||
| 2037 | # "I like ", | ||||
| 2038 | # [ 'B', {}, "pie" ], | ||||
| 2039 | # "!" | ||||
| 2040 | # ] | ||||
| 2041 | # This illustrates the general format of a treelet. It is an array: | ||||
| 2042 | # [0] is a scalar indicating its type. In the example above, the | ||||
| 2043 | # types are '~Top' and 'B' | ||||
| 2044 | # [1] is a hash of various flags about it, possibly empty | ||||
| 2045 | # [2] - [N] are an ordered list of the subcomponents of the treelet. | ||||
| 2046 | # Scalars are literal text, refs are sub-treelets, to | ||||
| 2047 | # arbitrary levels. Stringifying a treelet will recursively | ||||
| 2048 | # stringify the sub-treelets, concatentating everything | ||||
| 2049 | # together to form the exact text of the treelet. | ||||
| 2050 | |||||
| 2051 | my($self, $para, $start_line, $preserve_space) = @_; | ||||
| 2052 | |||||
| 2053 | my $treelet = ['~Top', {'start_line' => $start_line},]; | ||||
| 2054 | |||||
| 2055 | unless ($preserve_space || $self->{'preserve_whitespace'}) { | ||||
| 2056 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. | ||||
| 2057 | $para =~ s/ $//; | ||||
| 2058 | $para =~ s/^ //; | ||||
| 2059 | } | ||||
| 2060 | |||||
| 2061 | # Only apparent problem the above code is that N<< >> turns into | ||||
| 2062 | # N<< >>. But then, word wrapping does that too! So don't do that! | ||||
| 2063 | |||||
| 2064 | |||||
| 2065 | # As a Start-code is encountered, the number of opening bracket '<' | ||||
| 2066 | # characters minus 1 is pushed onto @stack (so 0 means a single bracket, | ||||
| 2067 | # etc). When closing brackets are found in the text, at least this number | ||||
| 2068 | # (plus the 1) will be required to mean the Start-code is terminated. When | ||||
| 2069 | # those are found, @stack is popped. | ||||
| 2070 | my @stack; | ||||
| 2071 | |||||
| 2072 | my @lineage = ($treelet); | ||||
| 2073 | my $raw = ''; # raw content of L<> fcode before splitting/processing | ||||
| 2074 | # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed | ||||
| 2075 | # into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's | ||||
| 2076 | # the 'collapse and trim all whitespace first' lines just above. | ||||
| 2077 | my $inL = 0; | ||||
| 2078 | |||||
| 2079 | DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; | ||||
| 2080 | |||||
| 2081 | # Here begins our frightening tokenizer RE. The following regex matches | ||||
| 2082 | # text in four main parts: | ||||
| 2083 | # | ||||
| 2084 | # * Start-codes. The first alternative matches C< or C<<, the latter | ||||
| 2085 | # followed by some whitespace. $1 will hold the entire start code | ||||
| 2086 | # (including any space following a multiple-angle-bracket delimiter), | ||||
| 2087 | # and $2 will hold only the additional brackets past the first in a | ||||
| 2088 | # multiple-bracket delimiter. length($2) + 1 will be the number of | ||||
| 2089 | # closing brackets we have to find. | ||||
| 2090 | # | ||||
| 2091 | # * Closing brackets. Match some amount of whitespace followed by | ||||
| 2092 | # multiple close brackets. The logic to see if this closes anything | ||||
| 2093 | # is down below. Note that in order to parse C<< >> correctly, we | ||||
| 2094 | # have to use look-behind (?<=\s\s), since the match of the starting | ||||
| 2095 | # code will have consumed the whitespace. | ||||
| 2096 | # | ||||
| 2097 | # * A single closing bracket, to close a simple code like C<>. | ||||
| 2098 | # | ||||
| 2099 | # * Something that isn't a start or end code. We have to be careful | ||||
| 2100 | # about accepting whitespace, since perlpodspec says that any whitespace | ||||
| 2101 | # before a multiple-bracket closing delimiter should be ignored. | ||||
| 2102 | # | ||||
| 2103 | while($para =~ | ||||
| 2104 | m/\G | ||||
| 2105 | (?: | ||||
| 2106 | # Match starting codes, including the whitespace following a | ||||
| 2107 | # multiple-delimiter start code. $1 gets the whole start code and | ||||
| 2108 | # $2 gets all but one of the <s in the multiple-bracket case. | ||||
| 2109 | ([A-Z]<(?:(<+)\s+)?) | ||||
| 2110 | | | ||||
| 2111 | # Match multiple-bracket end codes. $3 gets the whitespace that | ||||
| 2112 | # should be discarded before an end bracket but kept in other cases | ||||
| 2113 | # and $4 gets the end brackets themselves. ($3 can be empty if the | ||||
| 2114 | # construct is empty, like C<< >>, and all the white-space has been | ||||
| 2115 | # gobbled up already, considered to be space after the opening | ||||
| 2116 | # bracket. In this case we use look-behind to verify that there are | ||||
| 2117 | # at least 2 spaces in a row before the ">".) | ||||
| 2118 | (\s+|(?<=\s\s))(>{2,}) | ||||
| 2119 | | | ||||
| 2120 | (\s?>) # $5: simple end-codes | ||||
| 2121 | | | ||||
| 2122 | ( # $6: stuff containing no start-codes or end-codes | ||||
| 2123 | (?: | ||||
| 2124 | [^A-Z\s>] | ||||
| 2125 | | | ||||
| 2126 | (?: | ||||
| 2127 | [A-Z](?!<) | ||||
| 2128 | ) | ||||
| 2129 | | | ||||
| 2130 | # whitespace is ok, but we don't want to eat the whitespace before | ||||
| 2131 | # a multiple-bracket end code. | ||||
| 2132 | # NOTE: we may still have problems with e.g. S<< >> | ||||
| 2133 | (?: | ||||
| 2134 | \s(?!\s*>{2,}) | ||||
| 2135 | ) | ||||
| 2136 | )+ | ||||
| 2137 | ) | ||||
| 2138 | ) | ||||
| 2139 | /xgo | ||||
| 2140 | ) { | ||||
| 2141 | DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; | ||||
| 2142 | if(defined $1) { | ||||
| 2143 | my $bracket_count; # How many '<<<' in a row this has. Needed for | ||||
| 2144 | # Pod::Simple::JustPod | ||||
| 2145 | if(defined $2) { | ||||
| 2146 | DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; | ||||
| 2147 | $bracket_count = length($2) + 1; | ||||
| 2148 | push @stack, $bracket_count; # length of the necessary complex | ||||
| 2149 | # end-code string | ||||
| 2150 | } else { | ||||
| 2151 | DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; | ||||
| 2152 | push @stack, 0; # signal that we're looking for simple | ||||
| 2153 | $bracket_count = 1; | ||||
| 2154 | } | ||||
| 2155 | my $code = substr($1,0,1); | ||||
| 2156 | if ('L' eq $code) { | ||||
| 2157 | if ($inL) { | ||||
| 2158 | $raw .= $1; | ||||
| 2159 | $self->scream( $start_line, | ||||
| 2160 | 'Nested L<> are illegal. Pretending inner one is ' | ||||
| 2161 | . 'X<...> so can continue looking for other errors.'); | ||||
| 2162 | $code = "X"; | ||||
| 2163 | } | ||||
| 2164 | else { | ||||
| 2165 | $raw = ""; # reset raw content accumulator | ||||
| 2166 | $inL = @stack; | ||||
| 2167 | } | ||||
| 2168 | } else { | ||||
| 2169 | $raw .= $1 if $inL; | ||||
| 2170 | } | ||||
| 2171 | push @lineage, [ $code, {}, ]; # new node object | ||||
| 2172 | |||||
| 2173 | # Tell Pod::Simple::JustPod how many brackets there were, but to save | ||||
| 2174 | # space, not in the most usual case of there was just 1. It can be | ||||
| 2175 | # inferred by the absence of this element. Similarly, if there is more | ||||
| 2176 | # than one bracket, extract the white space between the final bracket | ||||
| 2177 | # and the real beginning of the interior. Save that if it isn't just a | ||||
| 2178 | # single space | ||||
| 2179 | if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) { | ||||
| 2180 | $lineage[-1][1]{'~bracket_count'} = $bracket_count; | ||||
| 2181 | my $lspacer = substr($1, 1 + $bracket_count); | ||||
| 2182 | $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " "; | ||||
| 2183 | } | ||||
| 2184 | push @{ $lineage[-2] }, $lineage[-1]; | ||||
| 2185 | } elsif(defined $4) { | ||||
| 2186 | DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; | ||||
| 2187 | # This is where it gets messy... | ||||
| 2188 | if(! @stack) { | ||||
| 2189 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. | ||||
| 2190 | DEBUG > 4 and print STDERR " But it's really just stuff.\n"; | ||||
| 2191 | push @{ $lineage[-1] }, $3, $4; | ||||
| 2192 | next; | ||||
| 2193 | } elsif(!$stack[-1]) { | ||||
| 2194 | # We saw " >>>>" but needed only ">". Back pos up. | ||||
| 2195 | DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n"; | ||||
| 2196 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. | ||||
| 2197 | pos($para) = pos($para) - length($4) + 1; | ||||
| 2198 | } elsif($stack[-1] == length($4)) { | ||||
| 2199 | # We found " >>>>", and it was exactly what we needed. Commonest case. | ||||
| 2200 | DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n"; | ||||
| 2201 | } elsif($stack[-1] < length($4)) { | ||||
| 2202 | # We saw " >>>>" but needed only " >>". Back pos up. | ||||
| 2203 | DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n"; | ||||
| 2204 | pos($para) = pos($para) - length($4) + $stack[-1]; | ||||
| 2205 | } else { | ||||
| 2206 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! | ||||
| 2207 | DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n"; | ||||
| 2208 | push @{ $lineage[-1] }, $3, $4; | ||||
| 2209 | next; | ||||
| 2210 | } | ||||
| 2211 | #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; | ||||
| 2212 | |||||
| 2213 | if ($3 ne " " && $self->{'_output_is_for_JustPod'}) { | ||||
| 2214 | if ($3 ne "") { | ||||
| 2215 | $lineage[-1][1]{'~rspacer'} = $3; | ||||
| 2216 | } | ||||
| 2217 | elsif ($lineage[-1][1]{'~lspacer'} eq " ") { | ||||
| 2218 | |||||
| 2219 | # Here we had something like C<< >> which was a false positive | ||||
| 2220 | delete $lineage[-1][1]{'~lspacer'}; | ||||
| 2221 | } | ||||
| 2222 | else { | ||||
| 2223 | $lineage[-1][1]{'~rspacer'} | ||||
| 2224 | = substr($lineage[-1][1]{'~lspacer'}, -1, 1); | ||||
| 2225 | chop $lineage[-1][1]{'~lspacer'}; | ||||
| 2226 | } | ||||
| 2227 | } | ||||
| 2228 | |||||
| 2229 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
| 2230 | # Keep the element from being childless | ||||
| 2231 | |||||
| 2232 | if ($inL == @stack) { | ||||
| 2233 | $lineage[-1][1]{'raw'} = $raw; | ||||
| 2234 | $inL = 0; | ||||
| 2235 | } | ||||
| 2236 | |||||
| 2237 | pop @stack; | ||||
| 2238 | pop @lineage; | ||||
| 2239 | |||||
| 2240 | $raw .= $3.$4 if $inL; | ||||
| 2241 | |||||
| 2242 | } elsif(defined $5) { | ||||
| 2243 | DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; | ||||
| 2244 | |||||
| 2245 | if(@stack and ! $stack[-1]) { | ||||
| 2246 | # We're indeed expecting a simple end-code | ||||
| 2247 | DEBUG > 4 and print STDERR " It's indeed an end-code.\n"; | ||||
| 2248 | |||||
| 2249 | if(length($5) == 2) { # There was a space there: " >" | ||||
| 2250 | push @{ $lineage[-1] }, ' '; | ||||
| 2251 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element | ||||
| 2252 | push @{ $lineage[-1] }, ''; # keep it from being really childless | ||||
| 2253 | } | ||||
| 2254 | |||||
| 2255 | if ($inL == @stack) { | ||||
| 2256 | $lineage[-1][1]{'raw'} = $raw; | ||||
| 2257 | $inL = 0; | ||||
| 2258 | } | ||||
| 2259 | |||||
| 2260 | pop @stack; | ||||
| 2261 | pop @lineage; | ||||
| 2262 | } else { | ||||
| 2263 | DEBUG > 4 and print STDERR " It's just stuff.\n"; | ||||
| 2264 | push @{ $lineage[-1] }, $5; | ||||
| 2265 | } | ||||
| 2266 | |||||
| 2267 | $raw .= $5 if $inL; | ||||
| 2268 | |||||
| 2269 | } elsif(defined $6) { | ||||
| 2270 | DEBUG > 3 and print STDERR "Found stuff \"$6\"\n"; | ||||
| 2271 | push @{ $lineage[-1] }, $6; | ||||
| 2272 | $raw .= $6 if $inL; | ||||
| 2273 | # XXX does not capture multiplace whitespaces -- 'raw' ends up with | ||||
| 2274 | # at most 1 leading/trailing whitespace, why not all of it? | ||||
| 2275 | # Answer, because we deliberately trimmed it above | ||||
| 2276 | |||||
| 2277 | } else { | ||||
| 2278 | # should never ever ever ever happen | ||||
| 2279 | DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n"; | ||||
| 2280 | die "SPORK 512512!"; | ||||
| 2281 | } | ||||
| 2282 | } | ||||
| 2283 | |||||
| 2284 | if(@stack) { # Uhoh, some sequences weren't closed. | ||||
| 2285 | my $x= "..."; | ||||
| 2286 | while(@stack) { | ||||
| 2287 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
| 2288 | # Hmmmmm! | ||||
| 2289 | |||||
| 2290 | my $code = (pop @lineage)->[0]; | ||||
| 2291 | my $ender_length = pop @stack; | ||||
| 2292 | if($ender_length) { | ||||
| 2293 | --$ender_length; | ||||
| 2294 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); | ||||
| 2295 | } else { | ||||
| 2296 | $x = $code . "<$x>"; | ||||
| 2297 | } | ||||
| 2298 | } | ||||
| 2299 | DEBUG > 1 and print STDERR "Unterminated $x sequence\n"; | ||||
| 2300 | $self->whine($start_line, | ||||
| 2301 | "Unterminated $x sequence", | ||||
| 2302 | ); | ||||
| 2303 | } | ||||
| 2304 | |||||
| 2305 | return $treelet; | ||||
| 2306 | } | ||||
| 2307 | |||||
| 2308 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 2309 | |||||
| 2310 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) | ||||
| 2311 | return stringify_lol($_[1]); | ||||
| 2312 | } | ||||
| 2313 | |||||
| 2314 | sub stringify_lol { # function: stringify_lol($lol) | ||||
| 2315 | my $string_form = ''; | ||||
| 2316 | _stringify_lol( $_[0] => \$string_form ); | ||||
| 2317 | return $string_form; | ||||
| 2318 | } | ||||
| 2319 | |||||
| 2320 | sub _stringify_lol { # the real recursor | ||||
| 2321 | my($lol, $to) = @_; | ||||
| 2322 | for(my $i = 2; $i < @$lol; ++$i) { | ||||
| 2323 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { | ||||
| 2324 | _stringify_lol( $lol->[$i], $to); # recurse! | ||||
| 2325 | } else { | ||||
| 2326 | $$to .= $lol->[$i]; | ||||
| 2327 | } | ||||
| 2328 | } | ||||
| 2329 | return; | ||||
| 2330 | } | ||||
| 2331 | |||||
| 2332 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 2333 | |||||
| 2334 | sub _dump_curr_open { # return a string representation of the stack | ||||
| 2335 | my $curr_open = $_[0]{'curr_open'}; | ||||
| 2336 | |||||
| 2337 | return '[empty]' unless @$curr_open; | ||||
| 2338 | return join '; ', | ||||
| 2339 | map {; | ||||
| 2340 | ($_->[0] eq '=for') | ||||
| 2341 | ? ( ($_->[1]{'~really'} || '=over') | ||||
| 2342 | . ' ' . $_->[1]{'target'}) | ||||
| 2343 | : $_->[0] | ||||
| 2344 | } | ||||
| 2345 | @$curr_open | ||||
| 2346 | ; | ||||
| 2347 | } | ||||
| 2348 | |||||
| 2349 | ########################################################################### | ||||
| 2350 | 1 | 5µs | my %pretty_form = ( | ||
| 2351 | "\a" => '\a', # ding! | ||||
| 2352 | "\b" => '\b', # BS | ||||
| 2353 | "\e" => '\e', # ESC | ||||
| 2354 | "\f" => '\f', # FF | ||||
| 2355 | "\t" => '\t', # tab | ||||
| 2356 | "\cm" => '\cm', | ||||
| 2357 | "\cj" => '\cj', | ||||
| 2358 | "\n" => '\n', # probably overrides one of either \cm or \cj | ||||
| 2359 | '"' => '\"', | ||||
| 2360 | '\\' => '\\\\', | ||||
| 2361 | '$' => '\\$', | ||||
| 2362 | '@' => '\\@', | ||||
| 2363 | '%' => '\\%', | ||||
| 2364 | '#' => '\\#', | ||||
| 2365 | ); | ||||
| 2366 | |||||
| 2367 | sub pretty { # adopted from Class::Classless | ||||
| 2368 | # Not the most brilliant routine, but passable. | ||||
| 2369 | # Don't give it a cyclic data structure! | ||||
| 2370 | my @stuff = @_; # copy | ||||
| 2371 | my $x; | ||||
| 2372 | my $out = | ||||
| 2373 | # join ",\n" . | ||||
| 2374 | join ", ", | ||||
| 2375 | map {; | ||||
| 2376 | if(!defined($_)) { | ||||
| 2377 | "undef"; | ||||
| 2378 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { | ||||
| 2379 | $x = "[ " . pretty(@$_) . " ]" ; | ||||
| 2380 | $x; | ||||
| 2381 | } elsif(ref($_) eq 'SCALAR') { | ||||
| 2382 | $x = "\\" . pretty($$_) ; | ||||
| 2383 | $x; | ||||
| 2384 | } elsif(ref($_) eq 'HASH') { | ||||
| 2385 | my $hr = $_; | ||||
| 2386 | $x = "{" . join(", ", | ||||
| 2387 | map(pretty($_) . '=>' . pretty($hr->{$_}), | ||||
| 2388 | sort keys %$hr ) ) . "}" ; | ||||
| 2389 | $x; | ||||
| 2390 | } elsif(!length($_)) { q{''} # empty string | ||||
| 2391 | } elsif( | ||||
| 2392 | $_ eq '0' # very common case | ||||
| 2393 | or( | ||||
| 2394 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s | ||||
| 2395 | and $_ ne '-0' # the strange case that RE lets thru | ||||
| 2396 | ) | ||||
| 2397 | ) { $_; | ||||
| 2398 | } else { | ||||
| 2399 | # Yes, explicitly name every character desired. There are shorcuts one | ||||
| 2400 | # could make, but I (Karl Williamson) was afraid that some Perl | ||||
| 2401 | # releases would have bugs in some of them. For example [A-Z] works | ||||
| 2402 | # even on EBCDIC platforms to match exactly the 26 uppercase English | ||||
| 2403 | # letters, but I don't know if it has always worked without bugs. It | ||||
| 2404 | # seemed safest just to list the characters. | ||||
| 2405 | # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> | ||||
| 2406 | s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> | ||||
| 2407 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
| 2408 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
| 2409 | qq{"$_"}; | ||||
| 2410 | } | ||||
| 2411 | } @stuff; | ||||
| 2412 | # $out =~ s/\n */ /g if length($out) < 75; | ||||
| 2413 | return $out; | ||||
| 2414 | } | ||||
| 2415 | |||||
| 2416 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 2417 | |||||
| 2418 | # A rather unsubtle method of blowing away all the state information | ||||
| 2419 | # from a parser object so it can be reused. Provided as a utility for | ||||
| 2420 | # backward compatibility in Pod::Man, etc. but not recommended for | ||||
| 2421 | # general use. | ||||
| 2422 | |||||
| 2423 | sub reinit { | ||||
| 2424 | my $self = shift; | ||||
| 2425 | foreach (qw(source_dead source_filename doc_has_started | ||||
| 2426 | start_of_pod_block content_seen last_was_blank paras curr_open | ||||
| 2427 | line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen | ||||
| 2428 | Title)) { | ||||
| 2429 | |||||
| 2430 | delete $self->{$_}; | ||||
| 2431 | } | ||||
| 2432 | } | ||||
| 2433 | |||||
| 2434 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 2435 | 1 | 12µs | 1; | ||
| 2436 | |||||
# spent 66µs within Pod::Simple::BlackBox::CORE:match which was called 9 times, avg 7µs/call:
# once (20µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 37)[Pod/Simple/BlackBox.pm:44]
# once (10µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 32)[Pod/Simple/BlackBox.pm:44]
# once (10µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 34)[Pod/Simple/BlackBox.pm:44]
# once (7µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 41)[Pod/Simple/BlackBox.pm:44]
# once (7µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 39)[Pod/Simple/BlackBox.pm:44]
# once (6µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 43)[Pod/Simple/BlackBox.pm:44]
# once (5µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 30)[Pod/Simple/BlackBox.pm:44]
# once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 28)[Pod/Simple/BlackBox.pm:44]
# once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 45)[Pod/Simple/BlackBox.pm:44] | |||||
# spent 8µs within Pod::Simple::BlackBox::CORE:qr which was called 10 times, avg 840ns/call:
# once (2µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 44)[Pod/Simple/BlackBox.pm:40]
# once (1µs+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 27)[Pod/Simple/BlackBox.pm:40]
# once (700ns+0s) by Pod::Simple::LinkSection::BEGIN@7 at line 2 of (eval 35)[Pod/Simple/BlackBox.pm:74]
# once (700ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 31)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 42)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 36)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 29)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 40)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 38)[Pod/Simple/BlackBox.pm:40]
# once (600ns+0s) by Pod::Simple::BlackBox::my_qr at line 1 of (eval 33)[Pod/Simple/BlackBox.pm:40] |