blob: de00e4d0fa1f8224ed33a305e1b5f3eda036def2 [file] [log] [blame]
package Text::ParseWords;
use strict;
require 5.006;
our $VERSION = "3.30";
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
our @EXPORT_OK = qw(old_shellwords);
our $PERL_SINGLE_QUOTE;
sub shellwords {
my (@lines) = @_;
my @allwords;
foreach my $line (@lines) {
$line =~ s/^\s+//;
my @words = parse_line('\s+', 0, $line);
pop @words if (@words and !defined $words[-1]);
return() unless (@words || !length($line));
push(@allwords, @words);
}
return(@allwords);
}
sub quotewords {
my($delim, $keep, @lines) = @_;
my($line, @words, @allwords);
foreach $line (@lines) {
@words = parse_line($delim, $keep, $line);
return() unless (@words || !length($line));
push(@allwords, @words);
}
return(@allwords);
}
sub nested_quotewords {
my($delim, $keep, @lines) = @_;
my($i, @allwords);
for ($i = 0; $i < @lines; $i++) {
@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
return() unless (@{$allwords[$i]} || !length($lines[$i]));
}
return(@allwords);
}
sub parse_line {
my($delimiter, $keep, $line) = @_;
my($word, @pieces);
no warnings 'uninitialized'; # we will be testing undef strings
while (length($line)) {
# This pattern is optimised to be stack conservative on older perls.
# Do not refactor without being careful and testing it on very long strings.
# See Perl bug #42980 for an example of a stack busting input.
$line =~ s/^
(?:
# double quoted string
(") # $quote
((?>[^\\"]*(?:\\.[^\\"]*)*))" # $quoted
| # --OR--
# singe quoted string
(') # $quote
((?>[^\\']*(?:\\.[^\\']*)*))' # $quoted
| # --OR--
# unquoted string
( # $unquoted
(?:\\.|[^\\"'])*?
)
# followed by
( # $delim
\Z(?!\n) # EOL
| # --OR--
(?-x:$delimiter) # delimiter
| # --OR--
(?!^)(?=["']) # a quote
)
)//xs or return; # extended layout
my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
return() unless( defined($quote) || length($unquoted) || length($delim));
if ($keep) {
$quoted = "$quote$quoted$quote";
}
else {
$unquoted =~ s/\\(.)/$1/sg;
if (defined $quote) {
$quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
}
}
$word .= substr($line, 0, 0); # leave results tainted
$word .= defined $quote ? $quoted : $unquoted;
if (length($delim)) {
push(@pieces, $word);
push(@pieces, $delim) if ($keep eq 'delimiters');
undef $word;
}
if (!length($line)) {
push(@pieces, $word);
}
}
return(@pieces);
}
sub old_shellwords {
# Usage:
# use ParseWords;
# @words = old_shellwords($line);
# or
# @words = old_shellwords(@lines);
# or
# @words = old_shellwords(); # defaults to $_ (and clobbers it)
no warnings 'uninitialized'; # we will be testing undef strings
local *_ = \join('', @_) if @_;
my (@words, $snippet);
s/\A\s+//;
while ($_ ne '') {
my $field = substr($_, 0, 0); # leave results tainted
for (;;) {
if (s/\A"(([^"\\]|\\.)*)"//s) {
($snippet = $1) =~ s#\\(.)#$1#sg;
}
elsif (/\A"/) {
require Carp;
Carp::carp("Unmatched double quote: $_");
return();
}
elsif (s/\A'(([^'\\]|\\.)*)'//s) {
($snippet = $1) =~ s#\\(.)#$1#sg;
}
elsif (/\A'/) {
require Carp;
Carp::carp("Unmatched single quote: $_");
return();
}
elsif (s/\A\\(.?)//s) {
$snippet = $1;
}
elsif (s/\A([^\s\\'"]+)//) {
$snippet = $1;
}
else {
s/\A\s+//;
last;
}
$field .= $snippet;
}
push(@words, $field);
}
return @words;
}
1;
__END__