#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ #------------------------------------------------------------------------------- # Create a parse tree from an array of terms representing an expression. # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021 #------------------------------------------------------------------------------- package Tree::Term; use v5.26; our $VERSION = 20210630; # Version use warnings FATAL => qw(all); use strict; use Carp qw(confess cluck); use Data::Dump qw(dump ddx pp); use Data::Table::Text qw(:all); use feature qw(say state current_sub); #D1 Parse # Create a parse tree from an array of terms representing an expression. sub new($@) #P New term. {my ($operator, @operands) = @_; # Operator, operands. my $t = genHash(__PACKAGE__, # Description of a term in the expression. operands => @operands ? [@operands] : undef, # Operands to which the operator will be applied. operator => $operator, # Operator to be applied to one or more operands. up => undef, # Parent term if this is a sub term. ); $_->up = $t for grep {ref $_} @operands; # Link to parent if possible $t } sub parse(@) # Parse an expression. {my (@expression) = @_; # Expression to parse my @s; # Stack my $codes = genHash(q(Tree::Term::Codes), # Lexical item codes. a => 'assign', # Infix operator with priority 2 binding right to left typically used in an assignment. b => 'open', # Open parenthesis. B => 'close', # Close parenthesis. d => 'dyad', # Infix operator with priority 3 binding left to right typically used in arithmetic. p => 'prefix', # Monadic prefix operator. q => 'suffix', # Monadic suffix operator. s => 'semi-colon', # Infix operator with priority 1 binding left to right typically used to separate statements. t => 'term', # A term in the expression. v => 'variable', # A variable in the expression. ); my sub term() # Convert the longest possible expression on top of the stack into a term {my $n = scalar(@s); # lll "TTTT $n \n", dump([@s]); my sub test($$) # Check the type of an item in the stack {my ($item, $type) = @_; # Item to test, expected type of item return index($type, 't') > -1 if ref $item; # Term index($type, substr($item, 0, 1)) > -1 # Something other than a term defines its type by its first letter }; if (@s >= 3) # Go for term infix-operator term {my ($r, $d, $l) = reverse @s; if (test($l, 't') and test($r, 't') and test($d, 'ads')) # Parse out infix operator expression {pop @s for 1..3; push @s, new $d, $l, $r; return 1; } if (test($l, 'b') and test($r, 'B') and test($d, 't')) # Parse bracketed term {pop @s for 1..3; push @s, $d; return 1; } } if (@s >= 2) # Convert ( ) to an empty term {my ($r, $l) = reverse @s; if (test($l, 'b') and test($r, 'B')) # Empty pair of brackets {pop @s for 1..2; push @s, new 'empty1'; return 1; } if (test($l, 'p') and test($r, 't')) # Prefix operator applied to a term {pop @s for 1..2; push @s, new $l, $r; return 1; } if (test($r,'q') and test($l, 't')) # Post fix operator applied to a term {pop @s for 1..2; push @s, new $r, $l; return 1; } if (test($l,'s') and test($r, 'B')) # Semi-colon, close implies remove unneeded semi {pop @s for 1..2; push @s, $r; return 1; } } if (@s >= 1) # Convert variable to term {my ($t) = reverse @s; if (test($t, 'v')) # Single variable {pop @s for 1; push @s, new $t; return 1; } } if (@s == 1) # Convert leading semi to empty, semi {my ($t) = @s; if (test($t,'s')) # Semi {@s = (new('empty4'), $t); return 1; } } undef # No move made }; for my $i(keys @expression) # Each input element {my $e = $expression[$i]; # lll "AAAA $i $e\n", dump([@s]); if (!@s) # Empty stack {confess "Expression must start with a variable or open or a prefix operator or a semi" if !ref($e) and $e !~ m(\A(b|p|s|v)); push @s, $e; term; next; } my $s = $s[-1]; # Stack has data my sub type() # Type of the current stack top {return 't' if ref $s; # Term on top of stack substr($s, 0, 1); # Something other than a term defines its type by its first letter }; my sub check($) # Check that the top of the stack has one of the specified elements {my ($types) = @_; # Possible types to match return 1 if index($types, type) > -1; # Check type allowed my @c; for my $c(split //, $types) # Translate lexical codes into types {push @c, $$codes{$c}; } my $c = join ', ', sort @c; confess qq(Expected $e to follow one of $c at $i but not: $s\n); }; my sub test($) # Check that the second item on the stack contains one of the expected items {my ($types) = @_; # Possible types to match return undef unless @s >= 2; # Stack not deep enough so cannot contain any of the specified types return 1 if index($types, ref($s[-2]) ? 't' : substr($s[-2], 0, 1)) > -1; undef }; if ($e =~ m(a)) # Assign {check("Bqtv"); push @s, $e; next; } if ($e =~ m(b)) # Open {check("abds"); push @s, $e; next; } if ($e =~ m(B)) # Closing parenthesis {check("abqstv"); 1 while term; push @s, $e; 1 while term; check("bst"); } if ($e =~ m(d)) # Infix but not assign or semi-colon {check("Bqtv"); push @s, $e; next; } if ($e =~ m(p)) # Prefix {check("abdps"); push @s, $e; next; } if ($e =~ m(q)) # Suffix {check("Bqtv"); push @s, $e; term; next; } if ($e =~ m(s)) # Semi colon {check("bBqstv"); push @s, new 'empty5' if $s =~ m(\A(s|b)); # Insert an empty element between two consecutive semicolons 1 while term; push @s, $e; next; } if ($e =~ m(v)) # Variable {check("abdps"); push @s, $e; term; 1 while test("p") and term; next; } } pop @s while @s > 1 and $s[-1] =~ m(s); # Remove any trailing semi colons 1 while term; # Final reductions # lll "EEEE\n", dump([@s]); @s == 1 or confess "Incomplete expression"; $s[0] # The resulting parse tree } # parse #D1 Print # Print a parse tree to make it easy to visualize its structure. sub depth($) #P Depth of a term in an expression. {my ($term) = @_; # Term my $d = 0; for(my $t = $term; $t; $t = $t->up) {++$d} $d } sub listTerms($) #P List the terms in an expression in post order {my ($expression) = @_; # Root term my @t; # Terms sub # Recurse through terms {my ($e) = @_; # Term my $o = $e->operands; return unless $e; # Operator if (my @o = $o ? grep {ref $_} @$o : ()) # Operands {my ($p, @p) = @o; __SUB__->($p); # First operand push @t, $e; # Operator __SUB__->($_) for @p; # Second and subsequent operands } else # No operands {push @t, $e; # Operator } } ->($expression); @t } sub flat($@) # Print the terms in the expression as a tree from left right to make it easier to visualize the structure of the tree. {my ($expression, @title) = @_; # Root term, optional title my @t = $expression->listTerms; # Terms in expression in post order my @s; # Print my sub align # Align the ends of the lines {my $L = 0; # Longest line for my $s(@s) {my $l = length $s; $L = $l if $l > $L; } for my $i(keys @s) # Pad to longest {my $s = $s[$i] =~ s/\s+\Z//rs; my $l = length($s); if ($l < $L) {my $p = ' ' x ($L - $l); $s[$i] = $s . $p; } } }; for my $t(@t) # Initialize output rectangle {$s[$_] //= '' for 0..$t->depth; } for my $t(@t) # Traverse tree {my $d = $t->depth; my $p = $t->operator; # Operator align if $p =~ m(\A(a|d|s)); # Shift over for some components $s[$d] .= " $p"; # Describe operator or operand align unless $p =~ m(\A(p|q|v)); # Vertical for some components } shift @s while @s and $s[ 0] =~ m(\A\s*\Z)s; # Remove leading blank lines for my $i(keys @s) # Clean up trailing blanks so that tests are not affected by spurious white space mismatches {$s[$i] =~ s/\s+\n/\n/gs; $s[$i] =~ s/\s+\Z//gs; } unshift @s, join(' ', @title) if @title; # Add title join "\n", @s, ''; } #D #------------------------------------------------------------------------------- # Export - eeee #------------------------------------------------------------------------------- use Exporter qw(import); use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw( ); %EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]); # podDocumentation =pod =encoding utf-8 =head1 Name Tree::Term - Create a parse tree from an array of terms representing an expression. =head1 Synopsis The expression to L<parse> is presented as an array of words, the first letter of each word indicates its lexical role as in: my @e = qw(b b p2 p1 v1 q1 q2 B d3 b p4 p3 v2 q3 q4 d4 p6 p5 v3 q5 q6 B s B s); Where: a assign - infix operator with priority 2 binding right to left b open - open parenthesis B close - close parenthesis d dyad - infix operator with priority 3 binding left to right p prefix - monadic prefix operator q suffix - monadic suffix operator s semi-colon - infix operator with priority 1 binding left to right v variable - a variable in the expression The results of parsing the expression can be printed with L<flat> which provides a left to right representation of the parse tree. is_deeply parse(@e)->flat, <<END; d3 q2 d4 q1 q4 q6 p2 q3 q5 p1 p4 p6 v1 p3 p5 v2 v3 END =head1 Description Create a parse tree from an array of terms representing an expression. Version 20210630. The following sections describe the methods in each functional area of this module. For an alphabetic listing of all methods by name see L<Index|/Index>. =head1 Parse Create a parse tree from an array of terms representing an expression. =head2 parse(@expression) Parse an expression. Parameter Description 1 @expression Expression to parse B<Example:> ok T [qw(b b p2 p1 v1 q1 q2 B d3 b p4 p3 v2 q3 q4 d4 p6 p5 v3 q5 q6 B s B s)], <<END; d3 q2 d4 q1 q4 q6 p2 q3 q5 p1 p4 p6 v1 p3 p5 v2 v3 END =head1 Print Print a parse tree to make it easy to visualize its structure. =head2 flat($expression, @title) Print the terms in the expression as a tree from left right to make it easier to visualize the structure of the tree. Parameter Description 1 $expression Root term 2 @title Optional title B<Example:> ok T [qw(p2 p1 v1 q1 q2 d3 p4 p3 v2 q3 q4 d4 p6 p5 v3 q5 q6 s)], <<END; d3 q2 d4 q1 q4 q6 p2 q3 q5 p1 p4 p6 v1 p3 p5 v2 v3 END =head1 Hash Definitions =head2 Tree::Term Definition Description of a term in the expression. =head3 Output fields =head4 operands Operands to which the operator will be applied. =head4 operator Operator to be applied to one or more operands. =head4 up Parent term if this is a sub term. =head2 Tree::Term::Codes Definition Lexical item codes. =head3 Output fields =head4 B Close parenthesis. =head4 a Infix operator with priority 2 binding right to left typically used in an assignment. =head4 b Open parenthesis. =head4 d Infix operator with priority 3 binding left to right typically used in arithmetic. =head4 p Monadic prefix operator. =head4 q Monadic suffix operator. =head4 s Infix operator with priority 1 binding left to right typically used to separate statements. =head4 t A term in the expression. =head4 v A variable in the expression. =head1 Private Methods =head2 new($operator, @operands) New term. Parameter Description 1 $operator Operator 2 @operands Operands. =head2 depth($term) Depth of a term in an expression. Parameter Description 1 $term Term =head2 listTerms($expression) List the terms in an expression in post order Parameter Description 1 $expression Root term =head1 Index 1 L<depth|/depth> - Depth of a term in an expression. 2 L<flat|/flat> - Print the terms in the expression as a tree from left right to make it easier to visualize the structure of the tree. 3 L<listTerms|/listTerms> - List the terms in an expression in post order 4 L<new|/new> - New term. 5 L<parse|/parse> - Parse an expression. =head1 Installation This module is written in 100% Pure Perl and, thus, it is easy to read, comprehend, use, modify and install via B<cpan>: sudo cpan install Tree::Term =head1 Author L<philiprbrenan@gmail.com|mailto:philiprbrenan@gmail.com> L<http://www.appaapps.com|http://www.appaapps.com> =head1 Copyright Copyright (c) 2016-2021 Philip R Brenan. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself. =cut # Tests and documentation sub test {my $p = __PACKAGE__; binmode($_, ":utf8") for *STDOUT, *STDERR; return if eval "eof(${p}::DATA)"; my $s = eval "join('', <${p}::DATA>)"; $@ and die $@; eval $s; $@ and die $@; 1 } test unless caller; 1; # podDocumentation __DATA__ use Time::HiRes qw(time); use Test::More; my $develop = -e q(/home/phil/); # Developing my $log = q(/home/phil/perl/cpan/TreeTerm/lib/Tree/zzz.txt); # Log file my $localTest = ((caller(1))[0]//'Tree::Term') eq "Tree::Term"; # Local testing mode Test::More->builder->output("/dev/null") if $localTest; # Reduce number of confirmation messages during testing if ($^O =~ m(bsd|linux)i) # Supported systems {plan tests => 23; } else {plan skip_all =>qq(Not supported on: $^O); } sub T #P Test a parse {my ($expression, $expected) = @_; # Expression, expected result my $g = parse(@$expression)->flat; my $r = $g eq $expected; owf($log, $g) if -e $log; # Save result if testing confess "Failed test" unless $r; $r } eval {goto latest}; ok T [qw(v1)], <<END; v1 END ok T [qw(s)], <<END; empty4 END ok T [qw(s s)], <<END; s empty4 empty5 END ok T [qw(v1 d2 v3)], <<END; d2 v1 v3 END ok T [qw(v1 a2 v3)], <<END; a2 v1 v3 END ok T [qw(v1 a2 v3 d4 v5)], <<END; a2 v1 d4 v3 v5 END ok T [qw(v1 a2 v3 d4 v5 s6 v8 a9 v10)], <<END; s6 a2 a9 v1 d4 v8 v10 v3 v5 END ok T [qw(v1 a2 v3 s s s v4 a5 v6 s s)], <<END; s s empty5 s a5 s empty5 v4 v6 a2 empty5 v1 v3 END ok T [qw(b B)], <<END; empty1 END ok T [qw(b b B B)], <<END; empty1 END ok T [qw(b b v1 B B)], <<END; v1 END ok T [qw(b b v1 a2 v3 B B)], <<END; a2 v1 v3 END ok T [qw(b b v1 a2 v3 d4 v5 B B)], <<END; a2 v1 d4 v3 v5 END ok T [qw(p1 v1)], <<END; p1 v1 END ok T [qw(p2 p1 v1)], <<END; p2 p1 v1 END ok T [qw(v1 q1)], <<END; q1 v1 END ok T [qw(v1 q1 q2)], <<END; q2 q1 v1 END ok T [qw(p2 p1 v1 q1 q2)], <<END; q2 q1 p2 p1 v1 END ok T [qw(p2 p1 v1 q1 q2 d3 p4 p3 v2 q3 q4)], <<END; d3 q2 q4 q1 q3 p2 p4 p1 p3 v1 v2 END ok T [qw(p2 p1 v1 q1 q2 d3 p4 p3 v2 q3 q4 d4 p6 p5 v3 q5 q6 s)], <<END; #Tflat d3 q2 d4 q1 q4 q6 p2 q3 q5 p1 p4 p6 v1 p3 p5 v2 v3 END ok T [qw(b s B)], <<END; empty5 END ok T [qw(b s s B)], <<END; s empty5 empty5 END ok T [qw(b b p2 p1 v1 q1 q2 B d3 b p4 p3 v2 q3 q4 d4 p6 p5 v3 q5 q6 B s B s)], <<END; #Tparse d3 q2 d4 q1 q4 q6 p2 q3 q5 p1 p4 p6 v1 p3 p5 v2 v3 END