#!usr/bin/perl ###################################################################################### # # # Author: Clint Cuffy # # Date: 08/20/2016 # # Revised: 01/31/2017 # # UMLS Similarity - Binary Search Tree # # # ###################################################################################### # # # Description: # # ============ # # # # Binary Search Tree Module For Use With UMLS Similarity Word2Vec # # Package # # Features: # # ========= # # Speeds Up xmlTow2v module's "compoundify" option using BST for # # Compound Words # # # ###################################################################################### package Word2vec::Bst; use strict; use warnings; # Word2Vec Utility Package(s) use Word2vec::Node; use vars qw($VERSION); $VERSION = '0.02'; ###################################################################################### # Constructor ###################################################################################### BEGIN { # CONSTRUCTOR : DO SOMETHING HERE } ###################################################################################### # Deconstructor ###################################################################################### END { # DECONSTRUCTOR : DO SOMETHING HERE my ( $self ) = @_; $self->CleanUp( $self->{ _rootNode } ) if defined( $self->{ _rootNode } ); } ###################################################################################### # new Class Operator ###################################################################################### sub new { my $class = shift; my $self = { # Private Member Variables _debugLog => shift, # Boolean (Binary): 0 = False, 1 = True _writeLog => shift, # Boolean (Binary): 0 = False, 1 = True _rootNode => shift, # Node struct }; # Set debug log variable to false if not defined $self->{ _debugLog } = 0 if !defined ( $self->{ _debugLog } ); $self->{ _writeLog } = 0 if !defined ( $self->{ _writeLog } ); $self->{ _rootNode } = Node->new() if !defined ( $self->{ _rootNode } ); bless $self, $class; } ###################################################################################### # DESTROY ###################################################################################### sub DESTROY { my ( $self ) = shift; $self->DeleteBSTNodes( $self->{ _rootNode } ) if defined( $self->{ _rootNode } ); undef( $self->{ _rootNode } ) if defined( $self->{ _rootNode } ); } ###################################################################################### # Module Functions ###################################################################################### sub CreateTree { my ( $self, $aryRef, $start, $end, $parentNode ) = @_; # Check(s) return if !defined ( $aryRef ); return if !defined ( $start ); return if !defined ( $end ); my $rootNode = $self->CreateBST( $aryRef, $start, $end, $parentNode ); $self->SetRootNode( $rootNode ); return 0 if defined( $rootNode ); return -1 if !defined( $rootNode ); } sub CreateBST { my ( $self, $aryRef, $start, $end, $parentNode ) = @_; # Check(s) return if !defined ( $aryRef ); return if !defined ( $start ); return if !defined ( $end ); # Create BST if( $start <= $end ) { my @array = @$aryRef; my $midIndex = int( $start + ( $end - $start ) / 2 ); my $currentNode = Node->new(); $currentNode->data( $array[ $midIndex ] ); # Split Array Into Smaller Arrays For Performance my @rightAry = splice( @array, $midIndex + 1, $end ); my @leftAry = splice( @array, $start, $midIndex ); my $leftAryEnd = @leftAry; my $rightAryEnd = @rightAry; # Set Parent, Left Child and Right Child Nodes Of Current Node $currentNode->parent( $parentNode ); $currentNode->leftChild( $self->CreateBST( \@leftAry, $start, $leftAryEnd -1, $currentNode ) ); $currentNode->rightChild( $self->CreateBST( \@rightAry, $start, $rightAryEnd - 1, $currentNode ) ); return $currentNode; } return undef; } sub BSTContainsSearch { my ( $self, $node, $searchWord ) = @_; # Check(s) return undef if !defined ( $searchWord ); # Search Binary Tree if( defined ( $node ) && defined( $node->data ) ) { my @searchWordAry = split( ' ', $searchWord ); my @nodeDataAry = split( ' ', $node->data ); my $searchStr = ""; my $nodeStr = ""; # Create "Node String" With Same Amount Of Words "Search Word" if( @searchWordAry < @nodeDataAry ) { $searchStr = $searchWord; @nodeDataAry = splice( @nodeDataAry, 0, @searchWordAry ); $nodeStr = join( ' ', @nodeDataAry ); } else { $searchStr = $searchWord; $nodeStr = $node->data; } if( index( $node->data, $searchWord ) != 0 || ( $searchStr cmp $nodeStr ) != 0 ) { if( ( $searchWord cmp $node->data ) == -1 ) { $node = $self->BSTContainsSearch( $node->leftChild, $searchWord ); } elsif( ( $searchWord cmp $node->data ) == 1 ) { $node = $self->BSTContainsSearch( $node->rightChild, $searchWord ); } } return $node; } return undef; } sub BSTExactSearch { my ( $self, $node, $searchWord ) = @_; # Check(s) return undef if !defined ( $searchWord ); # Search Binary Tree if( defined ( $node ) ) { if( defined( $node->data ) && $node->data ne $searchWord ) { if( ( $searchWord cmp $node->data ) == -1 ) { $node = $self->BSTExactSearch( $node->leftChild, $searchWord ); } elsif( ( $searchWord cmp $node->data ) == 1 ) { $node = $self->BSTExactSearch( $node->rightChild, $searchWord ); } } return $node; } return undef; } sub DeleteBSTNodes { my ( $self, $node ) = @_; return if( !defined ( $node ) ); # Get Left Child Node $self->DeleteBSTNodes( $node->leftChild ) if( defined( $node->leftChild ) ); # Get Right Child Node $self->DeleteBSTNodes( $node->rightChild ) if( defined( $node->rightChild ) ); # Delete Node $node->word( "" ); $node->data( "" ); undef( $node ); } ###################################################################################### # Accessors ###################################################################################### sub GetDebugLog { my ( $self ) = @_; $self->{ _debugLog } = 0 if !defined ( $self->{ _debugLog } ); return $self->{ _debugLog }; } sub GetWriteLog { my ( $self ) = @_; $self->{ _writeLog } = 0 if !defined ( $self->{ _writeLog } ); return $self->{ _writeLog }; } sub GetRootNode { my ( $self ) = @_; $self->{ _rootNode } = undef if !defined ( $self->{ _rootNode } ); return $self->{ _rootNode }; } ###################################################################################### # Mutators ###################################################################################### sub SetRootNode { my ( $self, $node ) = @_; return $self->{ _rootNode } = $node; } #################### All Modules Are To Output "1"(True) at EOF ###################### 1; =head1 NAME Word2vec::Bst - xmltow2v Basic Binary Search Tree Module =head1 SYNOPSIS use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $rootNode = $bst->CreateBST( $sortedArrayRef, 0, $arySize, undef ); $bst->SetRootNode( $rootNode ); my $node1 = $bst->BSTExactSearch( $rootNode, "Coffee" ); my $node2 = $bst->BSTContainsSearch( $rootNode, "Coffee" ); print( "Exact Phrase Match Found - Search Word: \"Coffee\"\n" ) if defined( $node1 ); print( "Exact Phrase: \"Coffee\" Not Found\n" ) if !defined( $node1 ); print( "Phrase Containing Word: \"Coffee\" Found\n" ) if defined( $node2 ); print( "Phrase Containing Word: \"Coffee\" Not Found\n" ) if !defined( $node2 ); undef( @sortedArray ); $bst->DESTROY(); undef( $bst ); =head1 DESCRIPTION Word2vec::Bst is a basic binary search tree module for use with Word2vec::Xmltow2v. This module expects a sorted array passed as a function parameter (array reference) to create a balanced binary search tree. =head2 Main Functions =head3 new Description: Returns a new 'Word2vec::Bst' module object. Input: None Output: Word2vec::Bst object. Example: use Word2vec::Bst; my $bst = Word2vec::Bst->new(); print( "Word2vec::Bst object creation successful\n" ) if defined( $bst ); print( "Word2vec::Bst object creation un-successful\n" ) if !defined( $bst ); print( "Removing Tree From Memory\n" ) if defined( $bst ); $bst->DESTROY() if defined( $bst ); undef( $bst ); =head3 DESTROY Description: Removes binary search tree from memory. Input: None Output: None Example: See above example for "new" function. Note: Destroy function is also automatically called during global destruction when exiting the program. =head3 CreateTree Description: Creates binary search tree with required function parameters and sets member variable root node in 'Word2vec::Bst' object. Note: The array must be sorted before calling this method to create a balanced tree. Input: $arrayReference -> Reference to an array containing sorted string data. $startIndex -> Beginning index of sorted array which the function incorporates into the binary tree. $endIndex -> Last index of the sorted array which the function incorporates into the binary tree. $parentNode -> Parent node parameter of type 'Word2vec::Node'. Set to 'undef' during tree instantiation. Output: $value -> '0' if successful / '-1' if un-successful. Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree Created Successfully\n" ) if $result == 0; print( "Binary Search Tree Creation Un-Successful\n" ) if $result == -1; $bst->DESTROY() if defined( $bst ); undef( $bst ); =head3 CreateBST Description: Creates binary search tree with required function parameters. Returns the root node. Note: The array must be sorted before calling this method to create a balanced tree. Input: $arrayReference -> Reference to an array containing sorted string data. $startIndex -> Beginning index of sorted array which the function incorporates into the binary tree. $endIndex -> Last index of the sorted array which the function incorporates into the binary tree. $parentNode -> Parent node parameter of type 'Word2vec::Node'. Set to 'undef' during tree instantiation. Output: Word2vec::Node -> Binary Search Tree root node or 'undef'. Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $rootNode = $bst->CreateBST( $sortedArrayRef, 0, $arySize, undef ); $bst->SetRootNode( $rootNode ); $bst->DESTROY() if defined( $bst ); undef( $bst ); =head3 BSTContainsSearch Description: Searches binary search tree nodes to see if 'node->data' contains passed string parameter, beginning with the passed node parameter and propagates down the tree until found. Input: Word2vec::Node -> Starting tree node to search. (ie. Begin at root node) string -> Search word/phrase. Output: Word2vec::Node -> Returns binary search tree node or 'undef' if not found. Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree - Created Successfully\n" ) if $result == 0; print( "Binary Search Tree - Creation Un-successful\n" ) if $result == -1; die "" if $result == -1; my $node = $bst->BSTContainsSearch( $rootNode, "Coffee" ); print( "Phrase Containing Word: \"Coffee\" Found\n" ) if defined( $node ); print( "Phrase Containing Word: \"Coffee\" Not Found\n" ) if !defined( $node ); undef( @sortedArray ); $bst->DESTROY(); undef( $bst ); =head3 BSTExactSearch Description: Searches binary search tree for passed string parameter, beginning with passed node and propagates down the tree until found. Input: Word2vec::Node -> Starting tree node to search. (ie. Begin at root node) string -> Search word/phrase. Output: Word2vec::Node -> Returns binary search tree node or 'undef' if not found. Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree - Created Successfully\n" ) if $result == 0; print( "Binary Search Tree - Creation Un-successful\n" ) if $result == -1; die "" if $result == -1; my $node = $bst->BSTExactSearch( $rootNode, "Money" ); print( "Exact Phrase Match Found - Search Word: \"Money\"\n" ) if defined( $node ); print( "Exact Phrase: \"Money\" Not Found\n" ) if !defined( $node ); undef( @sortedArray ); $bst->DESTROY(); undef( $bst ); =head3 DeleteBSTNodes Description: Recursive function that deletes all parameter node's left and right children that propagates downward. Called by DESTROY() function to remove tree from memory. Input: Word2vec::Node -> Starting node of tree to remove from memory. (ie. root node) Output: None Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree - Created Successfully\n" ) if $result == 0; print( "Binary Search Tree - Creation Un-successful\n" ) if $result == -1; die "" if $result == -1; print( "Destroying Binary Search Tree\n" ); $bst->DESTROY(); undef( @sortedArray ); undef( $bst ); =head2 Accessor Functions =head3 GetDebugLog Description: Returns the _debugLog member variable set during Word2vec::Bst object initialization of new function. Input: None Output: $value -> 0 = False, 1 = True Example: use Word2vec::Bst; my $bst = Word2vec::Bst->new(); my $debugLog = $bst->GetDebugLog(); print( "Debug Logging Enabled\n" ) if $debugLog == 1; print( "Debug Logging Disabled\n" ) if $debugLog == 0; undef( $bst ); =head3 GetWriteLog Description: Returns the _writeLog member variable set during Word2vec::Bst object initialization of new function. Input: None Output: $value -> 0 = False, 1 = True Example: use Word2vec::Bst; my $bst = Word2vec::Bst->new(); my $writeLog = $bst->GetWriteLog(); print( "Write Logging Enabled\n" ) if $writeLog == 1; print( "Write Logging Disabled\n" ) if $writeLog == 0; undef( $bst ); =head3 GetRootNode Description: Returns binary search tree root node. Input: None Output: Word2vec::Node -> Binary Search Tree Root Node Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree - Created Successfully\n" ) if $result == 0; print( "Binary Search Tree - Creation Un-successful\n" ) if $result == -1; die "" if $result == -1; my $rootNode = $bst->GetRootNode(); print( "BST Root Node Exists\n" ) if defined( $rootNode ); print( "BST Root Node Does Not Exist\n" ) if !defined( $rootNode ); print( "Root Node Contains Data: " . $rootNode->data . "\n" ) if defined( $rootNode ) && defined( $rootNode->data ); print( "Destroying Binary Search Tree\n" ); $bst->DESTROY(); undef( @sortedArray ); undef( $bst ); =head2 Mutator Functions =head3 SetRootNode Description: Sets binary search tree root node to passed node parameter. Input: Word2vec::Node -> Binary Search Tree node which will be set to the root node of the tree. Output: None Example: use Word2vec::Bst; my @sortedArray = ( "Cookie", "Lungs", "Money", "Veterinarian", "Dog", "Urn", "Heart", "Coffee Grounds" ); @sortedArray = sort( @sortedArray ); my $sortedArrayRef = \@sortedArray; my $arySize = @sortedArray; my $bst = Word2vec::Bst->new(); my $result = $bst->CreateTree( $sortedArrayRef, 0, $arySize, undef ); print( "Binary Search Tree - Created Successfully\n" ) if $result == 0; print( "Binary Search Tree - Creation Un-successful\n" ) if $result == -1; die "" if $result == -1; my $rootNode = $bst->GetRootNode(); print( "BST Root Node Exists\n" ) if defined( $rootNode ); print( "BST Root Node Does Not Exist\n" ) if !defined( $rootNode ); $bst->SetRootNode( $rootNode ) if defined( $rootNode ); print( "Destroying Binary Search Tree\n" ); $bst->DESTROY(); undef( @sortedArray ); undef( $bst ); =head1 Author Clint Cuffy, Virginia Commonwealth University =head1 COPYRIGHT Copyright (c) 2016 Bridget T McInnes, Virginia Commonwealth University btmcinnes at vcu dot edu Clint Cuffy, Virginia Commonwealth University cuffyca at vcu dot edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to: The Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. =cut