NAME
Data::Mining::Apriori - Perl extension implementing the Apriori data mining algorithm.
SYNOPSIS
use strict;
use warnings;
use Data::Mining::Apriori;

# Sample data set used below: one row per transaction, one column per item
# (1 = the item occurs in the transaction, 0 = it does not).
#
# TRANSACTION 103:CEREAL 101:MILK 102:BREAD
# 1101 1 1 0
# 1102 1 0 1
# 1103 1 1 1
# 1104 1 1 1
# 1105 0 1 1
# 1106 1 1 1
# 1107 1 1 1
# 1108 1 0 1
# 1109 1 1 1
# 1110 1 1 1

# Direct class-method call; the indirect form "new Data::Mining::Apriori"
# is ambiguous to the parser and is best avoided.
my $apriori = Data::Mining::Apriori->new;

$apriori->{totalTransactions} = 10;   # The total number of transactions
$apriori->{minSupport}        = 1.55; # The minimum support
$apriori->{minConfidence}     = 1.55; # The minimum confidence
$apriori->{output}            = 1;    # The output type {1:export to file, 2:export to excel} (optional)
$apriori->{messages}          = 1;    # A boolean {1:true, 0:false} to display the messages (optional)

# Hash table mapping each item key to its description
$apriori->{itemsKeyDescription}{'101'} = 'MILK';
$apriori->{itemsKeyDescription}{102}   = 'BREAD';
$apriori->{itemsKeyDescription}{'103'} = 'CEREAL';

# For each item key, the array of transactions in which the item occurs
@{ $apriori->{itemsKeyTransactions}{'101'} } = ('1101',1103,'1104',1105,'1106',1107,'1109',1110);
@{ $apriori->{itemsKeyTransactions}{102} }   = ('1102',1103,'1104',1105,'1106',1107,1108,'1109',1110);
@{ $apriori->{itemsKeyTransactions}{'103'} } = ('1101',1102,1103,'1104','1106',1107,1108,'1109',1110);

# Generate association rules until no set of items meets the minimum
# support or minimum confidence
$apriori->generate_rules;
# or
# $apriori->association_rules_itemset_size(3); # Generate rules from itemsets of the given size

print "\n@{$apriori->{frequentItemset}}\n"; # Show frequent items
# or from a database
# CREATE TABLE dimension_product(
# product_key INTEGER NOT NULL PRIMARY KEY,
# product_alternate_key INTEGER NOT NULL,
# product_name TEXT NOT NULL,
# price REAL NOT NULL
# // ...
# );
#
# INSERT INTO dimension_product VALUES(1,101,'MILK',10.00);
# INSERT INTO dimension_product VALUES(2,102,'BREAD',10.00);
# INSERT INTO dimension_product VALUES(3,103,'CEREAL',10.00);
#
# // ...
#
# CREATE TABLE fact_sales(
# sales_order_number INTEGER NOT NULL,
# sales_order_line_number INTEGER NOT NULL,
# product_key INTEGER NOT NULL,
# quantity INTEGER NOT NULL,
# // ...
# PRIMARY KEY(sales_order_number, sales_order_line_number),
# FOREIGN KEY(product_key) REFERENCES dimension_product(product_key)
# );
#
# INSERT INTO fact_sales VALUES(1101,1,3,1);
# INSERT INTO fact_sales VALUES(1101,2,1,1);
# INSERT INTO fact_sales VALUES(1102,1,3,1);
# INSERT INTO fact_sales VALUES(1102,2,2,1);
# INSERT INTO fact_sales VALUES(1103,1,1,1);
# INSERT INTO fact_sales VALUES(1103,2,2,1);
# INSERT INTO fact_sales VALUES(1103,3,3,1);
# INSERT INTO fact_sales VALUES(1104,1,1,1);
# INSERT INTO fact_sales VALUES(1104,2,2,1);
# INSERT INTO fact_sales VALUES(1104,3,3,1);
# INSERT INTO fact_sales VALUES(1105,1,1,1);
# INSERT INTO fact_sales VALUES(1105,2,2,1);
# INSERT INTO fact_sales VALUES(1106,1,1,1);
# INSERT INTO fact_sales VALUES(1106,2,2,1);
# INSERT INTO fact_sales VALUES(1106,3,3,1);
# INSERT INTO fact_sales VALUES(1107,1,1,1);
# INSERT INTO fact_sales VALUES(1107,2,2,1);
# INSERT INTO fact_sales VALUES(1107,3,3,1);
# INSERT INTO fact_sales VALUES(1108,1,3,1);
# INSERT INTO fact_sales VALUES(1108,2,2,1);
# INSERT INTO fact_sales VALUES(1109,1,1,1);
# INSERT INTO fact_sales VALUES(1109,2,2,1);
# INSERT INTO fact_sales VALUES(1109,3,3,1);
# INSERT INTO fact_sales VALUES(1110,1,1,1);
# INSERT INTO fact_sales VALUES(1110,2,2,1);
# INSERT INTO fact_sales VALUES(1110,3,3,1);
use strict;
use warnings;
use DBI;            # Provides DBI->connect; the DSN below selects the SQLite driver
use DBD::SQLite;
use Data::Mining::Apriori;

# RaiseError makes every failed DBI call (connect, prepare, execute, fetch)
# die with a message instead of failing silently.
my $db = DBI->connect('dbi:SQLite:dbname=DW.db', '', '', { RaiseError => 1 });

# Total number of distinct transactions (sales orders).
my $sql = q~
SELECT COUNT(DISTINCT(sales_order_number)) FROM fact_sales
/* WHERE ... */
~;
my $query = $db->prepare($sql);
$query->execute;
my ($totalTransactions) = $query->fetchrow_array;

# Declared here so this example also runs on its own under "use strict".
my $apriori = Data::Mining::Apriori->new;
$apriori->{totalTransactions} = $totalTransactions;
$apriori->{minSupport}        = 1.55;
$apriori->{minConfidence}     = 1.55;
$apriori->{output}            = 1;
$apriori->{messages}          = 1;

# Minimum number of sales rows an item needs in order to be considered:
# minSupport percent of the total number of transactions, truncated.
my $support = int((($apriori->{totalTransactions}/100)*$apriori->{minSupport}));

# Items (key and description) that reach the minimum count.
# $support is interpolated on purpose: it is an integer computed above, and
# DBD::SQLite binds placeholders as text by default, which makes a
# comparison between COUNT(*) and a placeholder behave unexpectedly.
$sql = qq~
SELECT dp.product_alternate_key, dp.product_name, COUNT(*)
FROM dimension_product dp
JOIN fact_sales fs ON
dp.product_key = fs.product_key
/* WHERE ... */
GROUP BY dp.product_alternate_key, dp.product_name
HAVING COUNT(*) >= $support
~;
$query = $db->prepare($sql);
$query->execute;
while (my ($key, $description) = $query->fetchrow_array) {
    $apriori->{itemsKeyDescription}{$key} = $description;
}

# Transactions of each item. The statement is prepared once and executed
# once per item key, with the key passed as a bind value.
$sql = q~
SELECT DISTINCT(fs.sales_order_number)
FROM dimension_product dp
JOIN fact_sales fs ON
dp.product_key = fs.product_key
WHERE dp.product_alternate_key = ?
/* AND ... */
~;
$query = $db->prepare($sql);
foreach my $key (keys %{ $apriori->{itemsKeyDescription} }) {
    $query->execute($key);
    # List assignment: the loop ends when no row is left, not when a
    # transaction number happens to be false (0 or an empty string).
    while (my ($transaction) = $query->fetchrow_array) {
        push @{ $apriori->{itemsKeyTransactions}{$key} }, $transaction;
    }
}

$apriori->generate_rules;
# or
# $apriori->association_rules_itemset_size(3);

print "\n@{$apriori->{frequentItemset}}\n";
DESCRIPTION
This module implements the Apriori data mining algorithm.
ATTRIBUTES
totalTransactions
The total number of transactions.
minSupport
The minimum support.
minConfidence
The minimum confidence.
output
The output type {1:export to file, 2:export to excel}(optional).
messages
A boolean value {1:true, 0:false} that controls whether the messages are displayed (optional).
itemsKeyDescription
Hash table that maps each item key to its description.
itemsKeyTransactions
Hash table that maps each item key to a reference to the array of transactions containing that item.
quantityPossibleRules
Quantity of possible rules.
frequentItemset
Frequent itemset.
recursively
A boolean value {1:true, 0:false} to generate association rules until no set of items meets the minimum support or minimum confidence.
METHODS
new
Creates a new instance of Data::Mining::Apriori.
generate_rules
Generate association rules until no set of items meets the minimum support or minimum confidence.
association_rules_itemset_size
Generates association rules from sets of items of a given size. Accepts the following argument:
an integer
Representing the size of the set of items.
AUTHOR
Alex Graciano, <agraciano@cpan.org>
COPYRIGHT AND LICENSE
Copyright (C) 2015 by Alex Graciano
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.4 or, at your option, any later version of Perl 5 you may have available.