NAME

Data::Mining::Apriori - Perl extension that implements the Apriori algorithm for data mining.

SYNOPSIS

use strict;
use warnings;
use Data::Mining::Apriori;

# TRANSACTION 103:CEREAL 101:MILK 102:BREAD
#        1101          1        1         0
#        1102          1        0         1
#        1103          1        1         1
#        1104          1        1         1
#        1105          0        1         1
#        1106          1        1         1
#        1107          1        1         1
#        1108          1        0         1
#        1109          1        1         1
#        1110          1        1         1

my $apriori = new Data::Mining::Apriori;

$apriori->{totalTransactions}=10; # The total number of transactions

$apriori->{minSupport}=1.55; # The minimum support(percent)

$apriori->{minConfidence}=1.55; # The minimum confidence(percent, optional)

$apriori->{minLift}=0; # The minimum lift(optional)

$apriori->{minLeverage}=0; # The minimum leverage(optional)

$apriori->{minConviction}=0; # The minimum conviction(optional)

$apriori->{minCoverage}=0; # The minimum coverage(percent, optional)

$apriori->{limitRules}=10; # The limit of rules(optional)

$apriori->{output}=1; # The output type (1 - Export to text file delimited by tab; 2 - Export to excel file with chart)(optional)

$apriori->{messages}=1; # A boolean value that enables the display of messages (optional)

$apriori->{itemsKeyDescription}{'101'}='MILK'; # Hash table to add items by key and description
$apriori->{itemsKeyDescription}{102}='BREAD';
$apriori->{itemsKeyDescription}{'103'}='CEREAL';

@{$apriori->{itemsKeyTransactions}{'101'}}=('1101',1103,'1104',1105,'1106',1107,'1109',1110);
# Array reference per item key, listing the transactions that contain the item
@{$apriori->{itemsKeyTransactions}{102}}=('1102',1103,'1104',1105,'1106',1107,1108,'1109',1110);
@{$apriori->{itemsKeyTransactions}{'103'}}=('1101',1102,1103,'1104','1106',1107,1108,'1109',1110);

$apriori->generate_rules; # Generate association rules until no set of items meets the minimum support and confidence
# or
#$apriori->association_rules_itemset_size(3); # Generate rules from a set of items size 3, for example

print "\n@{$apriori->{frequentItemset}}\n"; # Show frequent items

#output messages

3 items, 12 possible rules
Itemset size 2, 3 items 
Processing... 
Frequent itemset: { 101, 103, 102 }, 3 items 
Exporting to file "output_itemset_size_2.txt"... 
Itemset size 3, 3 items 
Processing... 
Frequent itemset: { 101, 102, 103 }, 3 items 
Exporting to file "output_itemset_size_3.txt"... 
101, 102, 103

#output file "output_itemset_size_2.txt"

Rules	Support %	Confidence %	Lift	Leverage	Conviction	Coverage %
R1	70,00	77,78	1,11	7,00	0,10	90,00
R2	80,00	88,89	1,11	8,00	0,09	90,00
R3	70,00	87,50	1,25	14,00	0,08	80,00
R4	70,00	87,50	1,25	14,00	0,08	80,00
R5	80,00	88,89	1,11	8,00	0,09	90,00
R6	70,00	77,78	1,11	7,00	0,10	90,00

Rule R1: { 103 } => { 101 }
Support: 70,00 %
Confidence: 77,78 %
Lift: 1,11
Leverage: 7,00
Conviction: 0,10
Coverage: 90,00 %
Items:
103 CEREAL
101 MILK

to be continued...

#output file "output_itemset_size_3.txt"

Rules	Support %	Confidence %	Lift	Leverage	Conviction	Coverage %
R7	60,00	66,67	1,11	6,00	0,12	90,00
R8	60,00	85,71	1,43	18,00	0,07	70,00
R9	60,00	75,00	1,25	12,00	0,09	80,00
R10	60,00	75,00	1,25	12,00	0,09	80,00
R11	60,00	85,71	1,43	18,00	0,07	70,00
R12	60,00	66,67	1,11	6,00	0,12	90,00

Rule R7: { 103 } => { 101, 102 }
Support: 60,00 %
Confidence: 66,67 %
Lift: 1,11
Leverage: 6,00
Conviction: 0,12
Coverage: 90,00 %
Items:
103 CEREAL
101 MILK
102 BREAD

Rule R8: { 101, 103 } => { 102 }
Support: 60,00 %
Confidence: 85,71 %
Lift: 1,43
Leverage: 18,00
Conviction: 0,07
Coverage: 70,00 %
Items:
101 MILK
103 CEREAL
102 BREAD

to be continued...

# or from a database

# CREATE TABLE dimension_product(
# 	product_key INTEGER NOT NULL PRIMARY KEY,
# 	product_alternate_key INTEGER NOT NULL,
# 	product_name TEXT NOT NULL,
# 	price REAL NOT NULL
#	// ...
# );
#
# INSERT INTO dimension_product VALUES(1,101,'MILK',10.00);
# INSERT INTO dimension_product VALUES(2,102,'BREAD',10.00);
# INSERT INTO dimension_product VALUES(3,103,'CEREAL',10.00);
# // ...
# 
# CREATE TABLE fact_sales(
# 	sales_order_number INTEGER NOT NULL,
# 	sales_order_line_number INTEGER NOT NULL,
# 	product_key INTEGER NOT NULL,
# 	quantity INTEGER NOT NULL,
#	// ...
# 	PRIMARY KEY(sales_order_number, sales_order_line_number),
# 	FOREIGN KEY(product_key) REFERENCES dimension_product(product_key)
# );
#
# INSERT INTO fact_sales VALUES(1101,1,3,1);
# INSERT INTO fact_sales VALUES(1101,2,1,1);
# INSERT INTO fact_sales VALUES(1102,1,3,1);
# INSERT INTO fact_sales VALUES(1102,2,2,1);
# INSERT INTO fact_sales VALUES(1103,1,1,1);
# INSERT INTO fact_sales VALUES(1103,2,2,1);
# INSERT INTO fact_sales VALUES(1103,3,3,1);
# INSERT INTO fact_sales VALUES(1104,1,1,1);
# INSERT INTO fact_sales VALUES(1104,2,2,1);
# INSERT INTO fact_sales VALUES(1104,3,3,1);
# INSERT INTO fact_sales VALUES(1105,1,1,1);
# INSERT INTO fact_sales VALUES(1105,2,2,1);
# INSERT INTO fact_sales VALUES(1106,1,1,1);
# INSERT INTO fact_sales VALUES(1106,2,2,1);
# INSERT INTO fact_sales VALUES(1106,3,3,1);
# INSERT INTO fact_sales VALUES(1107,1,1,1);
# INSERT INTO fact_sales VALUES(1107,2,2,1);
# INSERT INTO fact_sales VALUES(1107,3,3,1);
# INSERT INTO fact_sales VALUES(1108,1,3,1);
# INSERT INTO fact_sales VALUES(1108,2,2,1);
# INSERT INTO fact_sales VALUES(1109,1,1,1);
# INSERT INTO fact_sales VALUES(1109,2,2,1);
# INSERT INTO fact_sales VALUES(1109,3,3,1);
# INSERT INTO fact_sales VALUES(1110,1,1,1);
# INSERT INTO fact_sales VALUES(1110,2,2,1);
# INSERT INTO fact_sales VALUES(1110,3,3,1);
#//...

use DBD::SQLite;
use Data::Mining::Apriori;

my $db = DBI->connect('dbi:SQLite:dbname=DW.db','','');

my $sql = q~
SELECT COUNT(DISTINCT(sales_order_number)) FROM fact_sales
/* WHERE ... */
~;

my $query = $db->prepare($sql);
$query->execute;
my $totalTransactions = $query->fetchrow;

$apriori = new Data::Mining::Apriori;

$apriori->{totalTransactions}=$totalTransactions;

$apriori->{minSupport}=1.55;

$apriori->{minConfidence}=1.55;

$apriori->{minLift}=0;

$apriori->{minLeverage}=0;

$apriori->{minConviction}=0;

$apriori->{minCoverage}=0;

$apriori->{limitRules}=10;

$apriori->{output}=1;

$apriori->{messages}=1;

$sql = qq~
SELECT dp.product_alternate_key, dp.product_name
FROM dimension_product dp
JOIN fact_sales fs ON
dp.product_key = fs.product_key
/* WHERE ... */
~;

$query = $db->prepare($sql);
$query->execute;
while(my($key,$description)=$query->fetchrow_array){
	$apriori->{itemsKeyDescription}{$key}=$description;
}

foreach my$key(keys(%{$apriori->{itemsKeyDescription}})){
	$sql = qq~
	SELECT DISTINCT(fs.sales_order_number)
	FROM dimension_product dp
	JOIN fact_sales fs ON
	dp.product_key = fs.product_key
	WHERE dp.product_alternate_key = $key
	/* AND ... */
	~;
	$query = $db->prepare($sql);
	$query->execute;
	while(my$transaction=$query->fetchrow){
		push @{$apriori->{itemsKeyTransactions}{$key}},$transaction;
	}
}

$apriori->generate_rules;
# or
#$apriori->association_rules_itemset_size(3);

print "\n@{$apriori->{frequentItemset}}\n";

DESCRIPTION

This module implements the apriori algorithm of data mining.

ATTRIBUTES

totalTransactions

The total number of transactions.

minSupport

The minimum support(percent).

minConfidence

The minimum confidence(percent, optional).

minLift

The minimum lift(optional).

minLeverage

The minimum leverage(optional).

minConviction

The minimum conviction(optional).

minCoverage

The minimum coverage(percent, optional).

limitRules

The limit of rules(optional).

output

The output type (optional):

  • 1 - Text file delimited by tab;

  • 2 - Excel file with chart.

messages

A boolean value that enables the display of messages (optional).

itemsKeyDescription

Hash table that maps each item key to its description.

itemsKeyTransactions

Hash table that maps each item key to an array reference listing the transactions that contain the item.

quantityPossibleRules

Quantity of possible rules.

frequentItemset

Frequent itemset.

recursively

A boolean value indicating whether to generate association rules until no set of items meets the minimum support or minimum confidence.

associationRules

A data structure to store the name of the rule, key items, implication, support, confidence, lift, leverage, conviction and coverage.

	$self->{associationRules} = {
                                    '1' => {
                                                'R1' => {
                                                            'items' => [
                                                                         '103',
                                                                         '101'
                                                                       ],
                                                            'rule' => [
                                                                        '{ 103 } => { 101 }',
                                                                        '70,00',
                                                                        '77,78',
                                                                        '1,11',
                                                                        '7,00',
                                                                        '0,10',
                                                                        '90,00'
                                                                      ]
                                                        }
                                            },
                                            # to be continued...

METHODS

new

Creates a new instance of Data::Mining::Apriori.

generate_rules

Generate association rules until no set of items meets the minimum support, confidence, lift, leverage, conviction and coverage.

association_rules_itemset_size

Generates association rules for sets of items of a given size. Accepts the following argument:

  • An integer.

    Representing the size of the set of items.

AUTHOR

Alex Graciano, <agraciano@cpan.org>

COPYRIGHT AND LICENSE

Copyright (C) 2015-2016 by Alex Graciano

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.4 or, at your option, any later version of Perl 5 you may have available.