#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <glib.h>
typedef struct {
int longest;
GHashTable* dictionary;
GHashTable* chains;
} IM;
typedef struct {
int count;
GHashTable* states;
} chain_t;
static inline SV* phash_value(SV *phash, char *key) {
SV* hash = *av_fetch((AV*) phash, 0, 0);
IV index = SvIV( *hv_fetch((HV*) SvRV(hash), key, strlen(key), 0) );
return *av_fetch((AV*) phash, index, 0);
}
static void free_key (gpointer key, gpointer value, gpointer user_data) {
g_free(key);
}
static void free_chain (gpointer key, gpointer value, gpointer user_data) {
chain_t *chain = value;
g_free(key);
g_hash_table_destroy(chain->states);
g_free(value);
}
/* this is pretty fun but hacky
In order to be re-entrant and only pass one variable around we put
the number of keys in the first array element, then decrement it
and put the next key into the array at that index.
The last copy blats over the counter, but it's zero at that point
anyway.
*/
static void get_keys (gpointer key, gpointer value, gpointer user_data) {
char **keys = user_data;
int i = (int) --keys[0];
keys[i] = key;
}
MODULE = Algorithm::MarkovChain::GHash PACKAGE = Algorithm::MarkovChain::GHash
PROTOTYPES: ENABLE
SV*
_new_cstuff()
CODE:
IM* im = g_malloc(sizeof(IM));
SV* obj = newSViv((IV)im);
im->longest = 0;
im->dictionary = g_hash_table_new(g_str_hash, g_str_equal);
im->chains = g_hash_table_new(g_str_hash, g_str_equal);
SvREADONLY_on(obj);
RETVAL = obj;
OUTPUT:
RETVAL
void
_c_destroy (obj)
SV *obj
CODE:
{
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
g_hash_table_foreach(im->dictionary, free_key, NULL);
g_hash_table_foreach(im->chains, free_chain, NULL);
g_hash_table_destroy(im->dictionary);
g_hash_table_destroy(im->chains);
g_free(im);
}
void
increment_seen(obj, stub, original_next)
SV* obj;
char *stub;
char *original_next;
CODE:
{
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
chain_t* stubs = g_hash_table_lookup(im->chains, stub);
int count;
char* next = g_hash_table_lookup(im->dictionary, original_next);
/* printf("increment_seen: '%s' '%s'\n", stub, original_next); */
if (!next) {
next = g_strdup(original_next);
g_hash_table_insert(im->dictionary, next, next);
}
if (!stubs) {
char* sep = SvPV_nolen(phash_value(SvRV(obj), "seperator"));
char* s;
int len = 1;
for (s = stub; s = strstr(s, sep); s += strlen(sep)) len++;
if (len > im->longest) {
im->longest = len;
}
stubs = g_malloc(sizeof(chain_t));
stubs->states = g_hash_table_new(g_str_hash, g_str_equal);
stubs->count = 0;
g_hash_table_insert(im->chains, g_strdup(stub), stubs);
}
stubs->count++;
count = (int) g_hash_table_lookup(stubs->states, next);
count++;
g_hash_table_insert(stubs->states, next, (void *) count);
}
void
get_options (obj, stub)
SV *obj;
char *stub;
PPCODE:
{
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
chain_t* stubs = g_hash_table_lookup(im->chains, stub);
char **keys = NULL;
int nkeys, i;
if ( (!stubs) || (!(nkeys = g_hash_table_size(stubs->states))) ) {
return;
}
keys = g_malloc(nkeys * sizeof(char *));
keys[0] = (char*) nkeys;
g_hash_table_foreach(stubs->states, get_keys, keys);
for (i = 0; i < nkeys; i++) {
int count = (int) g_hash_table_lookup(stubs->states, keys[i]);
XPUSHs(sv_2mortal(newSVpv(keys[i], 0)));
XPUSHs(sv_2mortal(newSVnv(count / stubs->count)));
}
g_free(keys);
}
int
longest_sequence (obj)
SV* obj;
CODE:
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
RETVAL = im->longest;
OUTPUT:
RETVAL
int
sequence_known (obj, stub)
SV* obj;
char *stub;
CODE:
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
RETVAL = (int) g_hash_table_lookup(im->chains, stub);
OUTPUT:
RETVAL
char*
random_sequence (obj)
SV *obj;
CODE:
IM* im = (IM*) SvIV(phash_value(SvRV(obj), "_cstuff"));
int nkeys = g_hash_table_size(im->chains);
char **keys = g_malloc(nkeys * sizeof(char *));
keys[0] = (char*) nkeys;
g_hash_table_foreach(im->chains, get_keys, keys);
nkeys = (1.0 * nkeys * rand()) / (1.0*RAND_MAX);
RETVAL = keys[nkeys];
g_free(keys);
OUTPUT:
RETVAL