#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <stddef.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/***********************************************/
/* */
/* Structures */
/* */
/***********************************************/
typedef struct atom_count_struct *Atom_count_ptr;
typedef struct atom_count_struct {
char *element_symbol;
int count;
Atom_count_ptr next;
} Atom_count;
typedef struct symbol_table_struct *Symtab_ptr;
typedef struct symbol_table_struct {
Atom_count *start;
Symtab_ptr next;
} Symtab;
typedef struct token_struct {
/* 0 - left parenthesis
1 - element name
2 - int count number
3 - right parenthesis
*/
int type;
char *element_symbol;
int count;
} Token;
typedef struct stack_struct *Stack_ptr;
typedef struct stack_struct {
Symtab *first_tab;
Symtab *last_tab;
Stack_ptr prev;
} Stack;
/***********************************************/
/* */
/* Functions declarations */
/* */
/***********************************************/
int verify_brackets(char *);
int check_brackets(char *, char *);
int is_bracket(char);
int is_left_bracket(char);
char other_bracket(char);
int only_alnum(char *, char *);
int not_even(char *, char *);
char *matching_bracket(char *, char *);
Atom_count *parse_formula_c(char *formula);
void print_atom_count(Atom_count *i);
Atom_count *flatten(Symtab *n);
Atom_count *combine(Atom_count *n);
Atom_count *add_atom(Atom_count *i, Atom_count *j);
void free_symtab(Symtab *n);
int tokenize(Token *t, int *error, char **f);
char *make_str_copy(char *s);
void multiply(Atom_count *i, int n);
Atom_count *new_element(char *element_symbol);
Symtab *new_symtab(void);
/***********************************************/
/* */
/* Functions (for parsing) */
/* */
/***********************************************/
Atom_count *parse_formula_c(char *formula)
{
Token tok;
Token *t = &tok;
int error = 0;
Stack *temp_stack;
Atom_count *ac;
Symtab *st;
Stack *stack = (Stack *) malloc(sizeof(Stack));
stack->first_tab = NULL;
stack->last_tab = NULL;
stack->prev = NULL;
t->type = 4; /* Wrong!!! */
t->element_symbol = NULL;
t->count = 0;
while(tokenize(&tok, &error, &formula))
{
if(t->type == 0) /* left parenthesis */
{
temp_stack = (Stack *) malloc(sizeof(Stack));
temp_stack->first_tab = NULL;
temp_stack->last_tab = NULL;
temp_stack->prev = stack;
stack = temp_stack;
}
else if(t->type == 1) /* element name */
{
ac = new_element(t->element_symbol);
st = new_symtab();
st->start = ac;
if(stack->first_tab == NULL) stack->first_tab = st;
if(stack->last_tab != NULL) stack->last_tab->next = st;
stack->last_tab = st;
}
else if(t->type == 2) /* count */
{
multiply(stack->last_tab->start, t->count);
}
else if(t->type == 3) /* right parenthesis */
{
ac = combine(flatten(stack->first_tab));
free_symtab(stack->first_tab);
temp_stack = stack;
stack = stack->prev;
free(temp_stack);
st = new_symtab();
st->start = ac;
if(stack->first_tab == NULL) stack->first_tab = st;
if(stack->last_tab != NULL) stack->last_tab->next = st;
stack->last_tab = st;
}
else /* Error! */
{
return(NULL);
}
}
if(error) return(NULL);
ac = combine(flatten(stack->first_tab));
free_symtab(stack->first_tab);
free(stack);
return(ac);
}
/************************************************/
int tokenize(Token *t, int *error, char **formula)
{
char *formula_offset = *formula;
char *i = formula_offset;
char *j;
char *k;
if(*i == '(')
{
t->type = 0;
++formula_offset;
*formula = formula_offset;
return(1);
}
else if(*i == ')')
{
t->type = 3;
++formula_offset;
*formula = formula_offset;
return(1);
}
else if(isupper(*i))
{
t->type = 1;
++i;
while(islower(*i))
{
++i;
}
j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
k = j;
while(formula_offset != i)
{
*k = *formula_offset;
++formula_offset;
++k;
}
*k = '\0';
free(t->element_symbol);
t->element_symbol = j;
*formula = formula_offset;
return(1);
}
else if(isdigit(*i))
{
t->type = 2;
++i;
while(isdigit(*i))
{
++i;
}
j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
k = j;
while(formula_offset != i)
{
*k = *formula_offset;
++formula_offset;
++k;
}
*k = '\0';
t->count = atoi(j);
free(j);
*formula = formula_offset;
return(1);
}
else if(*i == '\0')
{
free(t->element_symbol);
return(0);
}
else
{
free(t->element_symbol);
*error = 1;
return(0);
}
}
/************************************************/
Atom_count *flatten(Symtab *n)
{
Atom_count *i;
Atom_count *j;
if(n == NULL) return(NULL);
i = n->start;
j = i;
n = n->next;
while(n != NULL)
{
if(j == NULL)
{
j = n->start;
n = n->next;
continue;
}
while(j->next != NULL)
{
j = j->next;
}
j->next = n->start;
n = n->next;
}
return i;
}
/***********************************************/
Atom_count *combine(Atom_count *n)
{
Atom_count *i = NULL;
return add_atom(i, n);
}
/************************************************/
Atom_count *add_atom(Atom_count *i, Atom_count *n)
{
Atom_count *j;
Atom_count *unchanged = i;
if(n == NULL) return i;
j = n;
n = n->next;
j->next = NULL;
while(i != NULL)
{
if(strcmp(i->element_symbol, j->element_symbol) == 0)
{
i->count += j->count;
free(j);
return(add_atom(unchanged, n));
}
i = i->next;
}
j->next = unchanged;
return(add_atom(j, n));
}
/***************************************************/
void free_symtab(Symtab *n)
{
Symtab *i = n;
while(n != NULL)
{
n = n->next;
free(i);
i = n;
}
}
/**************************************************/
char *make_str_copy(char *s)
{
char *t = (char *) malloc(sizeof(char)*(strlen(s)+1));
t = strcpy(t, s);
return(t);
}
/***************************************************/
void multiply(Atom_count *i, int n)
{
while(i != NULL)
{
i->count *= n;
i = i->next;
}
return;
}
/***************************************************/
Symtab *new_symtab(void)
{
Symtab *s = (Symtab *) malloc(sizeof(Symtab));
s->next = NULL;
s->start = NULL;
return(s);
}
/***************************************************/
Atom_count *new_element(char *element_symbol)
{
Atom_count *ac = (Atom_count *) malloc(sizeof(Atom_count));
ac->count = 1;
ac->next = NULL;
ac->element_symbol = make_str_copy(element_symbol);
return(ac);
}
/***************************************************/
void print_atom_count(Atom_count *i)
{
if(i == NULL)
{
printf("List is NULL\n");
return;
}
while(i != NULL)
{
printf("Atom: %s Count: %d\n", i->element_symbol, i->count);
i = i->next;
}
printf("End\n");
return;
}
/***********************************************************/
/* */
/* Functions (for checking parentheses */
/* */
/***********************************************************/
/****************************************************/
int verify_brackets(char *begin)
{
char *p;
char *q;
p = begin;
q = begin;
while (*q != '\0') ++q;
if(check_brackets(p, q))
{
return(1);
}
return(0);
}
/*****************************************************/
int check_brackets(char *s, char *t)
{
char *r;
/********** base case one: no brackets ************/
if (only_alnum(s, t)) return (1);
/***** base case two: odd number of brackets ******/
if (not_even(s, t)) return (0);
/********** RECURSE !!!!! *************************/
while ((is_left_bracket(*s)) != 1) ++s;
r = matching_bracket(s, t);
if (r == NULL) return (0);
if ((check_brackets(s+1, r)) && (check_brackets(r+1, t)))
{
return (1);
}
return (0);
}
/******************************************************/
int only_alnum(char *s, char *t)
{
int n = 1;
while (s < t)
{
if (isalnum(*s) == 0)
n = 0;
++s;
}
return (n);
}
/**********************************************************/
int not_even(char *s, char *t)
{
int i = 0;
int j = 0;
int k = 0;
int n = 0;
while (s < t)
{
if (*s == '[') ++i;
if (*s == ']') --i;
if (*s == '{') ++j;
if (*s == '}') --j;
if (*s == '(') ++k;
if (*s == ')') --k;
if (*s == '<') ++n;
if (*s == '>') --n;
++s;
}
if (i != 0 || j != 0 || k != 0 || n != 0) return (1);
return (0);
}
/*******************************************************/
int is_left_bracket(char b)
{
if ((b == '[') || (b == '{') || (b == '(') || (b == '<'))
{
return (1);
}
return (0);
}
/***********************************************************/
char *matching_bracket(char *s, char *t)
{
char left;
char right;
int i = 1;
left = *s;
right = other_bracket(*s);
++s;
while (s < t)
{
if (*s == right) --i;
if (*s == left) ++i;
if (i == 0) break;
++s;
}
if (i == 0) return (s);
return (NULL);
}
/***********************************************************/
char other_bracket(char b)
{
if (b == '[') return (']');
if (b == '{') return ('}');
if (b == '(') return (')');
if (b == '<') return ('>');
if (b == ']') return ('[');
if (b == '}') return ('{');
if (b == ')') return ('(');
if (b == '>') return ('<');
return ('\0');
}
/************************************************************/
/* */
/* XSUB stuff */
/* */
/************************************************************/
MODULE = Chemistry::MolecularMass PACKAGE = Chemistry::MolecularMass
int
verify_parens(s)
char *s;
CODE:
RETVAL = verify_brackets(s);
OUTPUT:
RETVAL
void
parse_formula(s)
char *s;
PREINIT:
Atom_count *i;
PPCODE:
i = parse_formula_c(s);
if(i == NULL)
{
/* do push nothing on the stack --
an empty list is implicitly returned */
}
else
{
while(i != NULL)
{
EXTEND(SP, 2);
PUSHs(newSVpv(i->element_symbol, 0));
PUSHs(newSViv(i->count));
i = i->next;
}
}