#/*-*- Mode: C++; c-basic-offset: 2; -*- */

##=====================================================================
## Moot::Waste::Scanner
##=====================================================================

MODULE = Moot		PACKAGE = Moot::Waste::Scanner

##--------------------------------------------------------------
## + constructor: allocates a new wasteTokenScanner
##   - CLASS : first perl-side argument; also forwarded as 2nd C++ constructor argument
##   - fmt   : format mask forwarded as 1st C++ constructor argument (default: tiofMedium|tiofLocation)
wasteTokenScanner*
new(char *CLASS, TokenIOFormatMask fmt=tiofMedium|tiofLocation)
PREINIT:
 wasteTokenScanner *fresh;
CODE:
 fresh = new wasteTokenScanner(fmt, CLASS);
 //fprintf(stderr, "%s::new() --> %p=%i\n", CLASS,fresh,fresh);
 RETVAL = fresh;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + calls reset() on the `scanner` member of the wrapped wasteTokenScanner
void
reset(wasteTokenScanner* wts)
CODE:
 wasteTokenScanner &tokScanner = *wts;
 tokScanner.scanner.reset();


##=====================================================================
## Moot::Waste::Lexer
## + uses TokenReader::tr_data to hold SV* of underlying reader
##=====================================================================

MODULE = Moot		PACKAGE = Moot::Waste::Lexer

##--------------------------------------------------------------
## + constructor: allocates a new wasteLexerReader
##   - CLASS : first perl-side argument; also forwarded as 2nd C++ constructor argument
##   - fmt   : format mask forwarded as 1st C++ constructor argument (default: tiofUnknown)
wasteLexerReader*
new(char *CLASS, TokenIOFormatMask fmt=tiofUnknown)
PREINIT:
 wasteLexerReader *fresh;
CODE:
 fresh = new wasteLexerReader(fmt, CLASS);
 //fprintf(stderr, "%s::new() --> %p=%i\n", CLASS,fresh,fresh);
 RETVAL = fresh;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + closes the lexer, then releases the SV stored in tr_data (if any) and clears the slot
##   - tr_data is inspected only after the C++ close() has returned
void
close(wasteLexerReader *wl)
PREINIT:
 SV *held_sv;
CODE:
 wl->close();
 held_sv = (SV*)wl->tr_data;
 if (held_sv != NULL) {
   SvREFCNT_dec(held_sv);
   wl->tr_data = NULL;
 }

##-------------------------------------------------------------
##int
##_scanner_refcnt(wasteLexerReader *wl)
##CODE:
## if (wl->tr_data) {
##   RETVAL = SvREFCNT((SV*)SvRV((SV*)wl->tr_data));
## } else {
##   RETVAL = -1;
## }
##OUTPUT:
## RETVAL

##-------------------------------------------------------------
## + returns a fresh copy (newSVsv) of the SV stored in tr_data
##   - returns undef unless both wl->tr_data and wl->scanner are set
SV*
_get_scanner(wasteLexerReader *wl)
CODE:
 if (wl->tr_data && wl->scanner) {
   RETVAL = newSVsv((SV*)wl->tr_data);
 } else {
   XSRETURN_UNDEF;
 }
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + attaches an underlying reader to the lexer and remembers its perl-side SV in tr_data
##   - scanner_sv must be a blessed reference to an SVt_PVMG scalar whose IV is a TokenReader*;
##     otherwise a warning is issued, nothing is changed, and undef is returned
##   - a copy of scanner_sv (newSVsv) is stored in wl->tr_data; it is released again by close()
##   - any SV copy stored by a previous call is released here, so that calling
##     _set_scanner() repeatedly without an intervening close() does not leak
void
_set_scanner(wasteLexerReader *wl, SV *scanner_sv)
PREINIT:
  TokenReader *tr;
  SV *old_sv;
CODE:
  if ( !(sv_isobject(scanner_sv) && (SvTYPE(SvRV(scanner_sv)) == SVt_PVMG)) ) {
    warn("Moot::Waste::Lexer::_set_scanner() -- scanner_sv is not a blessed SV reference");
    XSRETURN_UNDEF;
  }
  // INT2PTR is the portable way to turn the stored IV back into a pointer
  tr = INT2PTR(TokenReader*, SvIV((SV*)SvRV( scanner_sv )));
  // remember the previously stored SV before anything can touch the slot
  old_sv = (SV*)wl->tr_data;
  wl->from_reader(tr);
  wl->tr_data = newSVsv(scanner_sv);
  // release the old copy only after the new one exists: if both refer to the
  // same object, the new copy keeps it alive across the decrement
  if (old_sv != NULL) {
    SvREFCNT_dec(old_sv);
  }

##--------------------------------------------------------------
## + combined getter/setter for the lexer's dehyphenation flag
##   - with a 2nd argument: passes its truth value to wl->dehyph_mode()
##   - always returns the value of wl->lexer.wl_dehyph_mode as read after any update
bool
dehyphenate(wasteLexerReader* wl, ...)
CODE:
 const bool have_value = (items > 1);
 if (have_value) {
   wl->dehyph_mode( SvTRUE( ST(1) ) ? true : false );
 }
 RETVAL = wl->lexer.wl_dehyph_mode;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + returns the lexer's embedded stopword lexicon, blessed into Moot::Waste::Lexicon
##   - the returned pointer addresses a member of wl (wl->lexer.wl_stopwords); no copy is made
wasteLexicon*
stopwords(wasteLexerReader* wl)
PREINIT:
 const char *CLASS="Moot::Waste::Lexicon";
CODE:
 wasteLexicon &embedded = wl->lexer.wl_stopwords;
 RETVAL = &embedded;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + returns the lexer's embedded abbreviation lexicon, blessed into Moot::Waste::Lexicon
##   - the returned pointer addresses a member of wl (wl->lexer.wl_abbrevs); no copy is made
wasteLexicon*
abbrevs(wasteLexerReader* wl)
PREINIT:
 const char *CLASS="Moot::Waste::Lexicon";
CODE:
 wasteLexicon &embedded = wl->lexer.wl_abbrevs;
 RETVAL = &embedded;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + returns the lexer's embedded conjunction lexicon, blessed into Moot::Waste::Lexicon
##   - the returned pointer addresses a member of wl (wl->lexer.wl_conjunctions); no copy is made
wasteLexicon*
conjunctions(wasteLexerReader* wl)
PREINIT:
 const char *CLASS="Moot::Waste::Lexicon";
CODE:
 wasteLexicon &embedded = wl->lexer.wl_conjunctions;
 RETVAL = &embedded;
OUTPUT:
 RETVAL


##=====================================================================
## Moot::Waste::Lexicon
## - NO standalone objects allowed: always accessed via wasteLexerReader (so we can skip ref-counting)
##=====================================================================

MODULE = Moot		PACKAGE = Moot::Waste::Lexicon

##--------------------------------------------------------------
## NO standalone objects!!!
#wasteLexicon*
#new(char *CLASS)
#CODE:
#  RETVAL=new wasteLexicon();
#OUTPUT:
#  RETVAL

##--------------------------------------------------------------
## NO standalone objects!!!
#void
#DESTROY(wasteLexicon* lx)
#CODE:
# //if (lx) delete lx;

##--------------------------------------------------------------
## + calls clear() on the wrapped lexicon
void
clear(wasteLexicon* lx)
CODE:
 wasteLexicon &lexicon = *lx;
 lexicon.clear();

##--------------------------------------------------------------
## + returns the number of entries reported by lx->lex.size()
size_t
size(wasteLexicon* lx)
CODE:
 const size_t n_entries = lx->lex.size();
 RETVAL = n_entries;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + passes the C string `str` to the lexicon's insert() method
void
insert(wasteLexicon* lx, const char *str)
CODE:
 wasteLexicon &lexicon = *lx;
 lexicon.insert(str);

##--------------------------------------------------------------
## + returns the result of the lexicon's lookup() method for the C string `str`, as a boolean
bool
lookup(wasteLexicon* lx, const char *str)
CODE:
 const bool is_known = lx->lookup(str);
 RETVAL = is_known;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + low-level loader: hands a TokenReader to the lexicon's load() method
##   - returns load()'s result as a boolean
bool
_load_reader(wasteLexicon* lx, TokenReader *reader)
CODE:
 const bool load_result = lx->load(reader);
 RETVAL = load_result;
OUTPUT:
 RETVAL
 
##--------------------------------------------------------------
## + low-level loader: hands a filename to the lexicon's load() method
##   - returns load()'s result as a boolean
bool
_load_file(wasteLexicon* lx, const char *filename)
CODE:
 const bool load_result = lx->load(filename);
 RETVAL = load_result;
OUTPUT:
 RETVAL

##--------------------------------------------------------------
## + returns an array containing one perl string per lexicon entry, in iteration order of lx->lex
##   - utf8 : forwarded to stdstring2sv() for each entry (default: TRUE)
##   - the array is mortalized before being returned; presumably this balances the
##     reference taken by the AV* output typemap (confirm against the typemap in use)
AV*
to_array(wasteLexicon* lx, bool utf8=TRUE)
PREINIT:
 AV *entries;
CODE:
 entries = newAV();
 wasteLexicon::Lexicon::const_iterator cur = lx->lex.begin();
 while (cur != lx->lex.end()) {
   av_push(entries, stdstring2sv(*cur, utf8));
   ++cur;
 }
 RETVAL = entries;
 sv_2mortal((SV*)RETVAL);
OUTPUT:
 RETVAL


##=====================================================================
## Moot::Waste::Decoder
## + uses TokenWriter::tw_data to hold SV* of underlying writer
##=====================================================================

MODULE = Moot		PACKAGE = Moot::Waste::Decoder

##--------------------------------------------------------------
## + constructor: allocates a new wasteDecoder
##   - CLASS : first perl-side argument; also forwarded as 2nd C++ constructor argument
##   - fmt   : format mask forwarded as 1st C++ constructor argument (default: tiofUnknown)
wasteDecoder*
new(char *CLASS, TokenIOFormatMask fmt=tiofUnknown)
PREINIT:
 wasteDecoder *fresh;
CODE:
 fresh = new wasteDecoder(fmt, CLASS);
 //fprintf(stderr, "%s::new() --> %p=%i\n", CLASS,fresh,fresh);
 RETVAL = fresh;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + closes the decoder, then releases the SV stored in tw_data (if any) and clears the slot
##   - tw_data is inspected only after the C++ close() has returned
void
close(wasteDecoder *wd)
PREINIT:
 SV *held_sv;
CODE:
 wd->close();
 held_sv = (SV*)wd->tw_data;
 if (held_sv != NULL) {
   SvREFCNT_dec(held_sv);
   wd->tw_data = NULL;
 }

##-------------------------------------------------------------
## + returns a fresh copy (newSVsv) of the SV stored in tw_data
##   - returns undef unless both wd->tw_data and wd->wd_sink are set
SV*
_get_sink(wasteDecoder *wd)
CODE:
 if (wd->tw_data && wd->wd_sink) {
   RETVAL = newSVsv((SV*)wd->tw_data);
 } else {
   XSRETURN_UNDEF;
 }
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + attaches an underlying writer to the decoder and remembers its perl-side SV in tw_data
##   - sink_sv must be a blessed reference to an SVt_PVMG scalar whose IV is a TokenWriter*;
##     otherwise a warning is issued, nothing is changed, and undef is returned
##   - a copy of sink_sv (newSVsv) is stored in wd->tw_data; it is released again by close()
##   - any SV copy stored by a previous call is released here, so that calling
##     _set_sink() repeatedly without an intervening close() does not leak
void
_set_sink(wasteDecoder *wd, SV *sink_sv)
PREINIT:
  TokenWriter *tw;
  SV *old_sv;
CODE:
  if ( !(sv_isobject(sink_sv) && (SvTYPE(SvRV(sink_sv)) == SVt_PVMG)) ) {
    warn("Moot::Waste::Decoder::_set_sink() -- sink_sv is not a blessed SV reference");
    XSRETURN_UNDEF;
  }
  // INT2PTR is the portable way to turn the stored IV back into a pointer
  tw = INT2PTR(TokenWriter*, SvIV((SV*)SvRV( sink_sv )));
  // remember the previously stored SV before anything can touch the slot
  old_sv = (SV*)wd->tw_data;
  wd->to_writer(tw);
  wd->tw_data = newSVsv(sink_sv);
  // release the old copy only after the new one exists: if both refer to the
  // same object, the new copy keeps it alive across the decrement
  if (old_sv != NULL) {
    SvREFCNT_dec(old_sv);
  }

##-------------------------------------------------------------
## + returns the number of elements reported by wd->wd_buf.size()
size_t
buffer_size(wasteDecoder *wd)
CODE:
 const size_t n_buffered = wd->wd_buf.size();
 RETVAL = n_buffered;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + returns true iff wd->wd_buf.empty() reports an empty buffer
bool
buffer_empty(wasteDecoder *wd)
CODE:
 const bool nothing_buffered = wd->wd_buf.empty();
 RETVAL = nothing_buffered;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + returns the first buffered token converted by token2hv(), without removing it
##   - returns undef if the buffer is empty
##   - utf8 : forwarded to token2hv() (default: TRUE)
HV*
buffer_peek(wasteDecoder *wd, bool utf8=TRUE)
CODE:
 if (!wd->wd_buf.empty()) {
   RETVAL = token2hv( &(wd->wd_buf.front()), utf8 );
 } else {
   XSRETURN_UNDEF;
 }
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + returns the result of the decoder's buffer_can_shift() method, as a boolean
bool
buffer_can_shift(wasteDecoder *wd)
CODE:
 const bool shiftable = wd->buffer_can_shift();
 RETVAL = shiftable;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + calls the decoder's buffer_shift() method; no emptiness check is made here
void
buffer_shift(wasteDecoder *wd)
CODE:
 wasteDecoder &decoder = *wd;
 decoder.buffer_shift();


##-------------------------------------------------------------
## + shifts tokens off the decoder buffer and returns them as an array of hash-refs
##   - returns undef if the buffer is empty on entry
##   - force : if true, shift until the buffer is empty; otherwise stop as soon as
##             buffer_can_shift() returns false (the result may then be an empty array)
##   - utf8  : forwarded to token2hv() for each token (default: TRUE)
AV*
buffer_flush(wasteDecoder *wd, bool force=FALSE, bool utf8=TRUE)
PREINIT:
 AV *flushed;
CODE:
 if (wd->wd_buf.empty()) { XSRETURN_UNDEF; }
 flushed = newAV();
 for (;;) {
   if (wd->wd_buf.empty()) break;
   // buffer_can_shift() is consulted only when not forcing
   if (!force && !wd->buffer_can_shift()) break;
   // newRV_inc takes its own reference on the hash; presumably token2hv()
   // returns a mortal HV -- confirm against its definition
   HV *converted = token2hv( &(wd->buffer_peek()), utf8 );
   av_push(flushed, newRV_inc((SV*)converted));
   wd->buffer_shift();
 }
 RETVAL = flushed;
 sv_2mortal((SV*)RETVAL);
OUTPUT:
 RETVAL


##=====================================================================
## Moot::Waste::Annotator
## + uses TokenWriter::tw_data to hold SV* of underlying writer
##=====================================================================

MODULE = Moot		PACKAGE = Moot::Waste::Annotator

##--------------------------------------------------------------
## + constructor: allocates a new wasteAnnotatorWriter
##   - CLASS : first perl-side argument; also forwarded as 2nd C++ constructor argument
##   - fmt   : format mask forwarded as 1st C++ constructor argument (default: tiofMediumRare)
wasteAnnotatorWriter*
new(char *CLASS, TokenIOFormatMask fmt=tiofMediumRare)
PREINIT:
 wasteAnnotatorWriter *fresh;
CODE:
 fresh = new wasteAnnotatorWriter(fmt, CLASS);
 RETVAL = fresh;
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + closes the annotator, then releases the SV stored in tw_data (if any) and clears the slot
##   - tw_data is inspected only after the C++ close() has returned
void
close(wasteAnnotatorWriter *waw)
PREINIT:
 SV *held_sv;
CODE:
 waw->close();
 held_sv = (SV*)waw->tw_data;
 if (held_sv != NULL) {
   SvREFCNT_dec(held_sv);
   waw->tw_data = NULL;
 }

##-------------------------------------------------------------
## + returns a fresh copy (newSVsv) of the SV stored in tw_data
##   - returns undef unless both waw->tw_data and waw->waw_sink are set
SV*
_get_sink(wasteAnnotatorWriter *waw)
CODE:
 if (waw->tw_data && waw->waw_sink) {
   RETVAL = newSVsv((SV*)waw->tw_data);
 } else {
   XSRETURN_UNDEF;
 }
OUTPUT:
 RETVAL

##-------------------------------------------------------------
## + attaches an underlying writer to the annotator and remembers its perl-side SV in tw_data
##   - sink_sv must be a blessed reference to an SVt_PVMG scalar whose IV is a TokenWriter*;
##     otherwise a warning is issued, nothing is changed, and undef is returned
##   - a copy of sink_sv (newSVsv) is stored in waw->tw_data; it is released again by close()
##   - any SV copy stored by a previous call is released here, so that calling
##     _set_sink() repeatedly without an intervening close() does not leak
##   - the warning names this package (Moot::Waste::Annotator)
void
_set_sink(wasteAnnotatorWriter *waw, SV *sink_sv)
PREINIT:
  TokenWriter *tw;
  SV *old_sv;
CODE:
  if ( !(sv_isobject(sink_sv) && (SvTYPE(SvRV(sink_sv)) == SVt_PVMG)) ) {
    warn("Moot::Waste::Annotator::_set_sink() -- sink_sv is not a blessed SV reference");
    XSRETURN_UNDEF;
  }
  // INT2PTR is the portable way to turn the stored IV back into a pointer
  tw = INT2PTR(TokenWriter*, SvIV((SV*)SvRV( sink_sv )));
  // remember the previously stored SV before anything can touch the slot
  old_sv = (SV*)waw->tw_data;
  waw->to_writer(tw);
  waw->tw_data = newSVsv(sink_sv);
  // release the old copy only after the new one exists: if both refer to the
  // same object, the new copy keeps it alive across the decrement
  if (old_sv != NULL) {
    SvREFCNT_dec(old_sv);
  }

##-------------------------------------------------------------
## + annotates a single token given as a perl hash and returns the result as a new hash
##   - tokhv is converted into a local mootToken via hv2token(), passed to
##     waw->waw_annotator.annotate_token(), and converted back via token2hv()
##   - token2hv() is called without its utf8 argument here
HV*
annotate(wasteAnnotatorWriter *waw, HV *tokhv)
PREINIT:
 mootToken scratchTok;
 HV *annotated;
CODE:
 hv2token(tokhv, &scratchTok);
 waw->waw_annotator.annotate_token(scratchTok);
 annotated = token2hv(&scratchTok);
 RETVAL = annotated;
OUTPUT:
 RETVAL