NAME

DTA::CAB::Format::TEIws - TEI-XML with //w and //s elements, as output by DTA::TokWrap

SYNOPSIS

##========================================================================
## PRELIMINARIES

use DTA::CAB::Format::TEIws;

##========================================================================
## Constructors etc.

$fmt = CLASS_OR_OBJ->new(%args);

##========================================================================
## Methods: Input: Generic API

$fmt = $fmt->close();
$fmt = $fmt->fromString(\$string);
$fmt = $fmt->fromFile($filename_or_handle);
$fmt = $fmt->fromFh($handle);
$doc = $fmt->parseDocument();

##========================================================================
## Methods: Output: MIME & HTTP stuff

$short = $fmt->shortName();
$ext = $fmt->defaultExtension();

##========================================================================
## Methods: Output: output selection

$fmt = $fmt->flush();
$fmt = $fmt->toString(\$str);
$fmt_or_undef = $fmt->toFile($filename, $formatLevel);
$fmt_or_undef = $fmt->toFh($fh,$formatLevel);

##========================================================================
## Methods: Output: Generic API

$fmt = $fmt->putDocument($doc);

DESCRIPTION

Globals

Variable: @ISA

DTA::CAB::Format::TEIws inherits from DTA::CAB::Format::XmlTokWrap.

Constructors etc.

new
$fmt = CLASS_OR_OBJ->new(%args);

object structure: HASH ref

{
 ##-- new in Format::TEIws
 spliceback => $bool,                    ##-- (output) if true (default), return .cws.cab.xml; otherwise just .cab.t.xml [requires doc 'teibufr' attribute]
 teibufr => \$buf,                       ##-- tei+ws buffer, for spliceback mode
 teidoc => $doc,                         ##-- tei+ws XML::LibXML::Document
 spliceopts => \%opts,                   ##-- options for DTA::TokWrap::Processor::idsplice::new()
 'att.linguistic' => $bool,              ##-- read/write TEI att.linguistic features?
 ##
 ##-- input: inherited from Format::XmlNative
 xdoc => $xdoc,                          ##-- XML::LibXML::Document (tokwrap syntax)
 xprs => $xprs,                          ##-- XML::LibXML parser
 parseXmlData => $bool,                  ##-- if true, _xmldata key will be parsed (default OVERRIDE=false)
 ##
 ##-- output: inherited from Format::XmlTokWrap
 arrayEltKeys => \%akey2ekey,            ##-- maps array keys to element keys for output
 arrayImplicitKeys => \%akey2undef,      ##-- pseudo-hash of array keys NOT mapped to explicit elements
 key2xml => \%key2xml,                   ##-- maps keys to XML-safe names
 xml2key => \%xml2key,                   ##-- maps xml keys to internal keys
 ##
 ##-- output: inherited from Format::XmlNative
 #encoding => $inputEncoding,             ##-- default: UTF-8; applies to output only!
 level => $level,                        ##-- output formatting level (default=0)
 ##
 ##-- common: safety
 safe => $bool,                          ##-- if true (default), no "unsafe" token data will be generated (_xmlnod,etc.)
}

Methods: Input: Generic API

close
$fmt = $fmt->close();

close current input source, if any

fromString
$fmt = $fmt->fromString(\$string);

select input from string $string

fromFile
$fmt = $fmt->fromFile($filename_or_handle);

calls $fmt->fromFh()

fromFh
$fmt = $fmt->fromFh($handle);

just calls $fmt->fromString()

parseDocument
$doc = $fmt->parseDocument();

parses buffered XML::LibXML::Document; override inserts $doc->{teibufr} attribute for spliceback mode

Methods: Output: MIME & HTTP stuff

shortName
$short = $fmt->shortName();

returns "official" short name for this format; override returns "teiws".

defaultExtension
$ext = $fmt->defaultExtension();

returns default filename extension for this format; override returns ".tei+ws.xml".

Methods: Output: output selection

flush
$fmt = $fmt->flush();

flush any buffered output to selected output source

toString
$fmt = $fmt->toString(\$str);
$fmt = $fmt->toString(\$str,$formatLevel);

select output to byte-string; override reverts to DTA::CAB::Format::toString().

toFile
$fmt_or_undef = $fmt->toFile($filename, $formatLevel);

select output to $filename; override reverts to DTA::CAB::Format::toFile().

toFh
$fmt_or_undef = $fmt->toFh($fh,$formatLevel);

select output to filehandle $fh; override reverts to DTA::CAB::Format::toFh().

Methods: Output: Generic API

putDocument
$fmt = $fmt->putDocument($doc);

override respects local 'keepc' and 'spliceback' flags

EXAMPLE

An example file in the format accepted/generated by this module is:

 <?xml version="1.0" encoding="UTF-8"?>
 <TEI>
   <text>
     <fw>Running headers are ignored</fw>
     <s lang="de">
       <w msafe="1" t="wie" errid="ec" hasmorph="1" exlex="wie" lang="de">
	 <moot word="wie" lemma="wie" tag="PWAV"/>
	 <xlit isLatinExt="1" isLatin1="1" latin1Text="wie"/>
       </w>
       <w msafe="0" t="oede">
	 <moot tag="ADJD" lemma="öde" word="öde"/>
	 <xlit isLatinExt="1" isLatin1="1" latin1Text="oede"/>
       </w>
       <w exlex="!" errid="ec" t="!" msafe="1">
	 <xlit latin1Text="!" isLatin1="1" isLatinExt="1"/>
	 <moot word="!" tag="$." lemma="!"/>
       </w>
     </s>
   </text>
 </TEI>

att.linguistic Example

An example file in the format accepted/generated by this module with the att.linguistic option set to a true value is:

<?xml version="1.0" encoding="UTF-8"?>
<TEI>
  <text>
    <fw>Running headers are ignored</fw>
    <s xml:id="s1">
      <w xml:id="w1" lemma="wie" pos="PWAV" norm="Wie">Wie</w>
      <w xml:id="w2" lemma="öde" pos="ADJD" norm="öde" join="right">oede</w>
      <w xml:id="w3" lemma="!" pos="$." norm="!" join="left">!</w>
    </s>
    <lb/>
  </text>
</TEI>

AUTHOR

Bryan Jurish <moocow@cpan.org>

COPYRIGHT AND LICENSE

Copyright (C) 2015-2019 by Bryan Jurish

This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.24.1 or, at your option, any later version of Perl 5 you may have available.

SEE ALSO

dta-cab-analyze.perl(1), dta-cab-convert.perl(1), dta-cab-http-server.perl(1), dta-cab-http-client.perl(1), dta-cab-xmlrpc-server.perl(1), dta-cab-xmlrpc-client.perl(1), DTA::CAB::Server(3pm), DTA::CAB::Client(3pm), DTA::CAB::Format(3pm), DTA::CAB(3pm), perl(1), ...