sub
new {
my
(
$class
,
%args
) =
@_
;
my
$self
= {
type
=>
''
,
string
=>
''
,
last_position
=> 0,
tag
=>
''
,
fmt
=>
''
,
lang
=>
'en'
,
};
foreach
my
$k
(
keys
%$self
) {
if
(
defined
$args
{
$k
}) {
$self
->{
$k
} =
$args
{
$k
};
}
delete
$args
{
$k
};
}
die
"Extra arguments passed :"
.
join
(
" "
,
%args
)
if
%args
;
die
"Missing type for <$self->{string}>"
unless
$self
->{type};
unless
(
$self
->{fmt} eq
'ltx'
or
$self
->{fmt} eq
'html'
) {
die
"Missing format $self->{fmt} for <$self->{string}>"
}
bless
$self
,
$class
;
}
sub
type {
my
(
$self
,
$type
) =
@_
;
if
(
$type
) {
$self
->{type} =
$type
;
}
return
$self
->{type};
}
sub
last_position {
shift
->{last_position};
}
sub
string {
shift
->{string};
}
sub
lang {
shift
->{lang};
}
sub
append {
my
(
$self
,
$element
) =
@_
;
$self
->{string} .=
$element
->string;
$self
->{last_position} =
$element
->last_position;
}
sub
tag {
if
(
@_
> 1) {
$_
[0]{tag} =
$_
[1];
}
$_
[0]{tag};
}
sub
fmt {
shift
->{fmt};
}
sub
stringify {
my
$self
=
shift
;
my
$type
=
$self
->type;
my
$string
=
$self
->string;
if
(
$type
eq
'text'
) {
if
(
$self
->is_latex) {
$string
=
$self
->escape_tex(
$string
);
$string
=
$self
->_ltx_replace_ldots(
$string
);
$string
=
$self
->_ltx_replace_slash(
$string
);
return
$string
;
}
elsif
(
$self
->is_html) {
if
(
$self
->lang eq
'fr'
) {
$string
=
$self
->_html_french_punctuation(
$string
);
$string
=
$self
->escape_all_html(
$string
);
$string
=~ s/\x{a0}/&
$string
=~ s/\x{202f}/&
return
$string
;
}
else
{
return
$self
->escape_all_html(
$string
);
}
}
else
{
die
"Not reached"
;
}
}
if
(
$type
eq
'safe'
) {
return
$self
->verbatim_string(
$string
);
}
if
(
$type
eq
'verbatim'
) {
if
(
$string
=~ /\A<verbatim>(.*)<\/verbatim>\z/s) {
$string
= $1;
return
$self
->verbatim_string(
$string
);
}
else
{
die
"<$string> doesn't match verbatim!"
;
}
}
elsif
(
$type
eq
'anchor'
) {
my
$anchor
=
$string
;
$anchor
=~ s/[^A-Za-z0-9-]//g;
die
"Bad anchor "
.
$string
unless
length
(
$anchor
);
if
(
$self
->is_latex) {
return
"\\hyperdef{amuse}{$anchor}{}\%\n"
;
}
elsif
(
$self
->is_html) {
return
qq{<a id="text-amuse-label-$anchor" class="text-amuse-internal-anchor"><\/a>\n}
}
else
{
die
"Not reached"
;
}
}
elsif
(
$type
eq
'open'
or
$type
eq
'close'
) {
my
$out
=
$self
->_markup_table->{
$self
->tag}->{
$type
}->{
$self
->fmt};
die
"Missing markup for $self->fmt $type $self->tag"
unless
$out
;
return
$out
;
}
elsif
(
$type
eq
'nobreakspace'
) {
if
(
$self
->is_latex) {
return
'~'
;
}
elsif
(
$self
->is_html) {
return
' '
}
}
elsif
(
$type
eq
'noindent'
) {
if
(
$self
->is_latex) {
return
"\\noindent "
;
}
else
{
my
$leading
=
''
;
if
(
$string
=~ m/\A(\s+)/) {
$leading
= $1;
}
return
"$leading<br />"
;
}
}
elsif
(
$type
eq
'br'
) {
if
(
$self
->is_latex) {
return
"\\forcelinebreak "
;
}
else
{
my
$leading
=
''
;
if
(
$string
=~ m/\A(\s+)/) {
$leading
= $1;
}
return
"$leading<br />"
;
}
}
elsif
(
$type
eq
'bigskip'
) {
if
(
$self
->is_latex) {
return
"\n\\bigskip"
;
}
else
{
my
$leading
=
''
;
if
(
$string
=~ m/\A(\s+)/) {
$leading
= $1;
}
return
"$leading<br />"
;
}
}
elsif
(
$type
eq
'verbatim_code'
) {
warn
qq{<code> is already verbatim! in "$string"\n}
if
$string
=~ m{<verbatim>.+</verbatim>};
if
(
$string
=~ /\A=(.+)=\z/s) {
$string
= $1;
}
elsif
(
$string
=~ /\A<code><verbatim>(.*)<\/verbatim><\/code>\z/s) {
$string
= $1;
}
elsif
(
$string
=~ /\A<code>(.*)<\/code>\z/s) {
$string
= $1;
}
else
{
die
"$string doesn't match the <code> pattern!"
;
}
if
(
length
$string
) {
return
$self
->_markup_table->{code}->{
open
}->{
$self
->fmt}
.
$self
->verbatim_string(
$string
)
.
$self
->_markup_table->{code}->{
close
}->{
$self
->fmt};
}
else
{
return
''
;
}
}
else
{
die
"Unrecognized type "
.
$type
.
" for "
.
$string
;
}
}
sub
_markup_table {
return
{
'rtl'
=> {
open
=> {
html
=>
'<span dir="rtl">'
,
ltx
=>
"\\RL{"
,
},
close
=> {
html
=>
'</span>‎'
, # LRM (U+200E LEFT-TO-RIGHT MARK)
ltx
=>
'}'
,
},
},
'ltr'
=> {
open
=> {
html
=>
'<span dir="ltr">'
,
ltx
=>
"\\LR{"
,
},
close
=> {
html
=>
'</span>‏'
, # RLM (U+200F RIGHT-TO-LEFT MARK)
ltx
=>
'}'
,
},
},
'em'
=> {
open
=> {
html
=>
'<em>'
,
ltx
=>
"\\emph{"
},
close
=> {
html
=>
'</em>'
,
ltx
=>
'}'
,
}
},
'strong'
=> {
open
=> {
html
=>
'<strong>'
,
ltx
=>
"\\textbf{"
},
close
=> {
html
=>
'</strong>'
,
ltx
=>
'}'
,
}
},
'code'
=> {
open
=> {
html
=>
'<code>'
,
ltx
=>
"\\texttt{"
,
},
close
=> {
html
=>
'</code>'
,
ltx
=>
'}'
,
}
},
'strike'
=> {
open
=> {
html
=>
'<strike>'
,
ltx
=>
"\\sout{"
},
close
=> {
html
=>
'</strike>'
,
ltx
=>
'}'
,
}
},
'del'
=> {
open
=> {
html
=>
'<del>'
,
ltx
=>
"\\sout{"
},
close
=> {
html
=>
'</del>'
,
ltx
=>
'}'
,
}
},
'sup'
=> {
open
=> {
html
=>
'<sup>'
,
ltx
=>
"\\textsuperscript{"
},
close
=> {
html
=>
'</sup>'
,
ltx
=>
'}'
,
}
},
'sub'
=> {
open
=> {
html
=>
'<sub>'
,
ltx
=>
"\\textsubscript{"
},
close
=> {
html
=>
'</sub>'
,
ltx
=>
'}'
,
}
},
};
}
sub
_ltx_replace_ldots {
my
(
$self
,
$string
) =
@_
;
my
$ldots
=
"\\dots{}"
;
$string
=~ s/\.{3,4}/
$ldots
/g ;
$string
=~ s/\x{2026}/
$ldots
/g;
return
$string
;
}
sub
_ltx_replace_slash {
my
(
$self
,
$string
) =
@_
;
$string
=~ s!/!\\Slash{}!g;
return
$string
;
}
sub
_html_french_punctuation {
my
(
$self
,
$string
) =
@_
;
my
$chars
=
qr{[\x{20}
\x{a0}\x{202f}\(\)\[\]\.\,\:«»\;\!\?]};
my
$ws
=
qr{[\x{20}
\x{a0}\x{202f}]};
$string
=~ s/
$ws
*([;!?])(?=
$chars
)/\x{202f}$1/gs;
$string
=~ s/
$ws
*([;!?])$/\x{202f}$1/gms;
$string
=~ s/
$ws
*([:»])(?=
$chars
)/\x{a0}$1/gs;
$string
=~ s/
$ws
*([:»])$/\x{a0}$1/gms;
$string
=~ s/^«
$ws
*/«\x{a0}/gms;
$string
=~ s/(?<=
$chars
)«
$ws
*/«\x{a0}/gs;
return
$string
;
}
sub
escape_all_html {
my
(
$self
,
$string
) =
@_
;
$string
=~ s/&/
&
;/g;
$string
=~ s/</
<
;/g;
$string
=~ s/>/
>
;/g;
$string
=~ s/"/
"
;/g;
$string
=~ s/'/&
return
$string
;
}
sub
escape_tex {
my
(
$self
,
$string
) =
@_
;
$string
=~ s/\\/\\textbackslash{}/g;
$string
=~ s/
$string
=~ s/\$/\\\$/g;
$string
=~ s/%/\\%/g;
$string
=~ s/&/\\&/g;
$string
=~ s/_/\\_/g ;
$string
=~ s/\{/\\{/g ;
$string
=~ s/\}/\\}/g ;
$string
=~ s/\\textbackslash\\\{\\\}/\\textbackslash{}/g;
$string
=~ s/~/\\textasciitilde{}/g ;
$string
=~ s/\^/\\^{}/g ;
$string
=~ s/\|/\\textbar{}/g;
return
$string
;
}
sub
is_latex {
shift
->fmt eq
'ltx'
;
}
sub
is_html {
shift
->fmt eq
'html'
;
}
sub
unroll {
my
$self
=
shift
;
my
@new
;
my
%map
= (
'='
=> [
qw/code/
],
'*'
=> [
qw/em/
],
'**'
=> [
qw/strong/
],
'***'
=> [
qw/strong em/
],
);
if
(
$self
->type eq
'open_inline'
) {
push
@new
,
map
{ +{
type
=>
'open'
,
tag
=>
$_
} } @{
$map
{
$self
->tag}};
}
elsif
(
$self
->type eq
'close_inline'
) {
push
@new
,
map
{ +{
type
=>
'close'
,
tag
=>
$_
} }
reverse
@{
$map
{
$self
->tag}};
}
else
{
die
"unroll can be called only on close_inline/open_inline, not "
.
$self
->type .
" "
.
$self
->string;
}
return
map
{ __PACKAGE__->new(
%$_
,
string
=>
''
,
fmt
=>
$self
->fmt) }
@new
;
}
sub
verbatim_string {
my
(
$self
,
$string
) =
@_
;
if
(
$self
->is_latex) {
return
$self
->escape_tex(
$string
);
}
elsif
(
$self
->is_html) {
return
$self
->escape_all_html(
$string
);
}
else
{
die
"Not reached"
;
}
}
1;