=encoding utf8

=head1 NAME (名前)

ShiftJIS::CP932::MapUTF - Microsoft CP-932とUnicodeとの変換

=head1 概要

    use ShiftJIS::CP932::MapUTF qw(:all);

    $utf8_string  = cp932_to_utf8($cp932_string);
    $cp932_string = utf8_to_cp932($utf8_string);

=head1 説明

マイクロソフトウィンドウズ (Microsoft Windows) コードページ 932 (CP-932)
のテーブルは 7915 文字からなります。

    JIS X 0201 一バイト文字（191 文字）
    JIS X 0208 二バイト文字（6879 文字）
    NEC特殊文字（83 文字、13区）
    NEC選定IBM拡張文字（374 文字、89～92区）
    IBM拡張文字（388 文字、115～119区）

この表は、往復変換できない二重定義文字を含んでいます。
これらの二重定義文字はベンダー（NEC および IBM）定義の拡張文字のためです。
例えば、Unicode の C<U+2252> に対応付けられる文字は二個あります。
つまり、JIS X 0208 文字の C<0x81e0> と NEC 特殊文字の C<0x8790> です。

実際、CP-932 の 7915 文字を Unicode の 7517 文字に対応付けなければなりません。
このため、398 の往復変換できない対応関係が存在します。

このモジュールは、CP-932 から Unicode に、また、
Unicode から CP-932 に、適切に変換する関数を提供します。

=head2 CP-932 から Unicode への変換

第一引数がレファレンスの場合、それは C<SJIS_CALLBACK> として、
Unicode への対応がない CP-932 文字の処理に用いられます。
(C<STRING> にレファレンスを与えることはできません。)

C<SJIS_CALLBACK> が与えられている場合、
第二引数が C<STRING> として用いられます。
さもなければ第一引数が C<STRING> になります。

もし C<SJIS_CALLBACK> が与えられていない場合、
Unicode への対応がない CP-932 文字は黙って削除され、
部分文字は一バイト分跳ばされます。
C<SJIS_CALLBACK> として、常に空文字列を返す
コードリファレンス (C<sub {''}>) が渡されたかのように動作します。

¡‚̂Ƃ±‚ëAC<SJIS_CALLBACK> ‚Æ‚µ‚ẮA
ƒR[ƒhƒŠƒtƒ@ƒŒƒ“ƒX‚݂̂ªŽg‚¦‚Ü‚·B
ƒR[ƒhƒŠƒtƒ@ƒŒƒ“ƒX‚̕Ԃè’l‚ªƒ}ƒbƒsƒ“ƒO‚̂Ȃ¢•¶Žš‚Ì‘ã‚í‚è‚É‘}“ü‚³‚ê‚Ü‚·B

ƒR[ƒhƒŠƒtƒ@ƒŒƒ“ƒX C<SJIS_CALLBACK> ‚́AˆêŒÂˆÈã‚̈ø”‚ƂƂà‚É
ŒÄ‚яo‚³‚ê‚Ü‚·Bƒ}ƒbƒsƒ“ƒO‚̂Ȃ¢•¶Žš‚ª•”•ª“I‚È“ñƒoƒCƒg•¶Žš
i‘æˆêƒoƒCƒg‚݂̂̈êƒoƒCƒg’·‚Ì•¶Žš—ñj‚̏ꍇA
‘æˆêˆø”‚Í–¢’è‹`’liC<undef>j‚ɂȂèA
‘æ“ñˆø”‚̓oƒCƒg‚ð•\‚·•„†‚È‚µ®”’l‚ɂȂè‚Ü‚·B
•”•ª•¶Žš‚łȂ¯‚ê‚΁A‘æˆêˆø”‚́A•¶Žš‚ð•\‚·•¶Žš—ñ‚ɂȂè‚Ü‚·B

デフォルトでは、部分的な二バイト文字は、文字列（C<STRING>）の末尾にのみ
現れる可能性があり、文字列の先頭や途中には現れません
（C<SJIS_OPTION> の C<'t'> も参照のこと）。

例

    my $sjis_callback = sub {
        my ($char, $byte) = @_;
        return function($char) if defined $char;
        die sprintf "found partial byte 0x%02x", $byte;
    };

上記の例で、C<$char> としては、C<"\x80">, C<"\x82\xf2">, C<"\xfc\xfc">,
C<"\xff"> などがあり得ます。

C<SJIS_CALLBACK> の返り値は、変換先の形式に合わせなければなりません。
例えば、C<cp932_to_utf16be()> とともに UTF-8 を返す
C<SJIS_CALLBACK> を使ってはいけません。
つまり、UTF ごとに、C<SJIS_CALLBACK> を用意する必要があります。

C<SJIS_OPTION> を C<STRING> の後におくことができます。
これらは C<'tg'> や C<'gst'> のように組み合わせることも
できます（順序は任意です）。

    'g'    CP-932 外字（ユーザ定義文字）[0xF040～0xF9FC (95～114区)] を
           Unicode の PUA [0xE000～0xE757] に変換します（1880 文字）。

    's'    CP-932 未定義の一バイト文字を以下のように変換します。
           0x80 => U+0080,  0xA0 => U+F8F0,
           0xFD => U+F8F1,  0xFE => U+F8F2,  0xFF => U+F8F3.

    't'    第二バイトの範囲 [0x40..0x7E, 0x80..0xFC] をチェックします。
           例えば "\x81\x39" はデフォルトでは未定義の二バイト文字と
           みなしますが、't' を用いると、部分文字バイト 0x81 の後に
           一バイト文字 "\x39" が続いたものとみなします。

=over 4

=item C<cp932_to_utf8([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を UTF-8 に変換します。

=item C<cp932_to_unicode([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を Unicode に変換します。
（C<SVf_UTF8> フラグ付きの Perlの内部形式, F<perlunicode> を参照。）

B<‚±‚̊֐”‚Í Perl 5.6.1 ˆÈ~A‚©‚ XS ”łł̂ݒñ‹Ÿ‚³‚ê‚Ü‚·B>

=item C<cp932_to_utf16le([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を UTF-16LE に変換します。

=item C<cp932_to_utf16be([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を UTF-16BE に変換します。

=item C<cp932_to_utf32le([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を UTF-32LE に変換します。

=item C<cp932_to_utf32be([SJIS_CALLBACK,] STRING [, SJIS_OPTION])>

CP-932 を UTF-32BE に変換します。

=back

=head2 Unicode から CP-932 への変換

二重定義文字はすべて、Microsoft PRB Q170559 に従って変換されます。
例えば C<U+2252> は C<"\x87\x90"> ではなく C<"\x81\xE0"> に変換されます。

第一引数がレファレンスの場合、それは C<UNICODE_CALLBACK> として、
CP-932 への対応がない Unicode 文字の処理に用いられます。
(C<STRING> にレファレンスを与えることはできません。)

C<UNICODE_CALLBACK> が与えられている場合、
第二引数が C<STRING> として用いられます。
さもなければ第一引数が C<STRING> になります。

もし C<UNICODE_CALLBACK> が与えられていない場合、
CP-932 への対応がない Unicode 文字は黙って削除され、
また、部分文字は一バイト分跳ばされます。
C<UNICODE_CALLBACK> として、常に空文字列を返す
コードリファレンス (C<sub {''}>) が渡されたかのように動作します。

¡‚̂Ƃ±‚ëAC<UNICODE_CALLBACK> ‚Æ‚µ‚ẮA
ƒR[ƒhƒŠƒtƒ@ƒŒƒ“ƒX‚݂̂ªŽg‚¦‚Ü‚·B
‚»‚̃R[ƒhƒŠƒtƒ@ƒŒƒ“ƒX‚̕Ԃè’l‚ª
ƒ}ƒbƒsƒ“ƒO‚̂Ȃ¢•¶Žš‚Ì‘ã‚í‚è‚É‘}“ü‚³‚ê‚Ü‚·B

ƒR[ƒhƒŠƒtƒ@ƒŒƒ“ƒX C<UNICODE_CALLBACK> ‚́A
ˆêŒÂˆÈã‚̈ø”‚ƂƂà‚ɌĂяo‚³‚ê‚Ü‚·Bƒ}ƒbƒsƒ“ƒO‚̂Ȃ¢•¶Žš‚ª
•”•ª“I•¶Žši•s³‚ȃoƒCƒgj‚̏ꍇA‘æˆêˆø”‚Í–¢’è‹`’liC<undef>j‚ɂȂèA
‘æ“ñˆø”‚̓oƒCƒg‚ð•\‚·•„†‚È‚µ®”’l‚ɂȂè‚Ü‚·B
•”•ª•¶Žš‚łȂ¯‚ê‚΁A‘æˆêˆø”‚́AUnicode•¶Žš‚Ì•„†ˆÊ’u‚ð•\‚·
•„†‚È‚µ®”’l‚ɂȂè‚Ü‚·B

例えば、CP-932 への対応がない文字を HTML 4.01 の数値文字参照に
変換する方法を示します。

    sub toHexNCR {
        my ($char, $byte) = @_;
        return sprintf("&#x%x;", $char) if defined $char;
        die sprintf "illegal byte 0x%02x was found", $byte;
    }

    $cp932 = utf8_to_cp932   (\&toHexNCR, $utf8_string);
    $cp932 = unicode_to_cp932(\&toHexNCR, $unicode_string);
    $cp932 = utf16le_to_cp932(\&toHexNCR, $utf16le_string);

C<UNICODE_CALLBACK> の返り値は CP-932 として正しくある必要があります。

C<UNICODE_OPTION> を C<STRING> の後におくことができます。
これらは C<'fg'> や C<'gsf'> のように組み合わせることも
できます（順序は任意です）。

    'g'    CP-932 外字（ユーザ定義文字）[0xF040～0xF9FC (95～114区)] に
           Unicode の PUA [0xE000～0xE757] から変換します（1880 文字）。

    's'    CP-932 未定義の一バイト文字の対応付けを追加します。
           U+0080 => 0x80,  U+F8F0 => 0xA0,
           U+F8F1 => 0xFD,  U+F8F2 => 0xFE,  U+F8F3 => 0xFF.

    'f'    Unicode から CP-932 への幾つかの代用的な変換 (fallbacks) を
           追加します。マッピングが追加される文字は、latin-1 領域
           [U+00A0..U+00FF] のうちの幾つかの文字と、平仮名のヴ [U+3094,
           片仮名のヴ (0x8394) になります] です。

=over 4

=item C<utf8_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-8 を CP-932 に変換します。

=item C<unicode_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

Unicode を CP-932 に変換します。

この B<Unicode> は、Perl の内部形式（F<perlunicode> 参照）。
C<SVf_UTF8> フラグ付きでない場合、ISO 8859-1 (latin1) 文字列として
Unicode に upgrade されます。

B<‚±‚̊֐”‚Í Perl 5.6.1 ˆÈ~A‚©‚ XS ”łł̂ݒñ‹Ÿ‚³‚ê‚Ü‚·B>

=item C<utf16_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-16 (C<BOM> 付きまたは無し) を CP-932 に変換します。

=item C<utf16le_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-16LE を CP-932 に変換します。

=item C<utf16be_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-16BE を CP-932 に変換します。

=item C<utf32_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-32 (C<BOM> 付きまたは無し) を CP-932 に変換します。

=item C<utf32le_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-32LE を CP-932 に変換します。

=item C<utf32be_to_cp932([UNICODE_CALLBACK,] STRING [, UNICODE_OPTION])>

UTF-32BE を CP-932 に変換します。

=back

=head2 輸出

B<デフォルト:>

    cp932_to_utf8     utf8_to_cp932
    cp932_to_utf16le  utf16le_to_cp932
    cp932_to_utf16be  utf16be_to_cp932

    cp932_to_unicode  unicode_to_cp932 (XS のみで提供されます)

B<要求されれば:>

    cp932_to_utf32le  utf32le_to_cp932
    cp932_to_utf32be  utf32be_to_cp932
                      utf16_to_cp932 [*]
                      utf32_to_cp932 [*]

[*] これらと対応すべき C<cp932_to_utf16()> および C<cp932_to_utf32()>
は未実装です。まだ C<SJIS_CALLBACK> の返り値についてもう少し検討が
必要と考えています。
（文字列の連結に C<BOM> の認識と処理が必要となるでしょう。）

=head1 注意事項

このモジュールの Pure Perl 版はワイド文字（F<perlunicode> を参照）を
理解できません。必要なら、Perl 5.7 以降の
C<utf8::decode>/C<utf8::encode>（F<utf8> を参照）を使ってください。

=head1 作者

SADAHIRO Tomoyuki <SADAHIRO@cpan.org> （貞廣 知行）

Copyright(C) 2001-2006, SADAHIRO Tomoyuki. Japan. All rights reserved.

This module is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=head1 参考資料

=over 4

=item Microsoft PRB, Article ID: Q170559

Conversion Problem Between Shift-JIS and Unicode

=item cp932 to Unicode table

http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT

http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit932.txt

http://www.microsoft.com/globaldev/reference/dbcs/932.htm

=back

=cut