argra****@users*****
argra****@users*****
2011年 9月 10日 (土) 05:38:41 JST
Index: docs/modules/charnames-1.07/charnames.pod diff -u /dev/null docs/modules/charnames-1.07/charnames.pod:1.1 --- /dev/null Sat Sep 10 05:38:41 2011 +++ docs/modules/charnames-1.07/charnames.pod Sat Sep 10 05:38:41 2011 @@ -0,0 +1,519 @@ + +=encoding euc-jp + +=head1 NAME + +=begin original + +charnames - define character names for C<\N{named}> string literal escapes + +=end original + +charnames - C<\N{named}> 文字列リテラルエスケープのための文字名を定義する + +=head1 SYNOPSIS + + use charnames ':full'; + print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n"; + + use charnames ':short'; + print "\N{greek:Sigma} is an upper-case sigma.\n"; + + use charnames qw(cyrillic greek); + print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; + + use charnames ":full", ":alias" => { + e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", + }; + print "\N{e_ACUTE} is a small letter e with an acute.\n"; + + use charnames (); + print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" + printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints "10330" + +=head1 DESCRIPTION + +=begin original + +Pragma C<use charnames> supports arguments C<:full>, C<:short>, script +names and customized aliases. If C<:full> is present, for expansion of +C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of +standard Unicode character names. If C<:short> is present, and +C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up +as a letter in script C<SCRIPT>. If pragma C<use charnames> is used +with script name arguments, then for C<\N{CHARNAME}> the name +C<CHARNAME> is looked up as a letter in the given scripts (in the +specified order). Customized aliases are explained in L</CUSTOM ALIASES>. + +=end original + +Pragma C<use charnames> supports arguments C<:full>, C<:short>, script +names and customized aliases. If C<:full> is present, for expansion of +C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of +standard Unicode character names. 
If C<:short> is present, and +C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up +as a letter in script C<SCRIPT>. If pragma C<use charnames> is used +with script name arguments, then for C<\N{CHARNAME}> the name +C<CHARNAME> is looked up as a letter in the given scripts (in the +specified order). Customized aliases are explained in L</CUSTOM ALIASES>. +(TBT) + +=begin original + +For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME> +this pragma looks for the names + +=end original + +For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME> +this pragma looks for the names +(TBT) + + SCRIPTNAME CAPITAL LETTER CHARNAME + SCRIPTNAME SMALL LETTER CHARNAME + SCRIPTNAME LETTER CHARNAME + +=begin original + +in the table of standard Unicode names. If C<CHARNAME> is lowercase, +then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant +is ignored. + +=end original + +in the table of standard Unicode names. If C<CHARNAME> is lowercase, +then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant +is ignored. +(TBT) + +=begin original + +Note that C<\N{...}> is compile-time, it's a special form of string +constant used inside double-quoted strings: in other words, you cannot +use variables inside the C<\N{...}>. If you want similar run-time +functionality, use charnames::vianame(). + +=end original + +Note that C<\N{...}> is compile-time, it's a special form of string +constant used inside double-quoted strings: in other words, you cannot +use variables inside the C<\N{...}>. If you want similar run-time +functionality, use charnames::vianame(). +(TBT) + +=begin original + +For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F) +as of Unicode 3.1, there are no official Unicode names but you can use +instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth). In +Unicode 3.2 (as of Perl 5.8) some naming changes take place ISO 6429 +has been updated, see L</ALIASES>. 
Also note that the U+0080, U+0081, +U+0084, and U+0099 do not have names even in ISO 6429. + +=end original + +For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F) +as of Unicode 3.1, there are no official Unicode names but you can use +instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth). In +Unicode 3.2 (as of Perl 5.8) some naming changes take place ISO 6429 +has been updated, see L</ALIASES>. Also note that the U+0080, U+0081, +U+0084, and U+0099 do not have names even in ISO 6429. +(TBT) + +=begin original + +Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}" +is the Unicode smiley face, or "\N{WHITE SMILING FACE}". + +=end original + +Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}" +is the Unicode smiley face, or "\N{WHITE SMILING FACE}". +(TBT) + +=head1 ALIASES + +(別名) + +=begin original + +A few aliases have been defined for convenience: instead of having +to use the official names + +=end original + +A few aliases have been defined for convenience: instead of having +to use the official names +(TBT) + + LINE FEED (LF) + FORM FEED (FF) + CARRIAGE RETURN (CR) + NEXT LINE (NEL) + +=begin original + +(yes, with parentheses) one can use + +=end original + +(yes, with parentheses) one can use +(TBT) + + LINE FEED + FORM FEED + CARRIAGE RETURN + NEXT LINE + LF + FF + CR + NEL + +=begin original + +One can also use + +=end original + +One can also use +(TBT) + + BYTE ORDER MARK + BOM + +=begin original + +and + +=end original + +and +(TBT) + + ZWNJ + ZWJ + +=begin original + +for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. + +=end original + +for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. 
+(TBT) + +=begin original + +For backward compatibility one can use the old names for +certain C0 and C1 controls + +=end original + +For backward compatibility one can use the old names for +certain C0 and C1 controls +(TBT) + + old new + + HORIZONTAL TABULATION CHARACTER TABULATION + VERTICAL TABULATION LINE TABULATION + FILE SEPARATOR INFORMATION SEPARATOR FOUR + GROUP SEPARATOR INFORMATION SEPARATOR THREE + RECORD SEPARATOR INFORMATION SEPARATOR TWO + UNIT SEPARATOR INFORMATION SEPARATOR ONE + PARTIAL LINE DOWN PARTIAL LINE FORWARD + PARTIAL LINE UP PARTIAL LINE BACKWARD + +=begin original + +but the old names in addition to giving the character +will also give a warning about being deprecated. + +=end original + +but the old names in addition to giving the character +will also give a warning about being deprecated. +(TBT) + +=head1 CUSTOM ALIASES + +(カスタム別名) + +=begin original + +This version of charnames supports three mechanisms of adding local +or customized aliases to standard Unicode naming conventions (:full) + +=end original + +This version of charnames supports three mechanisms of adding local +or customized aliases to standard Unicode naming conventions (:full) +(TBT) + +=head2 Anonymous hashes + +(無名ハッシュ) + + use charnames ":full", ":alias" => { + e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", + }; + my $str = "\N{e_ACUTE}"; + +=head2 Alias file + +(別名ファイル) + + use charnames ":full", ":alias" => "pro"; + +=begin original + + will try to read "unicore/pro_alias.pl" from the @INC path. This + file should return a list in plain perl: + +=end original + +will try to read "unicore/pro_alias.pl" from the @INC path. 
This +file should return a list in plain perl: +(TBT) + + ( + A_GRAVE => "LATIN CAPITAL LETTER A WITH GRAVE", + A_CIRCUM => "LATIN CAPITAL LETTER A WITH CIRCUMFLEX", + A_DIAERES => "LATIN CAPITAL LETTER A WITH DIAERESIS", + A_TILDE => "LATIN CAPITAL LETTER A WITH TILDE", + A_BREVE => "LATIN CAPITAL LETTER A WITH BREVE", + A_RING => "LATIN CAPITAL LETTER A WITH RING ABOVE", + A_MACRON => "LATIN CAPITAL LETTER A WITH MACRON", + ); + +=head2 Alias shortcut + +(別名ショートカット) + + use charnames ":alias" => ":pro"; + +=begin original + + works exactly the same as the alias pairs, only this time, + ":full" is inserted automatically as first argument (if no + other argument is given). + +=end original + +works exactly the same as the alias pairs, only this time, +":full" is inserted automatically as first argument (if no +other argument is given). +(TBT) + +=head1 charnames::viacode(code) + +=begin original + +Returns the full name of the character indicated by the numeric code. +The example + +=end original + +Returns the full name of the character indicated by the numeric code. +The example +(TBT) + + print charnames::viacode(0x2722); + +=begin original + +prints "FOUR TEARDROP-SPOKED ASTERISK". + +=end original + +prints "FOUR TEARDROP-SPOKED ASTERISK". +(TBT) + +=begin original + +Returns undef if no name is known for the code. + +=end original + +Returns undef if no name is known for the code. +(TBT) + +=begin original + +This works only for the standard names, and does not yet apply +to custom translators. + +=end original + +This works only for the standard names, and does not yet apply +to custom translators. +(TBT) + +=begin original + +Notice that the name returned for U+FEFF is "ZERO WIDTH NO-BREAK +SPACE", not "BYTE ORDER MARK". + +=end original + +Notice that the name returned for U+FEFF is "ZERO WIDTH NO-BREAK +SPACE", not "BYTE ORDER MARK". +(TBT) + +=head1 charnames::vianame(name) + +=begin original + +Returns the code point indicated by the name. 
+The example + +=end original + +Returns the code point indicated by the name. +The example +(TBT) + + printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK"); + +=begin original + +prints "2722". + +=end original + +prints "2722". +(TBT) + +=begin original + +Returns undef if the name is unknown. + +=end original + +Returns undef if the name is unknown. +(TBT) + +=begin original + +This works only for the standard names, and does not yet apply +to custom translators. + +=end original + +This works only for the standard names, and does not yet apply +to custom translators. +(TBT) + +=head1 CUSTOM TRANSLATORS + +(カスタムトランスレータ) + +=begin original + +The mechanism of translation of C<\N{...}> escapes is general and not +hardwired into F<charnames.pm>. A module can install custom +translations (inside the scope which C<use>s the module) with the +following magic incantation: + +=end original + +The mechanism of translation of C<\N{...}> escapes is general and not +hardwired into F<charnames.pm>. A module can install custom +translations (inside the scope which C<use>s the module) with the +following magic incantation: +(TBT) + + sub import { + shift; + $^H{charnames} = \&translator; + } + +=begin original + +Here translator() is a subroutine which takes C<CHARNAME> as an +argument, and returns text to insert into the string instead of the +C<\N{CHARNAME}> escape. Since the text to insert should be different +in C<bytes> mode and out of it, the function should check the current +state of C<bytes>-flag as in: + +=end original + +Here translator() is a subroutine which takes C<CHARNAME> as an +argument, and returns text to insert into the string instead of the +C<\N{CHARNAME}> escape. 
Since the text to insert should be different +in C<bytes> mode and out of it, the function should check the current +state of C<bytes>-flag as in: +(TBT) + + use bytes (); # for $bytes::hint_bits + sub translator { + if ($^H & $bytes::hint_bits) { + return bytes_translator(@_); + } + else { + return utf8_translator(@_); + } + } + +=head1 ILLEGAL CHARACTERS + +(不正な文字) + +=begin original + +If you ask by name for a character that does not exist, a warning is +given and the Unicode I<replacement character> "\x{FFFD}" is returned. + +=end original + +If you ask by name for a character that does not exist, a warning is +given and the Unicode I<replacement character> "\x{FFFD}" is returned. +(TBT) + +=begin original + +If you ask by code for a character that does not exist, no warning is +given and C<undef> is returned. (Though if you ask for a code point +past U+10FFFF you do get a warning.) + +=end original + +If you ask by code for a character that does not exist, no warning is +given and C<undef> is returned. (Though if you ask for a code point +past U+10FFFF you do get a warning.) +(TBT) + +=head1 BUGS + +=begin original + +Unicode standard named sequences are not recognized, such as +C<LATIN CAPITAL LETTER A WITH MACRON AND GRAVE> +(which should mean C<LATIN CAPITAL LETTER A WITH MACRON> with an additional +C<COMBINING GRAVE ACCENT>). + +=end original + +Unicode standard named sequences are not recognized, such as +C<LATIN CAPITAL LETTER A WITH MACRON AND GRAVE> +(which should mean C<LATIN CAPITAL LETTER A WITH MACRON> with an additional +C<COMBINING GRAVE ACCENT>). +(TBT) + +=begin original + +Since evaluation of the translation function happens in a middle of +compilation (of a string literal), the translation function should not +do any C<eval>s or C<require>s. This restriction should be lifted in +a future version of Perl. 
+ +=end original + +Since evaluation of the translation function happens in the middle of +compilation (of a string literal), the translation function should not +do any C<eval>s or C<require>s. This restriction should be lifted in +a future version of Perl. +(TBT) + +=cut +