#! /bin/false
# vim: set autoindent shiftwidth=4 tabstop=4:
# List of internally known conversions.
# Copyright (C) 2002-2016 Guido Flohr <guido.flohr@cantanea.com>,
# all rights reserved.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Locale::Recode::_Conversions;
use strict;
use integer;
use vars qw ($conversions $optional_conversions);
# Built-in conversions: the canonical names of the encodings that are always
# available, independent of Encode.
#
# Keys are upper-case encoding names; resolveAlias() upper-cases its argument
# before looking it up here, so keys must stay upper-case.  The value is what
# isSupported() returns for the encoding and what findPath() places in the
# first slot of each conversion step.
# NOTE(review): the values look like conversion module names (mostly the key
# with '-' replaced by '_') -- confirm the namespace against the caller.
#
# isSupported() may overwrite an entry with '_Encode' at run time when the
# same name is also listed in $optional_conversions and Encode can handle it.
$conversions = {
'ASMO_449' => 'ASMO_449',
'ATARI-ST-EURO' => 'ATARI_ST_EURO',
'ATARI-ST' => 'ATARI_ST',
'CP10007' => 'CP10007',
'CSN_369103' => 'CSN_369103',
'CWI' => 'CWI',
'DEC-MCS' => 'DEC_MCS',
'EBCDIC-AT-DE-A' => 'EBCDIC_AT_DE_A',
'EBCDIC-AT-DE' => 'EBCDIC_AT_DE',
'EBCDIC-CA-FR' => 'EBCDIC_CA_FR',
'EBCDIC-DK-NO-A' => 'EBCDIC_DK_NO_A',
'EBCDIC-DK-NO' => 'EBCDIC_DK_NO',
'EBCDIC-ES-A' => 'EBCDIC_ES_A',
'EBCDIC-ES-S' => 'EBCDIC_ES_S',
'EBCDIC-ES' => 'EBCDIC_ES',
'EBCDIC-FI-SE-A' => 'EBCDIC_FI_SE_A',
'EBCDIC-FI-SE' => 'EBCDIC_FI_SE',
'EBCDIC-FR' => 'EBCDIC_FR',
'EBCDIC-IS-FRISS' => 'EBCDIC_IS_FRISS',
'EBCDIC-IT' => 'EBCDIC_IT',
'EBCDIC-PT' => 'EBCDIC_PT',
'EBCDIC-UK' => 'EBCDIC_UK',
'EBCDIC-US' => 'EBCDIC_US',
'ECMA-CYRILLIC' => 'ECMA_CYRILLIC',
'GEORGIAN-ACADEMY' => 'GEORGIAN_ACADEMY',
'GEORGIAN-PS' => 'GEORGIAN_PS',
'GOST_19768-74' => 'GOST_19768_74',
'GREEK-CCITT' => 'GREEK_CCITT',
'GREEK7-OLD' => 'GREEK7_OLD',
'GREEK7' => 'GREEK7',
'HP-ROMAN8' => 'HP_ROMAN8',
'IBM037' => 'IBM037',
'IBM038' => 'IBM038',
'IBM1004' => 'IBM1004',
'IBM1026' => 'IBM1026',
'IBM1047' => 'IBM1047',
'IBM256' => 'IBM256',
'IBM273' => 'IBM273',
'IBM274' => 'IBM274',
'IBM275' => 'IBM275',
'IBM277' => 'IBM277',
'IBM278' => 'IBM278',
'IBM280' => 'IBM280',
'IBM281' => 'IBM281',
'IBM284' => 'IBM284',
'IBM285' => 'IBM285',
'IBM290' => 'IBM290',
'IBM297' => 'IBM297',
'IBM420' => 'IBM420',
'IBM423' => 'IBM423',
'IBM424' => 'IBM424',
'IBM437' => 'IBM437',
'IBM500' => 'IBM500',
'IBM850' => 'IBM850',
'IBM851' => 'IBM851',
'IBM852' => 'IBM852',
'IBM855' => 'IBM855',
'IBM857' => 'IBM857',
'IBM860' => 'IBM860',
'IBM861' => 'IBM861',
'IBM862' => 'IBM862',
'IBM863' => 'IBM863',
'IBM864' => 'IBM864',
'IBM865' => 'IBM865',
'IBM866' => 'IBM866',
'IBM868' => 'IBM868',
'IBM869' => 'IBM869',
'IBM870' => 'IBM870',
'IBM871' => 'IBM871',
'IBM874' => 'IBM874',
'IBM875' => 'IBM875',
'IBM880' => 'IBM880',
'IBM891' => 'IBM891',
'IBM903' => 'IBM903',
'IBM904' => 'IBM904',
'IBM905' => 'IBM905',
'IBM918' => 'IBM918',
'IEC_P27-1' => 'IEC_P27_1',
'INIS-8' => 'INIS_8',
'INIS-CYRILLIC' => 'INIS_CYRILLIC',
'INIS' => 'INIS',
'ISO-8859-1' => 'ISO_8859_1',
'ISO-8859-10' => 'ISO_8859_10',
'ISO-8859-11' => 'ISO_8859_11',
'ISO-8859-13' => 'ISO_8859_13',
'ISO-8859-14' => 'ISO_8859_14',
'ISO-8859-15' => 'ISO_8859_15',
'ISO-8859-16' => 'ISO_8859_16',
'ISO-8859-2' => 'ISO_8859_2',
'ISO-8859-3' => 'ISO_8859_3',
'ISO-8859-4' => 'ISO_8859_4',
'ISO-8859-5' => 'ISO_8859_5',
'ISO-8859-6' => 'ISO_8859_6',
'ISO-8859-7' => 'ISO_8859_7',
'ISO-8859-8' => 'ISO_8859_8',
'ISO-8859-9' => 'ISO_8859_9',
'ISO_10367-BOX' => 'ISO_10367_BOX',
'ISO_2033-1983' => 'ISO_2033_1983',
'ISO_5427-EXT' => 'ISO_5427_EXT',
'ISO_5427' => 'ISO_5427',
'ISO_5428' => 'ISO_5428',
'KOI-8' => 'KOI_8',
'KOI8-R' => 'KOI8_R',
'KOI8-RU' => 'KOI8_RU',
'KOI8-T' => 'KOI8_T',
'KOI8-U' => 'KOI8_U',
'LATIN-GREEK-1' => 'LATIN_GREEK_1',
'LATIN-GREEK' => 'LATIN_GREEK',
'MACINTOSH' => 'MACINTOSH',
'MACARABIC' => 'MACARABIC',
'MACCYRILLIC' => 'MACCYRILLIC',
'MACCROATIAN' => 'MACCROATIAN',
'MACGREEK' => 'MACGREEK',
'MACHEBREW' => 'MACHEBREW',
'MACICELAND' => 'MACICELAND',
'MACROMANIA' => 'MACROMANIA',
'MACTHAI' => 'MACTHAI',
'MACTURKISH' => 'MACTURKISH',
'MACUKRAINE' => 'MACUKRAINE',
'MAC-IS' => 'MAC_IS',
'MAC-SAMI' => 'MAC_SAMI',
'MAC-UK' => 'MAC_UK',
'NATS-DANO' => 'NATS_DANO',
'NATS-SEFI' => 'NATS_SEFI',
'NEXTSTEP' => 'NEXTSTEP',
'TIS-620' => 'TIS_620',
'UTF-8' => 'UTF_8',
'VISCII' => 'VISCII',
# The WIN-SAMI-2 and WINDOWS-125x values do not follow the '-' to '_' pattern
# used by the rest of the table.
'WIN-SAMI-2' => 'SAMI_WS2',
'WINDOWS-1250' => 'CP1250',
'WINDOWS-1251' => 'CP1251',
'WINDOWS-1252' => 'CP1252',
'WINDOWS-1253' => 'CP1253',
'WINDOWS-1254' => 'CP1254',
'WINDOWS-1256' => 'CP1256',
'WINDOWS-1257' => 'CP1257',
'US-ASCII' => 'US_ASCII',
};
# Encodings that may be available via Encode(3pm).
#
# Only the keys matter; every value is undef and the table is queried with
# exists().  isSupported() probes a name the first time it is asked for it:
# if Encode can be loaded the entry is deleted from this table, and if Encode
# can also encode with it the name is registered in $conversions as '_Encode'.
# A name listed both here and in $conversions is therefore served by Encode
# when the probe succeeds and by the built-in conversion otherwise.
#
# Entries that are commented out are deliberately disabled; the comment above
# each one records the defect found in Encode's version of that encoding.
$optional_conversions = {
'BIG5' => undef,
'BIG5-HKSCS' => undef,
'CN-GB' => undef,
'CN-GB-ISOIR165' => undef,
'CP1006' => undef,
'CP1026' => undef,
'CP1047' => undef,
'CP1361' => undef,
'CP949' => undef,
'CP37' => undef,
'CP424' => undef,
'CP500' => undef,
'CP737' => undef,
'CP775' => undef,
'CP856' => undef,
'CP874' => undef,
'CP875' => undef,
'CP932' => undef,
'CP936' => undef,
'CP950' => undef,
'EUC-JP' => undef,
'EUC-KR' => undef,
'EUC-TW' => undef,
# The mapping from 0xef to 0xff is missing.
# 'HP-ROMAN8' => undef,
'GB18030' => undef,
'HZ' => undef,
'IBM437' => undef,
'IBM850' => undef,
'IBM852' => undef,
'IBM855' => undef,
'IBM857' => undef,
'IBM860' => undef,
'IBM861' => undef,
'IBM862' => undef,
'IBM863' => undef,
'IBM864' => undef,
'IBM865' => undef,
'IBM866' => undef,
'IBM869' => undef,
'ISO-10646-UCS-2' => undef,
'ISO-10646-UCS-4' => undef,
'ISO-2022-JP' => undef,
'ISO-2022-JP-1' => undef,
'ISO-2022-KR' => undef,
'ISO-8859-1' => undef,
'ISO-8859-10' => undef,
# This is broken in some versions of Encode.
# 'ISO-8859-11' => undef,
'ISO-8859-13' => undef,
'ISO-8859-14' => undef,
'ISO-8859-15' => undef,
# Errors at 0xa5 and 0xab.
# 'ISO-8859-16' => undef,
'ISO-8859-2' => undef,
'ISO-8859-3' => undef,
'ISO-8859-4' => undef,
'ISO-8859-5' => undef,
# Uses Arabic digits in the ASCII range?!
# 'ISO-8859-6' => undef,
# 0xa1 and 0xa2 are incorrectly encoded.
# 'ISO-8859-7' => undef,
# 0xfd and 0xfe are missing.
# 'ISO-8859-8' => undef,
'ISO-8859-9' => undef,
'ISO-IR-149' => undef,
'KOI8-R' => undef,
# 0x95 is BULLET, not BULLET OPERATOR.
# 'KOI8-U' => undef,
# Seems to be messed up in certain Encode versions.
# 'MACINTOSH' => undef,
# TODO: Check other Mac encodings for correctness.
# Nextstep is completely broken in my version of Encode.
# 'NEXTSTEP' => undef,
'SHIFT_JIS' => undef,
'UCS-2BE' => undef,
'UCS-2LE' => undef,
'UCS-4BE' => undef,
'UCS-4LE' => undef,
'US-ASCII' => undef,
'UTF-16' => undef,
'UTF-16BE' => undef,
'UTF-16LE' => undef,
'UTF-32' => undef,
'UTF-32BE' => undef,
'UTF-32LE' => undef,
'UTF-8' => undef,
# 0x86 is missing, 0xa6 is incorrectly encoded.
# 'VISCII' => undef,
'WINDOWS-1250' => undef,
'WINDOWS-1251' => undef,
'WINDOWS-1252' => undef,
'WINDOWS-1253' => undef,
'WINDOWS-1254' => undef,
'WINDOWS-1255' => undef,
'WINDOWS-1256' => undef,
'WINDOWS-1257' => undef,
'WINDOWS-1258' => undef,
};
# Cached result of trying to load Encode: undef until isSupported() has
# probed for it, afterwards true if Encode could be loaded and false if not.
my $has_encode;
# Class method: map an encoding name or alias to its canonical name.
#
# The argument is upper-cased first.  A name that is already a key of
# $conversions or $optional_conversions is returned as is (upper-cased);
# anything else is looked up in the table returned by
# Locale::Recode::_Aliases::ALIASES(), which is loaded on demand.
#
# Returns the canonical name, or nothing (undef in scalar context, the
# empty list in list context) if the name is unknown.
sub resolveAlias
{
    # $_[0] is the invocant, which is not needed here.
    my $wanted = uc $_[1];

    # Canonical names need no alias lookup.
    foreach my $table ($conversions, $optional_conversions) {
        return $wanted if exists $table->{$wanted};
    }

    # Load the (large) alias table only when it is really needed.
    require Locale::Recode::_Aliases;
    my $alias_for = Locale::Recode::_Aliases::ALIASES();

    my $canonical = $alias_for->{$wanted};
    return unless $canonical;

    return $canonical;
}
# Class method: tell which conversion handles ENCODING.
#
# ENCODING may be a canonical name or an alias in any case; it is resolved
# with resolveAlias().  If the resolved name is listed in
# $optional_conversions, Encode is probed first:
#
#   - Encode itself is loaded at most once; the outcome is cached in
#     $has_encode.
#   - If Encode is available, the encoding is tried out once.  On success
#     the name is registered in $conversions with the value '_Encode'
#     (replacing a built-in entry of the same name).  Either way the name is
#     removed from $optional_conversions so that it is never probed again.
#
# Returns the $conversions entry for the resolved name, or nothing (undef in
# scalar context, the empty list in list context) if ENCODING is undefined,
# empty, unknown or not usable.
#
# Both probes use block eval with a narrowly scoped "local $@", so that the
# caller's $@ is left untouched and no code is built from a string.
sub isSupported
{
    my ($class, $encoding) = @_;

    return unless defined $encoding && length $encoding;

    $encoding = uc $encoding;
    my $mimename = $class->resolveAlias ($encoding);
    return unless $mimename;

    # Determine the correct module.
    if (exists $optional_conversions->{$mimename}) {
        unless (defined $has_encode) {
            # "local $@" is confined to the do block so that an exception
            # thrown further down is never swallowed by the restore of $@.
            $has_encode = do {
                local $@;
                eval { require Encode; 1 } ? 1 : 0;
            };

            if ($has_encode) {
                require Encode::Alias;
                # Add missing real names.
                Encode::Alias::define_alias (MS_KANJI => 'ShiftJIS');
                Encode::Alias::define_alias ('CN-GB' => 'EUC-CN');
            }
        }

        if ($has_encode) {
            # Now check whether Encode really supports that encoding.  The
            # trailing 1 makes the eval true exactly when encode() did not
            # throw.
            my $usable = do {
                local $@;
                eval { Encode::encode ($mimename, 'x'); 1 };
            };

            $conversions->{$mimename} = '_Encode' if $usable;

            # Probed once, never again - whatever the outcome was.
            delete $optional_conversions->{$mimename};
        }
    }

    return $conversions->{$mimename} if exists $conversions->{$mimename};

    return;
}
# Class method: list the names of all usable encodings.
#
# Every name still listed in $optional_conversions is passed to
# isSupported() first, which moves the usable ones into $conversions.
# Returns the keys of $conversions, in no particular order.
sub listSupported
{
    my ($class) = @_;

    # Work on a snapshot of the keys: isSupported() may delete entries from
    # $optional_conversions while we are walking over them.
    my @pending = keys %{$optional_conversions};
    while (defined (my $name = shift @pending)) {
        $class->isSupported ($name);
    }

    return keys %{$conversions};
}
# Class method: find a conversion path from encoding FROM to encoding TO.
#
# Both names are resolved with resolveAlias(), except for the pseudo
# encoding 'INTERNAL' (matched case-insensitively), which is kept as is.
#
# Returns
#   - nothing, if one of the names cannot be resolved or is not supported,
#   - a reference to an empty array, if both resolve to the same name,
#   - otherwise a reference to an array of conversion steps, each of them
#     an array reference [ MODULE, FROM, TO ] where MODULE is the value
#     returned by isSupported().
#
# A single step is enough if both encodings share one module, or if the
# target is 'INTERNAL' or 'UTF-8' (handled by the source module), or if the
# source is 'INTERNAL' (handled by the target module).  Everything else is
# routed through 'INTERNAL' in two steps.
sub findPath
{
    my ($class, $from, $to) = @_;

    # Normalize both names in place; $name is an alias for $from resp. $to.
    foreach my $name ($from, $to) {
        $name = uc ($name) eq 'INTERNAL'
            ? 'INTERNAL'
            : $class->resolveAlias ($name);
    }

    return unless $from && $to;
    return [] if $from eq $to;

    my $from_module = $class->isSupported ($from);
    my $to_module = $class->isSupported ($to);

    # 'INTERNAL' has no module of its own, it borrows the one of the other
    # end.  Any other unsupported encoding means that there is no path.
    unless ($from_module) {
        return unless 'INTERNAL' eq $from;
        return unless $to_module;
        $from_module = $to_module;
    }

    unless ($to_module) {
        return unless 'INTERNAL' eq $to;
        return unless $from_module;
        $to_module = $from_module;
    }

    my $single_step = $from_module eq $to_module
        || $to eq 'INTERNAL'
        || $to eq 'UTF-8';

    return [[ $from_module, $from, $to ]] if $single_step;
    return [[ $to_module, $from, $to ]] if $from eq 'INTERNAL';

    return [
        [ $from_module, $from, 'INTERNAL' ],
        [ $to_module, 'INTERNAL', $to ],
    ];
}
# TODO: check for
# 7bit-jis
# AdobeStandardEncoding
# AdobeSymbol
# AdobeZdingbat
# ascii-ctrl
# big5ext
# big5plus
# cccii
# cns11643-1
# cns11643-2
# cns11643-3
# cns11643-4
# cns11643-5
# cns11643-6
# cns11643-7
# cns11643-f
# dingbats
# gb12345-raw
# gb2312-raw
# gsm0338
# jis0201-raw
# jis0208-raw
# jis0212-raw
# koi8-f
# MIME-B
# MIME-Header
# MIME-Q
# posix-bc
# symbol
# unisys
1;
__END__
=head1 NAME
Locale::Recode::_Conversions - Internal Table of Known Conversions
=head1 SYNOPSIS
use Locale::Recode::_Conversions
This module is internal to libintl. Do not use it directly!
=head1 AUTHOR
Copyright (C) 2002-2016 L<Guido Flohr|http://www.guido-flohr.net/>
(L<mailto:guido.flohr@cantanea.com>), all rights reserved. See the source
code for details!
=head1 SEE ALSO
Locale::Recode(3), perl(1)
=cut
Local Variables:
mode: perl
perl-indent-level: 4
perl-continued-statement-offset: 4
perl-continued-brace-offset: 0
perl-brace-offset: -4
perl-brace-imaginary-offset: 0
perl-label-offset: -4
cperl-indent-level: 4
cperl-continued-statement-offset: 2
tab-width: 4
End: