#! /bin/false
# vim: set autoindent shiftwidth=4 tabstop=4:
# Conversion routines for UTF-8 (perl < 5.8.0).
# Copyright (C) 2002-2016 Guido Flohr <guido.flohr@cantanea.com>,
# all rights reserved.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Locale::RecodeData::UTF_8;
use strict;
require Locale::RecodeData;
use base qw(Locale::RecodeData);
# Entry point used by the recoding framework: pick the conversion
# direction from the object's "_from" field and delegate to it.
#
# Arguments:
#   $_[0] - the converter object; its {_from} entry is compared to the
#           string 'INTERNAL'.
#   $_[1] - the data to convert.  It is handed on as $_[1] itself (not a
#           copy) so that the selected routine can replace it in place.
#
# Returns whatever the selected routine returns.
sub _recode
{
    my $self = $_[0];

    # 'INTERNAL' as the source means we are encoding; anything else is
    # treated as input that has to be decoded into the internal format.
    my $method = $self->{_from} eq 'INTERNAL'
        ? '_fromInternal'
        : '_toInternal';

    return $self->$method ($_[1]);
}
# Encode the internal representation (a list of integer code points) as
# UTF-8 octets.
#
# Arguments:
#   $_[0] - the converter object (not used here).
#   $_[1] - reference to an array of integer code points.  It is replaced
#           IN PLACE (through the @_ alias) by the encoded byte string.
#
# Always returns 1.
#
# This routine assumes that the internal representation is always sane
# and contains valid codes only.  No range checking is done: values above
# 0x1fffff are written with the historic 5- and 6-byte forms, whose lead
# bytes are 111110xx (0xf8) and 1111110x (0xfc) respectively, matching
# what the decoder in this file expects for those lengths.
sub _fromInternal
{
    $_[1] = join '', map {
        if ($_ <= 0x7f) {
            # 7 bits: plain ASCII, one octet.
            chr $_;
        } elsif ($_ <= 0x7ff) {
            # 11 bits: 110xxxxx 10xxxxxx
            pack ("C2",
                  (0xc0 | (($_ >> 6) & 0x1f)),
                  (0x80 | ($_ & 0x3f)));
        } elsif ($_ <= 0xffff) {
            # 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
            pack ("C3",
                  (0xe0 | (($_ >> 12) & 0xf)),
                  (0x80 | (($_ >> 6) & 0x3f)),
                  (0x80 | ($_ & 0x3f)));
        } elsif ($_ <= 0x1fffff) {
            # 21 bits: 11110xxx followed by three continuation octets.
            pack ("C4",
                  (0xf0 | (($_ >> 18) & 0x7)),
                  (0x80 | (($_ >> 12) & 0x3f)),
                  (0x80 | (($_ >> 6) & 0x3f)),
                  (0x80 | ($_ & 0x3f)));
        } elsif ($_ <= 0x3ffffff) {
            # 26 bits: 111110xx followed by four continuation octets.
            # The lead byte must be 0xf8; 0xf0 would announce a 4-byte
            # sequence and make the output undecodable.
            pack ("C5",
                  (0xf8 | (($_ >> 24) & 0x3)),
                  (0x80 | (($_ >> 18) & 0x3f)),
                  (0x80 | (($_ >> 12) & 0x3f)),
                  (0x80 | (($_ >> 6) & 0x3f)),
                  (0x80 | ($_ & 0x3f)));
        } else {
            # 31 bits: 1111110x followed by five continuation octets.
            # Only one payload bit lives in the lead byte; every
            # continuation octet carries a full six bits.
            pack ("C6",
                  (0xfc | (($_ >> 30) & 0x1)),
                  (0x80 | (($_ >> 24) & 0x3f)),
                  (0x80 | (($_ >> 18) & 0x3f)),
                  (0x80 | (($_ >> 12) & 0x3f)),
                  (0x80 | (($_ >> 6) & 0x3f)),
                  (0x80 | ($_ & 0x3f)));
        }
    } @{$_[1]};

    return 1;
}
# Decode UTF-8 into integers.  We do not bother to care about possibly
# configured replacement characters here and simply fall back to 0xfffd.
# Rationale: the internal format is never output directly and the other
# encoders will handle the replacement character correctly.
#
# Arguments:
#   $_[0] - the converter object (not used here).
#   $_[1] - the UTF-8 input string.  It is replaced IN PLACE (through the
#           @_ alias) by a reference to an array of integer code points.
#
# Always returns 1.
sub _toInternal
{
    if ($] >= 5.006) {
        # Let perl do the work where the "U" template is available.
        # NOTE(review): the handling of UTF-8 in pack/unpack templates
        # differs between perl releases - confirm that "U*" decodes raw
        # octets on every perl version this branch is expected to serve.
        $_[1] = [ unpack "U*", $_[1] ];
        return 1;
    }

    # Sigh, we have to decode ourselves.  FIXME: Should be optimized.
    # The routine is awfully slow.
    # It also does not necessarily detect illegal multi-byte sequences
    # (overlong forms, for instance, are accepted).
    my @chars = ();
    my @bytes = unpack "C*", $_[1];

    BYTE: while (@bytes) {
        my $byte = shift @bytes;

        if ($byte < 0x80) {
            # Plain ASCII.
            push @chars, $byte;
            next BYTE;
        }

        if ($byte < 0xc0 || $byte > 0xfd) {
            # Stray continuation octet, or 0xfe/0xff which never occur
            # in UTF-8.
            push @chars, 0xfffd;
            next BYTE;
        }

        # Lead octet: extract its payload bits and work out how many
        # continuation octets have to follow.
        my $char;
        my $num_bytes;
        if ($byte < 0xe0) {
            $char = $byte & 0x1f;
            $num_bytes = 1;
        } elsif ($byte < 0xf0) {
            $char = $byte & 0xf;
            $num_bytes = 2;
        } elsif ($byte < 0xf8) {
            $char = $byte & 0x7;
            $num_bytes = 3;
        } elsif ($byte < 0xfc) {
            $char = $byte & 0x3;
            $num_bytes = 4;
        } else {
            $char = $byte & 0x1;
            $num_bytes = 5;
        }

        for (my $i = 0; $i < $num_bytes; ++$i) {
            # Only peek at the next octet.  If it is not a continuation
            # octet it must stay in the queue so that the outer loop
            # decodes it on its own; removing it here would silently
            # drop a perfectly valid character after a broken sequence.
            my $next = $bytes[0];
            if (!defined $next || $next < 0x80 || $next > 0xbf) {
                push @chars, 0xfffd;
                next BYTE;
            }

            shift @bytes;
            $char = ($char << 6) | ($next & 0x3f);
        }

        push @chars, $char;
    }

    $_[1] = \@chars;
    return 1;
}
1;
__END__
=head1 NAME
Locale::RecodeData::UTF_8 - Conversion routines for UTF-8
=head1 SYNOPSIS
This module is internal to libintl. Do not use directly!
=head1 DESCRIPTION
This module contains the conversion tables for UTF-8. It is capable of
converting from UTF-8 to the internal format of libintl-perl and vice
versa. It is only suitable for Perl versions E<lt>= 5.8.0. However,
you do not have to bother about version checking, Locale::Recode(3)
will do that for you.
=head1 CHARACTER TABLE
See http://www.unicode.org/.
=head1 AUTHOR
Copyright (C) 2002-2016 L<Guido Flohr|http://www.guido-flohr.net/>
(L<mailto:guido.flohr@cantanea.com>), all rights reserved. See the source
code for details!
=head1 SEE ALSO
Locale::RecodeData(3), Locale::Recode(3), perl(1)
=cut
Local Variables:
mode: perl
perl-indent-level: 4
perl-continued-statement-offset: 4
perl-continued-brace-offset: 0
perl-brace-offset: -4
perl-brace-imaginary-offset: 0
perl-label-offset: -4
cperl-indent-level: 4
cperl-continued-statement-offset: 2
tab-width: 4
End: