Front page | perl.cvs.parrot |
Postings from January 2009
[svn:parrot] r35603 - in branches/strings/pseudocode: . t
From: simon
Date: January 15, 2009 14:14
Subject: [svn:parrot] r35603 - in branches/strings/pseudocode: . t
Message ID: 20090115221448.20037CB9AE@x12.develooper.com
Author: simon
Date: Thu Jan 15 14:14:47 2009
New Revision: 35603
Modified:
branches/strings/pseudocode/Charsets.pm
branches/strings/pseudocode/Encodings.pm
branches/strings/pseudocode/ParrotString.pm
branches/strings/pseudocode/t/create.t
Log:
Clarification of the grapheme/char distinction in ParrotNative.
Modified: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- branches/strings/pseudocode/Charsets.pm (original)
+++ branches/strings/pseudocode/Charsets.pm Thu Jan 15 14:14:47 2009
@@ -45,10 +45,10 @@
while $offset < $len {
# Find end of current grapheme sequence
my $e = 0+$offset;
- $e++ while $e+1 <= $len && is_combining($nfc[$e+1]);
+ $e++ while $e+1 <= $len && is_combining(@nfc[$e+1]);
# Current grapheme sequence runs from $offset to $e.
if ($e - $offset) == 0 { # Simple case first
- push @nfg, $nfc[$offset];
+ push @nfg, @nfc[$offset];
} else {
push @nfg, $str.normalization.get_grapheme_table_entry(@nfg[$offset..$e], $e-$offset)
}
Modified: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- branches/strings/pseudocode/Encodings.pm (original)
+++ branches/strings/pseudocode/Encodings.pm Thu Jan 15 14:14:47 2009
@@ -40,21 +40,31 @@
class ParrotEncoding::ParrotNative {
method string_char_iterate ($str, $callback, $parameter) {
- for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+ for (0..$str.bufused-1) {
+ my $grapheme = grapheme_at_index($str, $_);
+ for (@( $grapheme )) {
+ $callback($str.buffer.[$_], $parameter);
+ }
+ }
}
method string_grapheme_iterate($str, $callback, $parameter) {
for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
}
- method char_at_index($str, $index) { return $str.buffer.[$index] }
+ method char_at_index($str, $index) {
+ ...
+ }
method grapheme_at_index($str, $index) {
if (!$str.normalization) {
$str.charset.normalize($str, ParrotNormalization::NFG);
- return $str.buffer.[$index]
}
- return $str.normalization.grapheme_at_index($str, $index);
+ my $c = $str.buffer[$index];
+ if $c >= 0 { return [ $c ]; }
+ return $str.charset.normalization.grapheme_table.[-$c];
+ # We are allowed to be pally with the normalization internals
+ # because NFG is specific to ParrotEncoding.
}
};
class ParrotEncoding::Byte is ParrotEncoding::ParrotNative; # Just a bit thinner
Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm (original)
+++ branches/strings/pseudocode/ParrotString.pm Thu Jan 15 14:14:47 2009
@@ -76,7 +76,7 @@
sub Parrot_string_byte_length($str) { return $str.strlen }
sub Parrot_string_index($str, $index) { return $str.encoding.char_at_index($str, $index) }
-sub Parrot_string_grapheme_index($str, $index) { ... }
+sub Parrot_string_grapheme_index($str, $index) { return $str.encoding.grapheme_at_index($str, $index) }
sub Parrot_string_find_substr($str, $substr) { ... }
sub Parrot_string_copy($src, $dst) { ... }
Modified: branches/strings/pseudocode/t/create.t
==============================================================================
--- branches/strings/pseudocode/t/create.t (original)
+++ branches/strings/pseudocode/t/create.t Thu Jan 15 14:14:47 2009
@@ -13,3 +13,8 @@
is(Parrot_string_byte_length($str), 28, "String byte length correct");
is(Parrot_string_length($str), 15, "UTF8 char length correct");
is(Parrot_string_index($str, 3), 0x3ac, "UTF8 string indexing");
+
+# The standard NFG example...
+$str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, ParrotCharset::Unicode, ParrotEncoding::UTF8);
+my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::ParrotNative);
+Parrot_string_grapheme_copy($str, $str2);
-
[svn:parrot] r35603 - in branches/strings/pseudocode: . t
by simon