develooper Front page | perl.cvs.parrot | Postings from January 2009

[svn:parrot] r35603 - in branches/strings/pseudocode: . t

From: simon
Date: January 15, 2009 14:14
Subject: [svn:parrot] r35603 - in branches/strings/pseudocode: . t
Message ID: 20090115221448.20037CB9AE@x12.develooper.com
Author: simon
Date: Thu Jan 15 14:14:47 2009
New Revision: 35603

Modified:
   branches/strings/pseudocode/Charsets.pm
   branches/strings/pseudocode/Encodings.pm
   branches/strings/pseudocode/ParrotString.pm
   branches/strings/pseudocode/t/create.t

Log:
Clarification of the grapheme/char distinction in ParrotNative.


Modified: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- branches/strings/pseudocode/Charsets.pm	(original)
+++ branches/strings/pseudocode/Charsets.pm	Thu Jan 15 14:14:47 2009
@@ -45,10 +45,10 @@
         while $offset < $len {
             # Find end of current grapheme sequence
             my $e = 0+$offset; 
-            $e++ while $e+1 <= $len && is_combining($nfc[$e+1]);
+            $e++ while $e+1 <= $len && is_combining(@nfc[$e+1]);
             # Current grapheme sequence runs from $offset to $e.
             if ($e - $offset) == 0 { # Simple case first
-                push @nfg, $nfc[$offset];
+                push @nfg, @nfc[$offset];
            } else {
                 push @nfg, $str.normalization.get_grapheme_table_entry(@nfg[$offset..$e], $e-$offset)
            }

Modified: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- branches/strings/pseudocode/Encodings.pm	(original)
+++ branches/strings/pseudocode/Encodings.pm	Thu Jan 15 14:14:47 2009
@@ -40,21 +40,31 @@
 class ParrotEncoding::ParrotNative {
 
     method string_char_iterate ($str, $callback, $parameter) {
-        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+        for (0..$str.bufused-1) { 
+            my $grapheme = grapheme_at_index($str, $_);
+            for (@( $grapheme )) {
+                $callback($str.buffer.[$_], $parameter); 
+            }
+        }
     }
 
     method string_grapheme_iterate($str, $callback, $parameter) {
         for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
     }
 
-    method char_at_index($str, $index) { return $str.buffer.[$index] }
+    method char_at_index($str, $index) { 
+        ...
+    }
 
     method grapheme_at_index($str, $index) {
         if (!$str.normalization) { 
             $str.charset.normalize($str, ParrotNormalization::NFG);
-            return $str.buffer.[$index]
         }
-        return $str.normalization.grapheme_at_index($str, $index);
+        my $c = $str.buffer[$index];
+        if $c >= 0 { return [ $c ]; }
+        return $str.charset.normalization.grapheme_table.[-$c];
+        # We are allowed to be pally with the normalization internals
+        # because NFG is specific to ParrotEncoding.
     }
 };
 class ParrotEncoding::Byte is ParrotEncoding::ParrotNative; # Just a bit thinner

Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm	(original)
+++ branches/strings/pseudocode/ParrotString.pm	Thu Jan 15 14:14:47 2009
@@ -76,7 +76,7 @@
 sub Parrot_string_byte_length($str) { return $str.strlen }
 
 sub Parrot_string_index($str, $index) { return $str.encoding.char_at_index($str, $index) }
-sub Parrot_string_grapheme_index($str, $index) { ... }
+sub Parrot_string_grapheme_index($str, $index) { return $str.encoding.grapheme_at_index($str, $index) }
 sub Parrot_string_find_substr($str, $substr) { ... }
 
 sub Parrot_string_copy($src, $dst) { ... }

Modified: branches/strings/pseudocode/t/create.t
==============================================================================
--- branches/strings/pseudocode/t/create.t	(original)
+++ branches/strings/pseudocode/t/create.t	Thu Jan 15 14:14:47 2009
@@ -13,3 +13,8 @@
 is(Parrot_string_byte_length($str), 28, "String byte length correct");
 is(Parrot_string_length($str), 15, "UTF8 char length correct");
 is(Parrot_string_index($str, 3), 0x3ac, "UTF8 string indexing");
+
+# The standard NFG example...
+$str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, ParrotCharset::Unicode, ParrotEncoding::UTF8);
+my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::ParrotNative);
+Parrot_string_grapheme_copy($str, $str2);



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About