develooper Front page | perl.cvs.parrot | Postings from January 2009

[svn:parrot] r35587 - branches/strings/pseudocode

From:
simon
Date:
January 15, 2009 04:48
Subject:
[svn:parrot] r35587 - branches/strings/pseudocode
Message ID:
20090115124759.095F0CB9AE@x12.develooper.com
Author: simon
Date: Thu Jan 15 04:47:58 2009
New Revision: 35587

Added:
   branches/strings/pseudocode/Charsets.pm
   branches/strings/pseudocode/Encodings.pm
Modified:
   branches/strings/pseudocode/ParrotString.pm

Log:
Split things into separate files.


Added: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- (empty file)
+++ branches/strings/pseudocode/Charsets.pm	Thu Jan 15 04:47:58 2009
@@ -0,0 +1,6 @@
+class ParrotCharset::Unicode {  };
+class ParrotCharset::ASCII   {  };
+class ParrotCharset::Binary  {  };
+class ParrotCharset::SJIS    {  };
+class ParrotCharset::EUCJP   {  };
+

Added: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- (empty file)
+++ branches/strings/pseudocode/Encodings.pm	Thu Jan 15 04:47:58 2009
@@ -0,0 +1,58 @@
+class ParrotEncoding::UTF8   {  
+    sub _skip($c) {
+        if $c <= 191 { return 1 }
+        if 191 < $c < 224 { return 2 }
+        return 3
+    }
+    sub char_at_byteoffset ($str, $offset is rw) { # Private helper
+        my $c = $str.buffer.[$offset++];
+        if 191 < $c < 224 {
+            # XXX Guard
+            $c = (($c +& 31) +< 6) +| ( $str.buffer.[$offset++] +& 63 );
+        } elsif $c >= 224 {
+            # XXX Guard
+            $c = (($c +& 15) +< 12) 
+                +| (( $str.buffer.[$offset++] +& 63 ) +< 6);
+            $c  +|= $str.buffer.[$offset++] +& 63;
+        }
+        return $c;
+    }
+    method string_char_iterate ($str, $callback, $parameter) {
+        my $index = 0;
+        while ($index < $str.bufused-1) {
+            $callback(char_at_byteoffset($str, $index), $parameter);
+        }
+    }
+
+    # We're not going to cache this because if it's worth caching it's
+    # worth converting to a Parrot native string rather than keeping
+    # UTF8. We'll keep it dumb and working and people can optimise later
+    method char_at_index($str, $index) { 
+        my $i = $index + 0; # work around Rakudo assignment weirdness
+        my $offset = 0;
+        while $i-- > 0 { $offset += _skip($str.buffer[$offset])  }
+        return char_at_byteoffset($str, $offset);
+    }
+};
+class ParrotEncoding::UTF16  {  };
+class ParrotEncoding::UTF32  {  };
+class ParrotEncoding::EBCDIC {  };
+class ParrotEncoding::ParrotNative {
+
+    method string_char_iterate ($str, $callback, $parameter) {
+        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+    }
+
+    method string_grapheme_iterate($str, $callback, $parameter) {
+        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+    }
+
+    method char_at_index($str, $index) { return $str.buffer.[$index] }
+
+    method grapheme_at_index($str, $index) {
+        $str.charset.normalize($str, ParrotNormalization::NFG);
+        return $str.buffer.[$index]
+    }
+};
+class ParrotEncoding::Byte is ParrotEncoding::ParrotNative; # Just a bit thinner
+

Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm	(original)
+++ branches/strings/pseudocode/ParrotString.pm	Thu Jan 15 04:47:58 2009
@@ -11,63 +11,8 @@
     has ParrotString::Normalization $.normalization is rw;
 };
 
-class ParrotCharset::Unicode {  };
-class ParrotCharset::ASCII   {  };
-class ParrotCharset::Binary  {  };
-class ParrotCharset::SJIS    {  };
-class ParrotCharset::EUCJP   {  };
-
-class ParrotEncoding::UTF8   {  
-    sub _skip($c) {
-        if $c <= 191 { return 1 }
-        if 191 < $c < 224 { return 2 }
-        return 3
-    }
-    sub char_at_byteoffset ($str, $offset is rw) { # Private helper
-        my $c = $str.buffer.[$offset++];
-        if 191 < $c < 224 {
-            # XXX Guard
-            $c = (($c +& 31) +< 6) +| ( $str.buffer.[$offset++] +& 63 );
-        } elsif $c >= 224 {
-            # XXX Guard
-            $c = (($c +& 15) +< 12) 
-                +| (( $str.buffer.[$offset++] +& 63 ) +< 6);
-            $c  +|= $str.buffer.[$offset++] +& 63;
-        }
-        return $c;
-    }
-    method string_char_iterate ($str, $callback, $parameter) {
-        my $index = 0;
-        while ($index < $str.bufused-1) {
-            $callback(char_at_byteoffset($str, $index), $parameter);
-        }
-    }
-
-    # We're not going to cache this because if it's worth caching it's
-    # worth converting to a Parrot native string rather than keeping
-    # UTF8. We'll keep it dumb and working and people can optimise later
-    method char_at_index($str, $index) { 
-        my $i = $index + 0; # work around Rakudo assignment weirdness
-        my $offset = 0;
-        while $i-- > 0 { $offset += _skip($str.buffer[$offset])  }
-        return char_at_byteoffset($str, $offset);
-    }
-};
-class ParrotEncoding::UTF16  {  };
-class ParrotEncoding::UTF32  {  };
-class ParrotEncoding::EBCDIC {  };
-class ParrotEncoding::Byte   {
-    method string_char_iterate ($str, $callback, $parameter) {
-        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
-    }
-
-    method string_grapheme_iterate($str, $callback, $parameter) {
-        for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
-    }
-
-    method char_at_index($str, $index) { return $str.buffer.[$index] }
-};
-
+use Charsets;
+use Encodings;
 
 ## COW stuff
 sub Parrot_string_new_COW($src) { ... }
@@ -135,7 +80,8 @@
 sub Parrot_string_find_substr($str, $substr) { ... }
 
 sub Parrot_string_copy($src, $dst) { ... }
-sub Parrot_string_grapheme_copy ($src, $dst) { ... } 
+sub Parrot_string_grapheme_copy ($src, $dst) { 
+}
 sub Parrot_string_repeat($src, $reps) { ... }
 sub Parrot_string_substr($src, $offset, $len) { ... }
 sub Parrot_string_grapheme_substr($src, $offset, $len) { ... }



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About