Front page | perl.cvs.parrot |
Postings from January 2009
[svn:parrot] r35587 - branches/strings/pseudocode
From:
simon
Date:
January 15, 2009 04:48
Subject:
[svn:parrot] r35587 - branches/strings/pseudocode
Message ID:
20090115124759.095F0CB9AE@x12.develooper.com
Author: simon
Date: Thu Jan 15 04:47:58 2009
New Revision: 35587
Added:
branches/strings/pseudocode/Charsets.pm
branches/strings/pseudocode/Encodings.pm
Modified:
branches/strings/pseudocode/ParrotString.pm
Log:
Split things into separate files.
Added: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- (empty file)
+++ branches/strings/pseudocode/Charsets.pm Thu Jan 15 04:47:58 2009
@@ -0,0 +1,6 @@
+class ParrotCharset::Unicode { };
+class ParrotCharset::ASCII { };
+class ParrotCharset::Binary { };
+class ParrotCharset::SJIS { };
+class ParrotCharset::EUCJP { };
+
Added: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- (empty file)
+++ branches/strings/pseudocode/Encodings.pm Thu Jan 15 04:47:58 2009
@@ -0,0 +1,58 @@
+class ParrotEncoding::UTF8 {
+ sub _skip($c) {
+ if $c <= 191 { return 1 }
+ if 191 < $c < 224 { return 2 }
+ return 3
+ }
+ sub char_at_byteoffset ($str, $offset is rw) { # Private helper
+ my $c = $str.buffer.[$offset++];
+ if 191 < $c < 224 {
+ # XXX Guard
+ $c = (($c +& 31) +< 6) +| ( $str.buffer.[$offset++] +& 63 );
+ } elsif $c >= 224 {
+ # XXX Guard
+ $c = (($c +& 15) +< 12)
+ +| (( $str.buffer.[$offset++] +& 63 ) +< 6);
+ $c +|= $str.buffer.[$offset++] +& 63;
+ }
+ return $c;
+ }
+ method string_char_iterate ($str, $callback, $parameter) {
+ my $index = 0;
+ while ($index < $str.bufused-1) {
+ $callback(char_at_byteoffset($str, $index), $parameter);
+ }
+ }
+
+ # We're not going to cache this because if it's worth caching it's
+ # worth converting to a Parrot native string rather than keeping
+ # UTF8. We'll keep it dumb and working and people can optimise later
+ method char_at_index($str, $index) {
+ my $i = $index + 0; # work around Rakudo assignment weirdness
+ my $offset = 0;
+ while $i-- > 0 { $offset += _skip($str.buffer[$offset]) }
+ return char_at_byteoffset($str, $offset);
+ }
+};
+class ParrotEncoding::UTF16 { };
+class ParrotEncoding::UTF32 { };
+class ParrotEncoding::EBCDIC { };
+class ParrotEncoding::ParrotNative {
+
+ method string_char_iterate ($str, $callback, $parameter) {
+ for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+ }
+
+ method string_grapheme_iterate($str, $callback, $parameter) {
+ for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
+ }
+
+ method char_at_index($str, $index) { return $str.buffer.[$index] }
+
+ method grapheme_at_index($str, $index) {
+ $str.charset.normalize($str, ParrotNormalization::NFG);
+ return $str.buffer.[$index]
+ }
+};
+class ParrotEncoding::Byte is ParrotEncoding::ParrotNative; # Just a bit thinner
+
Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm (original)
+++ branches/strings/pseudocode/ParrotString.pm Thu Jan 15 04:47:58 2009
@@ -11,63 +11,8 @@
has ParrotString::Normalization $.normalization is rw;
};
-class ParrotCharset::Unicode { };
-class ParrotCharset::ASCII { };
-class ParrotCharset::Binary { };
-class ParrotCharset::SJIS { };
-class ParrotCharset::EUCJP { };
-
-class ParrotEncoding::UTF8 {
- sub _skip($c) {
- if $c <= 191 { return 1 }
- if 191 < $c < 224 { return 2 }
- return 3
- }
- sub char_at_byteoffset ($str, $offset is rw) { # Private helper
- my $c = $str.buffer.[$offset++];
- if 191 < $c < 224 {
- # XXX Guard
- $c = (($c +& 31) +< 6) +| ( $str.buffer.[$offset++] +& 63 );
- } elsif $c >= 224 {
- # XXX Guard
- $c = (($c +& 15) +< 12)
- +| (( $str.buffer.[$offset++] +& 63 ) +< 6);
- $c +|= $str.buffer.[$offset++] +& 63;
- }
- return $c;
- }
- method string_char_iterate ($str, $callback, $parameter) {
- my $index = 0;
- while ($index < $str.bufused-1) {
- $callback(char_at_byteoffset($str, $index), $parameter);
- }
- }
-
- # We're not going to cache this because if it's worth caching it's
- # worth converting to a Parrot native string rather than keeping
- # UTF8. We'll keep it dumb and working and people can optimise later
- method char_at_index($str, $index) {
- my $i = $index + 0; # work around Rakudo assignment weirdness
- my $offset = 0;
- while $i-- > 0 { $offset += _skip($str.buffer[$offset]) }
- return char_at_byteoffset($str, $offset);
- }
-};
-class ParrotEncoding::UTF16 { };
-class ParrotEncoding::UTF32 { };
-class ParrotEncoding::EBCDIC { };
-class ParrotEncoding::Byte {
- method string_char_iterate ($str, $callback, $parameter) {
- for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
- }
-
- method string_grapheme_iterate($str, $callback, $parameter) {
- for (0..$str.bufused-1) { $callback($str.buffer.[$_], $parameter); }
- }
-
- method char_at_index($str, $index) { return $str.buffer.[$index] }
-};
-
+use Charsets;
+use Encodings;
## COW stuff
sub Parrot_string_new_COW($src) { ... }
@@ -135,7 +80,8 @@
sub Parrot_string_find_substr($str, $substr) { ... }
sub Parrot_string_copy($src, $dst) { ... }
-sub Parrot_string_grapheme_copy ($src, $dst) { ... }
+sub Parrot_string_grapheme_copy ($src, $dst) {
+}
sub Parrot_string_repeat($src, $reps) { ... }
sub Parrot_string_substr($src, $offset, $len) { ... }
sub Parrot_string_grapheme_substr($src, $offset, $len) { ... }
-
[svn:parrot] r35587 - branches/strings/pseudocode
by simon