Front page | perl.cvs.parrot |
Postings from January 2009
[svn:parrot] r36039 - in branches/strings/pseudocode: . ParrotEncoding t
From:
simon
Date:
January 26, 2009 13:13
Subject:
[svn:parrot] r36039 - in branches/strings/pseudocode: . ParrotEncoding t
Message ID:
20090126211254.5EBC0CB9AE@x12.develooper.com
Author: simon
Date: Mon Jan 26 13:12:52 2009
New Revision: 36039
Modified:
branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm
branches/strings/pseudocode/ParrotString.pm
branches/strings/pseudocode/t/recode.t
Log:
Implement char-wise equality testing, and rework the way native encoding iterates over stuff.
Modified: branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm
==============================================================================
--- branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm (original)
+++ branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm Mon Jan 26 13:12:52 2009
@@ -1,7 +1,21 @@
-class ParrotEncoding::ParrotNative is ParrotEncoding::Base::Fixed {
+class ParrotEncoding::ParrotNative {
our $.width = 1;
method setup($str) { $str.normalization = ParrotNormalization::NFG.new(); }
+ method string_grapheme_iterate($str, $callback, $parameter) {
+ for (0..$str.bufused-1) {
+ $callback(self.grapheme_at_index($str,$_), $parameter);
+ }
+ }
+ method string_length($str) {
+ # We're not really all that fixed - borrow this from the
+ # variable one. You could optimize this by checking whether there
+ # are any entries in the grapheme table.
+ my $data = 0;
+ my $callback = sub ($char, $data is rw) { $data++ };
+ $str.encoding.string_char_iterate($str, $callback, $data);
+ return $data;
+ }
method append_grapheme ($str, $g) {
if (@($g) > 1) {
my $item;
@@ -16,9 +30,9 @@
method string_char_iterate ($str, $callback, $parameter) {
for (0..$str.bufused-1) {
- my $grapheme = grapheme_at_index($str, $_);
+ my $grapheme = self.grapheme_at_index($str, $_);
for (@( $grapheme )) {
- $callback($str.buffer.[$_], $parameter);
+ $callback($_, $parameter);
}
}
}
@@ -27,6 +41,7 @@
# We need to look inside each grapheme, since NFG stores individual
# graphemes and graphemes are composed of multiple characters -
# this could be improved with caching later but we will
+ # do it the slow stupid way for now
...
}
@@ -34,6 +49,7 @@
if (!$str.normalization) {
$str.charset.normalize($str, ParrotNormalization::NFG);
}
+ die $index~" Read off end of buffer" if $index > $str.buffer - 1;
my $c = $str.buffer[$index];
if $c >= 0 { return [ $c ]; }
return $str.normalization.grapheme_table.[-$c - 1];
Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm (original)
+++ branches/strings/pseudocode/ParrotString.pm Mon Jan 26 13:12:52 2009
@@ -116,13 +116,16 @@
if ($one.strlen != $two.strlen) { return 0; }
for (0 .. $one.strlen-1) {
if ($one.buffer.[$_] != $two.buffer.[$_]) {
- say "Oops, byte "~$_~" differed";
return 0
}
}
return 1;
}
sub Parrot_string_character_equal($one, $two) {
- say "Not implemented yet";
- return 0;
+ my $l = Parrot_string_length($one);
+ return 0 if $l != Parrot_string_length($two);
+ for (0.. $l-1) {
+ return 0 if Parrot_string_index($one, $_) != Parrot_string_index($two, $_);
+ }
+ return 1;
}
Modified: branches/strings/pseudocode/t/recode.t
==============================================================================
--- branches/strings/pseudocode/t/recode.t (original)
+++ branches/strings/pseudocode/t/recode.t Mon Jan 26 13:12:52 2009
@@ -1,12 +1,13 @@
use Test;
use ParrotString;
-plan 4;
+plan 5;
# The standard NFG example...
my $str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, ParrotCharset::Unicode, ParrotEncoding::UTF8);
my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::ParrotNative);
Parrot_string_grapheme_copy($str, $str2);
is(Parrot_string_grapheme_length($str2), 5, "Four UTF8 bytes = one grapheme");
+is(Parrot_string_length($str2), 6, "One grapheme is actually two chars");
my $str3 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::UTF8);
Parrot_string_grapheme_copy($str2, $str3);
-
[svn:parrot] r36039 - in branches/strings/pseudocode: . ParrotEncoding t
by simon