Front page | perl.cvs.parrot |
Postings from January 2009
[svn:parrot] r35950 - in branches/strings/pseudocode: . t
From:
simon
Date:
January 24, 2009 00:21
Subject:
[svn:parrot] r35950 - in branches/strings/pseudocode: . t
Message ID:
20090124082103.4D043CB9AE@x12.develooper.com
Author: simon
Date: Sat Jan 24 00:21:01 2009
New Revision: 35950
Modified:
branches/strings/pseudocode/Charsets.pm
branches/strings/pseudocode/Encodings.pm
branches/strings/pseudocode/ParrotString.pm
branches/strings/pseudocode/t/create.t
Log:
First stab at appending graphemes; we can now convert strings from UTF8 to ParrotNative with NFG.
Modified: branches/strings/pseudocode/Charsets.pm
==============================================================================
--- branches/strings/pseudocode/Charsets.pm (original)
+++ branches/strings/pseudocode/Charsets.pm Sat Jan 24 00:21:01 2009
@@ -22,7 +22,7 @@
# want, but it's easier than that: the forms are hierarchical;
# to go to NFC you have to go through NFD, and to go to NFG you
# have to go through NFC. So...
- if (!$str.normalization or $str.normalization !~~ Normalization::NFD) {
+ if (!$str.normalization or $str.normalization !~~ ParrotNormalization::NFD) {
# Everyone starts in fully decomposed form (NFD)
# This is code-heavy and we'll do it in ICU in Parrot.
# Here, we just pretend we've already done it.
Modified: branches/strings/pseudocode/Encodings.pm
==============================================================================
--- branches/strings/pseudocode/Encodings.pm (original)
+++ branches/strings/pseudocode/Encodings.pm Sat Jan 24 00:21:01 2009
@@ -1,5 +1,6 @@
class ParrotEncoding::Base::Fixed {
our $.width;
+ method setup($str) { }
method string_length($str) { return $str.strlen / $str.encoding.width; }
method string_char_iterate($str, $callback, $parameter) {
@@ -25,6 +26,7 @@
}
class ParrotEncoding::Base::Variable {
+ method setup($str) { }
method string_length($str) {
# This code written funny to be a bit more C-like
my $data = 0;
@@ -109,6 +111,19 @@
class ParrotEncoding::ParrotNative is ParrotEncoding::Base::Fixed {
our $.width = 1;
+ method setup($str) { $str.normalization = ParrotNormalization::NFG.new(); }
+ method append_grapheme ($str, $g) {
+ my $item;
+ if (@($g) > 1) {
+ $item = $str.normalization.get_grapheme_table_entry(@($g));
+ } else {
+ ($item) = @($g);
+ }
+ $str.buffer.push($item);
+ $str.bufused++;
+ $str.strlen++;
+ }
+
method string_char_iterate ($str, $callback, $parameter) {
for (0..$str.bufused-1) {
my $grapheme = grapheme_at_index($str, $_);
@@ -131,7 +146,7 @@
}
my $c = $str.buffer[$index];
if $c >= 0 { return [ $c ]; }
- return $str.charset.normalization.grapheme_table.[-$c];
+ return $str.normalization.grapheme_table.[-$c];
# We are allowed to be pally with the normalization internals
# because NFG is specific to ParrotEncoding.
}
Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm (original)
+++ branches/strings/pseudocode/ParrotString.pm Sat Jan 24 00:21:01 2009
@@ -8,11 +8,12 @@
has $.hashval is rw;
has ParrotString::Encoding $.encoding is rw;
has ParrotString::Charset $.charset is rw;
- has ParrotString::Normalization $.normalization is rw;
+ has $.normalization is rw;
};
use Charsets;
use Encodings;
+use Normalizations;
## COW stuff
sub Parrot_string_new_COW($src) { ... }
@@ -42,6 +43,7 @@
my $news = ParrotString.new();
$news.charset = $charset;
$news.encoding = $encoding;
+ $news.encoding.setup($news);
$news.buffer = map { ord $_ }, split("", $s);
$news.bufused = $news.strlen = $len || $s.chars;
return $news;
@@ -81,7 +83,7 @@
if ($src.encoding ~~ $dst.encoding and $src.charset ~~ $dst.charset) {
return Parrot_string_append($src, $dst);
}
- my $append_to = sub ($g, $dst) { $dst.encoding.append_grapheme($src, $g) };
+ my $append_to = sub ($g, $dst) { $dst.encoding.append_grapheme($dst, $g) };
$src.encoding.string_grapheme_iterate($src, $append_to, $dst);
return $src;
}
Modified: branches/strings/pseudocode/t/create.t
==============================================================================
--- branches/strings/pseudocode/t/create.t (original)
+++ branches/strings/pseudocode/t/create.t Sat Jan 24 00:21:01 2009
@@ -1,6 +1,6 @@
use Test;
use ParrotString;
-plan 10;
+plan 11;
my $str = Parrot_string_new_init("flurble", 4, ParrotCharset::ASCII, ParrotEncoding::Byte);
ok($str.charset ~~ ParrotCharset::ASCII, "Charset set properly");
@@ -21,3 +21,4 @@
$str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, ParrotCharset::Unicode, ParrotEncoding::UTF8);
my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, ParrotEncoding::ParrotNative);
Parrot_string_grapheme_copy($str, $str2);
+is(Parrot_string_grapheme_length($str2), 5, "Four UTF8 bytes = one grapheme");
-
[svn:parrot] r35950 - in branches/strings/pseudocode: . t
by simon