Add minor XML rules for comment, PI and CDATA delimiters
Change-Id: I72baac5afc04f849a7c464a0e9b292e9d40ee2b4
diff --git a/datok_test.go b/datok_test.go
index 9558661..8562a98 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -213,7 +213,7 @@
dat = LoadDatokFile("testdata/tokenizer.datok")
}
assert.NotNil(dat)
- assert.True(dat.LoadFactor() >= 70)
+ assert.True(dat.LoadFactor() >= 60)
assert.Equal(dat.epsilon, 1)
assert.Equal(dat.unknown, 2)
assert.Equal(dat.identity, 3)
diff --git a/matrix_test.go b/matrix_test.go
index adce916..d650b52 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -1174,6 +1174,22 @@
assert.Equal("ging", tokens[2])
assert.Equal(".", tokens[3])
assert.Equal(4, len(tokens))
+
+ tokens = ttokenize(mat, w, "das <?robot xgh ?> <!-- hm hm --> <![CDATA[ cdata ]]> <br />")
+ assert.Equal("das", tokens[0])
+ assert.Equal("<?robot", tokens[1])
+ assert.Equal("xgh", tokens[2])
+ assert.Equal("?>", tokens[3])
+ assert.Equal("<!--", tokens[4])
+ assert.Equal("hm", tokens[5])
+ assert.Equal("hm", tokens[6])
+ assert.Equal("-->", tokens[7])
+ assert.Equal("<![CDATA[", tokens[8])
+ assert.Equal("cdata", tokens[9])
+ assert.Equal("]]>", tokens[10])
+ assert.Equal("<br />", tokens[11])
+ assert.Equal(12, len(tokens))
+
}
func TestMatokDatokEquivalence(t *testing.T) {
diff --git a/src/all/xml.xfst b/src/all/xml.xfst
index 06e247d..4526117 100644
--- a/src/all/xml.xfst
+++ b/src/all/xml.xfst
@@ -1,6 +1,11 @@
! XML rule
define XMLns [AsciiLetter [AsciiLetter|Digit|%-]* (%: AsciiLetter [AsciiLetter|Digit|%-]*)] .o. Caseinsensitive;
-define XML [
+
+define XMLcomment [ %< %! %- %- | %- %- %> ];
+define XMLpi [ %< %? AsciiLetter [AsciiLetter | Digit | %- ]* | %? %> ];
+define CDATA [ %< %! %[ {CDATA} %[ | %] %] %> ];
+
+define XML [[
"<" [
[
XMLns
@@ -16,4 +21,4 @@
"/" XMLns
]
] WS* ">"
-].u;
+].u | XMLcomment | XMLpi | CDATA ];
\ No newline at end of file