Fix tokenization of self-closing XML elements (e.g. <br />)
Change-Id: I80a1653685e221731f9be889b2794f3bc6a38cf2
diff --git a/cmd/datok.go b/cmd/datok.go
index 66bc7e9..d9a71f0 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -103,4 +103,6 @@
dat.TransduceTokenWriter(os.Stdin, tw)
tw.Flush()
}
+
+ fmt.Println("\n")
}
diff --git a/matrix_test.go b/matrix_test.go
index 1a3f7b0..f985736 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -873,6 +873,13 @@
assert.Equal("Mann", tokens[8])
assert.Equal(".", tokens[9])
assert.Equal(10, len(tokens))
+
+ tokens = ttokenize(mat, w, "das<br class=\"br\" />ging.")
+ assert.Equal("das", tokens[0])
+ assert.Equal("<br class=\"br\" />", tokens[1])
+ assert.Equal("ging", tokens[2])
+ assert.Equal(".", tokens[3])
+ assert.Equal(4, len(tokens))
}
func TestMatokDatokEquivalence(t *testing.T) {
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 0d703a3..7b16fb8 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -147,6 +147,7 @@
[[%" [? - %" - %>]+ %"] | [%' [? - %' - %>]+ %']]
)
]*
+ (WS* "/")
]
|
[