Add a flag to ignore the newline after EOT
Change-Id: Ia18cc0cbb1dda6311c6b2b8db1fae52c4b6335e0
diff --git a/cmd/datok.go b/cmd/datok.go
index 9314a93..e31745a 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -17,9 +17,10 @@
DoubleArray bool `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
} `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`
Tokenize struct {
- Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
- Positions bool `kong:"optional,negatable,default=false,short='p',help='Print token offsets'"`
- Tokens bool `kong:"optional,negatable,default=true,help="Print token surfaces""`
+ Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
+ Positions bool `kong:"optional,negatable,default=false,short='p',help='Print token offsets'"`
+ Tokens bool `kong:"optional,negatable,default=true,help='Print token surfaces'"`
+ NewlineAfterEOT bool `kong:"optional,negatable,help='Ignore newline after EOT'"`
} `kong:"cmd, help='Tokenize a text'"`
}
@@ -70,7 +71,11 @@
}
// Create token writer based on the options defined
- tw := datok.NewTokenWriterFromOptions(os.Stdout, cli.Tokenize.Positions)
+ tw := datok.NewTokenWriterFromOptions(
+ os.Stdout,
+ cli.Tokenize.Positions,
+ cli.Tokenize.NewlineAfterEOT,
+ )
// Program is running in a pipe
fileInfo, _ := os.Stdin.Stat()