Support both Matrix and Double Array tokenizer representations in the command
diff --git a/cmd/datok.go b/cmd/datok.go
index 22f4875..adff996 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -12,11 +12,12 @@
var cli struct {
Convert struct {
- Foma string `kong:"required,short='i',help='The Foma file'"`
- Tokenizer string `kong:"required,short='o',help='The Double Array Tokenizer file'"`
- } `kong:"cmd, help='Convert a foma file to a double array tokenizer'"`
+ Foma string `kong:"required,short='i',help='The Foma file'"`
+ Tokenizer string `kong:"required,short='o',help='The Tokenizer file'"`
+ DoubleArray bool `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
+ } `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`
Tokenize struct {
- Tokenizer string `kong:"required,short='t',help='The Double Array Tokenizer file'"`
+ Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
} `kong:"cmd, help='Tokenize a text'"`
}
@@ -27,7 +28,7 @@
parser := kong.Must(
&cli,
kong.Name("datok"),
- kong.Description("Double Array based tokenizer"),
+ kong.Description("FSA based tokenizer"),
kong.UsageOnError(),
)
@@ -40,27 +41,35 @@
if tok == nil {
log.Fatalln("Unable to load foma file")
}
- dat := tok.ToDoubleArray()
- _, err := dat.Save(cli.Convert.Tokenizer)
- if err != nil {
- log.Fatalln(err)
+ if cli.Convert.DoubleArray {
+ dat := tok.ToDoubleArray()
+ _, err := dat.Save(cli.Convert.Tokenizer)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ } else {
+ mat := tok.ToMatrix()
+ _, err := mat.Save(cli.Convert.Tokenizer)
+ if err != nil {
+ log.Fatalln(err)
+ }
}
fmt.Println("File successfully converted.")
os.Exit(0)
}
- // Load the Datok file
- dat := datok.LoadDatokFile(cli.Tokenize.Tokenizer)
+ // Load the Datok or Matrix file
+ dat := datok.LoadTokenizerFile(cli.Tokenize.Tokenizer)
// Unable to load the datok file
if dat == nil {
+ log.Fatalln("Unable to load file")
os.Exit(1)
}
// Program is running in a pipe
fileInfo, _ := os.Stdin.Stat()
if fileInfo.Mode()&os.ModeCharDevice == 0 {
-
// Transduce from STDIN and write to STDOUT
dat.Transduce(os.Stdin, os.Stdout)
}