blob: adff996ee0e691b3ca230e63bad2e6debf57e771 [file] [log] [blame]
Akron8e1d69b2021-08-12 17:38:49 +02001package main
2
3import (
Akron7e269d42021-08-12 23:18:05 +02004 "fmt"
Akron8e1d69b2021-08-12 17:38:49 +02005 "os"
6
Akron527c10c2021-08-13 01:45:18 +02007 "log"
8
Akron7f1097f2021-09-21 16:00:29 +02009 datok "github.com/KorAP/datok"
Akron8e1d69b2021-08-12 17:38:49 +020010 "github.com/alecthomas/kong"
11)
12
// cli holds the command line definition that kong fills in while parsing.
// Each nested struct tagged with `cmd` is one subcommand; its fields are the
// flags of that subcommand.
var cli struct {
	// Convert describes the "convert" subcommand.
	Convert struct {
		// Foma is the path of the foma file to read (-i).
		Foma string `kong:"required,short='i',help='The Foma file'"`
		// Tokenizer is the path the converted tokenizer is saved to (-o).
		Tokenizer string `kong:"required,short='o',help='The Tokenizer file'"`
		// DoubleArray selects the Double Array representation instead of
		// the Matrix representation (-d).
		DoubleArray bool `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
	} `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`

	// Tokenize describes the "tokenize" subcommand.
	Tokenize struct {
		// Tokenizer is the path of the Matrix or Double Array tokenizer
		// file to load (-t).
		Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
	} `kong:"cmd, help='Tokenize a text'"`
}
23
24// Main method for command line handling
25func main() {
26
27 // Parse command line parameters
28 parser := kong.Must(
29 &cli,
30 kong.Name("datok"),
Akron941f2152021-09-26 15:14:25 +020031 kong.Description("FSA based tokenizer"),
Akron8e1d69b2021-08-12 17:38:49 +020032 kong.UsageOnError(),
33 )
34
Akron7e269d42021-08-12 23:18:05 +020035 ctx, err := parser.Parse(os.Args[1:])
Akron8e1d69b2021-08-12 17:38:49 +020036
37 parser.FatalIfErrorf(err)
38
Akron7e269d42021-08-12 23:18:05 +020039 if ctx.Command() == "convert" {
40 tok := datok.LoadFomaFile(cli.Convert.Foma)
41 if tok == nil {
Akron527c10c2021-08-13 01:45:18 +020042 log.Fatalln("Unable to load foma file")
Akron7e269d42021-08-12 23:18:05 +020043 }
Akron941f2152021-09-26 15:14:25 +020044 if cli.Convert.DoubleArray {
45 dat := tok.ToDoubleArray()
46 _, err := dat.Save(cli.Convert.Tokenizer)
47 if err != nil {
48 log.Fatalln(err)
49 }
50 } else {
51 mat := tok.ToMatrix()
52 _, err := mat.Save(cli.Convert.Tokenizer)
53 if err != nil {
54 log.Fatalln(err)
55 }
Akron7e269d42021-08-12 23:18:05 +020056 }
57 fmt.Println("File successfully converted.")
58 os.Exit(0)
59 }
60
Akron941f2152021-09-26 15:14:25 +020061 // Load the Datok or Matrix file
62 dat := datok.LoadTokenizerFile(cli.Tokenize.Tokenizer)
Akron8e1d69b2021-08-12 17:38:49 +020063
64 // Unable to load the datok file
65 if dat == nil {
Akron941f2152021-09-26 15:14:25 +020066 log.Fatalln("Unable to load file")
Akron8e1d69b2021-08-12 17:38:49 +020067 os.Exit(1)
68 }
69
70 // Program is running in a pipe
71 fileInfo, _ := os.Stdin.Stat()
72 if fileInfo.Mode()&os.ModeCharDevice == 0 {
Akron8e1d69b2021-08-12 17:38:49 +020073 // Transduce from STDIN and write to STDOUT
74 dat.Transduce(os.Stdin, os.Stdout)
75 }
76}