blob: 489808b4ff9e015920ab072cee36ed51439e4b09 [file] [log] [blame]
Akron8e1d69b2021-08-12 17:38:49 +02001package main
2
3import (
Akron7e269d42021-08-12 23:18:05 +02004 "fmt"
Akron8e1d69b2021-08-12 17:38:49 +02005 "os"
6
7 datok "github.com/KorAP/datokenizer"
8 "github.com/alecthomas/kong"
Akron7e269d42021-08-12 23:18:05 +02009 "github.com/rs/zerolog/log"
Akron8e1d69b2021-08-12 17:38:49 +020010)
11
// cli declares the command line interface of the program. It is handed
// to kong in main, which fills the fields from the parsed arguments.
// The struct tags are read by kong and define the flags and help texts.
var cli struct {
	// Convert holds the flags of the "convert" command.
	Convert struct {
		// Foma is the input file (-i), passed to datok.LoadFomaFile.
		Foma string `kong:"required,short='i',help='The Foma file'"`
		// Tokenizer is the output file (-o) the double array is saved to.
		Tokenizer string `kong:"required,short='o',help='The Double Array Tokenizer file'"`
	} `kong:"cmd, help='Convert a foma file to a double array tokenizer'"`
	// Tokenize holds the flags of the "tokenize" command.
	Tokenize struct {
		// Tokenizer is the tokenizer file (-t), passed to datok.LoadDatokFile.
		Tokenizer string `kong:"required,short='t',help='The Double Array Tokenizer file'"`
	} `kong:"cmd, help='Tokenize a text'"`
}
21
22// Main method for command line handling
23func main() {
24
25 // Parse command line parameters
26 parser := kong.Must(
27 &cli,
28 kong.Name("datok"),
29 kong.Description("Double Array based tokenizer"),
30 kong.UsageOnError(),
31 )
32
Akron7e269d42021-08-12 23:18:05 +020033 ctx, err := parser.Parse(os.Args[1:])
Akron8e1d69b2021-08-12 17:38:49 +020034
35 parser.FatalIfErrorf(err)
36
Akron7e269d42021-08-12 23:18:05 +020037 if ctx.Command() == "convert" {
38 tok := datok.LoadFomaFile(cli.Convert.Foma)
39 if tok == nil {
40 log.Error().Msg("Unable to load foma file")
41 os.Exit(1)
42 }
43 dat := tok.ToDoubleArray()
44 _, err := dat.Save(cli.Convert.Tokenizer)
45 if err != nil {
46 log.Error().Err(err)
47 os.Exit(1)
48 }
49 fmt.Println("File successfully converted.")
50 os.Exit(0)
51 }
52
Akron8e1d69b2021-08-12 17:38:49 +020053 // Load the Datok file
Akron7e269d42021-08-12 23:18:05 +020054 dat := datok.LoadDatokFile(cli.Tokenize.Tokenizer)
Akron8e1d69b2021-08-12 17:38:49 +020055
56 // Unable to load the datok file
57 if dat == nil {
58 os.Exit(1)
59 }
60
61 // Program is running in a pipe
62 fileInfo, _ := os.Stdin.Stat()
63 if fileInfo.Mode()&os.ModeCharDevice == 0 {
64
65 // Transduce from STDIN and write to STDOUT
66 dat.Transduce(os.Stdin, os.Stdout)
67 }
68}