Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
Akron | 7e269d4 | 2021-08-12 23:18:05 +0200 | [diff] [blame^] | 4 | "fmt" |
Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 5 | "os" |
| 6 | |
| 7 | datok "github.com/KorAP/datokenizer" |
| 8 | "github.com/alecthomas/kong" |
Akron | 7e269d4 | 2021-08-12 23:18:05 +0200 | [diff] [blame^] | 9 | "github.com/rs/zerolog/log" |
Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 10 | ) |
| 11 | |
// cli describes the command line interface. A pointer to it is handed to
// kong in main, which fills the fields from the parsed arguments according
// to the `kong` struct tags.
var cli struct {
	// Convert holds the options of the "convert" sub-command.
	Convert struct {
		// Foma is the path of the foma input file (-i).
		Foma string `kong:"required,short='i',help='The Foma file'"`
		// Tokenizer is the path the double array tokenizer is written to (-o).
		Tokenizer string `kong:"required,short='o',help='The Double Array Tokenizer file'"`
	} `kong:"cmd, help='Convert a foma file to a double array tokenizer'"`

	// Tokenize holds the options of the "tokenize" sub-command.
	Tokenize struct {
		// Tokenizer is the path of the double array tokenizer to load (-t).
		Tokenizer string `kong:"required,short='t',help='The Double Array Tokenizer file'"`
	} `kong:"cmd, help='Tokenize a text'"`
}
| 21 | |
| 22 | // Main method for command line handling |
| 23 | func main() { |
| 24 | |
| 25 | // Parse command line parameters |
| 26 | parser := kong.Must( |
| 27 | &cli, |
| 28 | kong.Name("datok"), |
| 29 | kong.Description("Double Array based tokenizer"), |
| 30 | kong.UsageOnError(), |
| 31 | ) |
| 32 | |
Akron | 7e269d4 | 2021-08-12 23:18:05 +0200 | [diff] [blame^] | 33 | ctx, err := parser.Parse(os.Args[1:]) |
Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 34 | |
| 35 | parser.FatalIfErrorf(err) |
| 36 | |
Akron | 7e269d4 | 2021-08-12 23:18:05 +0200 | [diff] [blame^] | 37 | if ctx.Command() == "convert" { |
| 38 | tok := datok.LoadFomaFile(cli.Convert.Foma) |
| 39 | if tok == nil { |
| 40 | log.Error().Msg("Unable to load foma file") |
| 41 | os.Exit(1) |
| 42 | } |
| 43 | dat := tok.ToDoubleArray() |
| 44 | _, err := dat.Save(cli.Convert.Tokenizer) |
| 45 | if err != nil { |
| 46 | log.Error().Err(err) |
| 47 | os.Exit(1) |
| 48 | } |
| 49 | fmt.Println("File successfully converted.") |
| 50 | os.Exit(0) |
| 51 | } |
| 52 | |
Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 53 | // Load the Datok file |
Akron | 7e269d4 | 2021-08-12 23:18:05 +0200 | [diff] [blame^] | 54 | dat := datok.LoadDatokFile(cli.Tokenize.Tokenizer) |
Akron | 8e1d69b | 2021-08-12 17:38:49 +0200 | [diff] [blame] | 55 | |
| 56 | // Unable to load the datok file |
| 57 | if dat == nil { |
| 58 | os.Exit(1) |
| 59 | } |
| 60 | |
| 61 | // Program is running in a pipe |
| 62 | fileInfo, _ := os.Stdin.Stat() |
| 63 | if fileInfo.Mode()&os.ModeCharDevice == 0 { |
| 64 | |
| 65 | // Transduce from STDIN and write to STDOUT |
| 66 | dat.Transduce(os.Stdin, os.Stdout) |
| 67 | } |
| 68 | } |