Introduce required input file argument for tokenization ("-" reads from STDIN)

Change-Id: Iacc1f2ca69ed36f0f20730717a3a2b128dcdad88
diff --git a/Readme.md b/Readme.md
index 0e04e83..ed50d7e 100644
--- a/Readme.md
+++ b/Readme.md
@@ -10,7 +10,7 @@
 ## Tokenizing
 
 ```shell
-$ echo "Es war spät, schon ca. 2 Uhr. ;-)" | datok tokenize -t testdata/tokenizer.matok 
+$ echo "Es war spät, schon ca. 2 Uhr. ;-)" | datok tokenize -t testdata/tokenizer.matok -
 Es
 war
 spät
diff --git a/cmd/datok.go b/cmd/datok.go
index 66bc7e9..e2a4efb 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -2,6 +2,7 @@
 
 import (
 	"fmt"
+	"io"
 	"os"
 
 	"log"
@@ -12,12 +13,13 @@
 
 var cli struct {
 	Convert struct {
-		Foma        string `kong:"required,short='i',help='The Foma file'"`
+		Foma        string `kong:"required,short='i',help='The Foma FST file'"`
 		Tokenizer   string `kong:"required,short='o',help='The Tokenizer file'"`
 		DoubleArray bool   `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
 	} `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`
 	Tokenize struct {
 		Tokenizer         string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
+		Input             string `kong:"required,arg='',type='existingfile',help='Input file to tokenize (use - for STDIN)'"`
 		Tokens            bool   `kong:"optional,negatable,default=true,help='Print token surfaces (defaults to ${default})'"`
 		Sentences         bool   `kong:"optional,negatable,default=true,help='Print sentence boundaries (defaults to ${default})'"`
 		TokenPositions    bool   `kong:"optional,default=false,short='p',help='Print token offsets (defaults to ${default})'"`
@@ -97,10 +99,29 @@
 	// Create token writer based on the options defined
 	tw := datok.NewTokenWriter(os.Stdout, flags)
 
+	var r io.Reader
+
 	// Program is running in a pipe
-	fileInfo, _ := os.Stdin.Stat()
-	if fileInfo.Mode()&os.ModeCharDevice == 0 {
-		dat.TransduceTokenWriter(os.Stdin, tw)
-		tw.Flush()
+	if cli.Tokenize.Input == "-" {
+		fileInfo, _ := os.Stdin.Stat()
+		if fileInfo.Mode()&os.ModeCharDevice == 0 {
+			r = os.Stdin
+		} else {
+			log.Fatalln("Unable to read from STDIN")
+			os.Exit(1)
+			return
+		}
+	} else {
+		f, err := os.Open(cli.Tokenize.Input)
+		if err != nil {
+			log.Fatalln(err)
+			os.Exit(1)
+			return
+		}
+		defer f.Close()
+		r = f
 	}
+
+	dat.TransduceTokenWriter(r, tw)
+	tw.Flush()
 }