Add gzip support

Change-Id: I91895689a2204d43672b5638a9a293807b437a59
diff --git a/service.go b/service.go
index 55f312e..2fac3c1 100644
--- a/service.go
+++ b/service.go
@@ -1,12 +1,14 @@
 package main
 
 import (
+	"compress/gzip"
 	"encoding/csv"
 	"encoding/json"
 	"io"
 	"log"
 	"net/http"
 	"os"
+	"path/filepath"
 
 	badger "github.com/dgraph-io/badger/v3"
 	"github.com/gin-gonic/gin"
@@ -48,29 +50,71 @@
 	}
 }
 
-func add(corpusID, docID, textID string, provider string, url string) error {
-	err := db.Update(func(txn *badger.Txn) error {
+func add(dbx *badger.DB, corpusID, docID, textID string, provider string, url string) error {
+	err := dbx.Update(func(txn *badger.Txn) error {
 		err := txn.Set([]byte(corpusID+"/"+docID+"/"+textID), []byte(provider+","+url))
 		return err
 	})
 	return err
 }
 
-func initDB(dir string) {
+func InitDB(dir string) {
 	if db != nil {
 		return
 	}
-	var err error
-	db, err = badger.Open(badger.DefaultOptions(dir))
+	db = initDB(dir)
+}
+
+func initDB(dir string) *badger.DB {
+	dbx, err := badger.Open(badger.DefaultOptions(dir))
 	if err != nil {
 		log.Fatal(err)
 	}
+	return dbx
 }
 
 func closeDB() {
 	db.Close()
 }
 
+func IndexDB(ri io.Reader) error {
+	return indexDB(ri, db)
+}
+
+// indexDB reads CSV records from ri and stores each one in dbx, using
+// field 0 as the key and "field1,field2" as the value. Writes are batched
+// in a transaction that is committed whenever badger reports it as full.
+func indexDB(ri io.Reader, dbx *badger.DB) error {
+	r := csv.NewReader(ri)
+	txn := dbx.NewTransaction(true)
+	i := 0 // inserts since the last commit, only used for logging
+
+	for {
+		record, err := r.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		if err := txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])); err == badger.ErrTxnTooBig {
+			log.Println("Commit", record[0], "after", i, "inserts")
+			i = 0
+			err = txn.Commit()
+			if err != nil {
+				log.Fatal("Unable to commit")
+			}
+			// Continue on the database that was passed in, not the
+			// package-level db, which may be nil or a different store.
+			txn = dbx.NewTransaction(true)
+			_ = txn.Set([]byte(record[0]), []byte(record[1]+","+record[2]))
+		}
+		i++
+	}
+	return txn.Commit()
+}
+
 func setupRouter() *gin.Engine {
 	r := gin.Default()
 	r.LoadHTMLGlob("templates/*")
@@ -140,7 +184,7 @@
 		log.Println(".env file not loaded.")
 	}
 
-	initDB("db")
+	InitDB("db")
 	defer closeDB()
 
 	// Index csv file
@@ -150,40 +194,24 @@
 		if err != nil {
 			log.Fatal(err)
 		}
-		r := csv.NewReader(file)
 
-		txn := db.NewTransaction(true)
+		fileExt := filepath.Ext(os.Args[1])
 
-		i := 0
-
-		for {
-			record, err := r.Read()
-			if err == io.EOF {
-				break
-			}
+		if fileExt == ".gz" || fileExt == ".csvz" {
+			var gzipr io.Reader
+			gzipr, err = gzip.NewReader(file)
 			if err != nil {
-				log.Fatal(err)
+				log.Fatal("Unable to open gzip file")
+			} else {
+				err = IndexDB(gzipr)
 			}
-
-			if err := txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])); err == badger.ErrTxnTooBig {
-				log.Println("Commit", record[0], "after", i, "inserts")
-				i = 0
-				err = txn.Commit()
-				if err != nil {
-					log.Fatal("Unable to commit")
-				}
-				txn = db.NewTransaction(true)
-				_ = txn.Set([]byte(record[0]), []byte(record[1]+","+record[2]))
-			}
-			i++
+		} else {
+			err = IndexDB(file)
 		}
-		err = txn.Commit()
 
 		if err != nil {
 			log.Fatal("Unable to commit")
 		}
-
-		return
 	}
 	r := setupRouter()