Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 4 | "compress/gzip" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 5 | "encoding/csv" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 6 | "encoding/json" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 7 | "io" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 8 | "log" |
| 9 | "net/http" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 10 | "os" |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 11 | "path/filepath" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 12 | |
| 13 | badger "github.com/dgraph-io/badger/v3" |
| 14 | "github.com/gin-gonic/gin" |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 15 | "github.com/joho/godotenv" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 16 | "github.com/mattn/go-jsonpointer" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 17 | ) |
| 18 | |
// db is the package-level database handle. It is set by InitDB and
// used by CheckSaleUrl, IndexDB and closeDB.
var db *badger.DB
| 20 | |
| 21 | func CheckSaleUrl(c *gin.Context) { |
| 22 | |
| 23 | corpusID := c.Param("corpus_id") |
| 24 | docID := c.Param("doc_id") |
| 25 | textID := c.Param("text_id") |
| 26 | |
| 27 | err := db.View(func(txn *badger.Txn) error { |
| 28 | |
| 29 | item, err := txn.Get([]byte(corpusID + "/" + docID + "/" + textID)) |
| 30 | |
| 31 | if err != nil { |
| 32 | c.String(http.StatusNotFound, "No entry found") |
Akron | 906a2c5 | 2022-12-21 11:35:28 +0100 | [diff] [blame] | 33 | return nil |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 34 | } |
| 35 | |
| 36 | err = item.Value(func(v []byte) error { |
| 37 | c.String(http.StatusOK, string(v)) |
| 38 | return nil |
| 39 | }) |
| 40 | |
| 41 | if err != nil { |
Akron | ce5186d | 2022-12-21 11:15:50 +0100 | [diff] [blame] | 42 | return err |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 43 | } |
| 44 | |
| 45 | return nil |
| 46 | }) |
| 47 | |
| 48 | if err != nil { |
| 49 | c.String(http.StatusNotFound, err.Error()) |
| 50 | } |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 51 | } |
| 52 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 53 | func add(dbx *badger.DB, corpusID, docID, textID string, provider string, url string) error { |
| 54 | err := dbx.Update(func(txn *badger.Txn) error { |
Akron | 6aee7fe | 2022-12-20 16:00:54 +0100 | [diff] [blame] | 55 | err := txn.Set([]byte(corpusID+"/"+docID+"/"+textID), []byte(provider+","+url)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 56 | return err |
| 57 | }) |
| 58 | return err |
| 59 | } |
| 60 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 61 | func InitDB(dir string) { |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 62 | if db != nil { |
| 63 | return |
| 64 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 65 | db = initDB(dir) |
| 66 | } |
| 67 | |
| 68 | func initDB(dir string) *badger.DB { |
| 69 | dbx, err := badger.Open(badger.DefaultOptions(dir)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 70 | if err != nil { |
| 71 | log.Fatal(err) |
| 72 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 73 | return dbx |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 74 | } |
| 75 | |
| 76 | func closeDB() { |
| 77 | db.Close() |
| 78 | } |
| 79 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 80 | func IndexDB(ri io.Reader) error { |
| 81 | return indexDB(ri, db) |
| 82 | } |
| 83 | |
| 84 | // indexDB reads in a csv file and adds |
| 85 | // information to the database |
| 86 | func indexDB(ri io.Reader, dbx *badger.DB) error { |
| 87 | |
| 88 | r := csv.NewReader(ri) |
| 89 | |
| 90 | txn := dbx.NewTransaction(true) |
| 91 | |
| 92 | i := 0 |
| 93 | |
| 94 | for { |
| 95 | record, err := r.Read() |
| 96 | if err == io.EOF { |
| 97 | break |
| 98 | } |
| 99 | if err != nil { |
| 100 | log.Fatal(err) |
| 101 | } |
| 102 | |
| 103 | if err := txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])); err == badger.ErrTxnTooBig { |
| 104 | log.Println("Commit", record[0], "after", i, "inserts") |
| 105 | i = 0 |
| 106 | err = txn.Commit() |
| 107 | if err != nil { |
| 108 | log.Fatal("Unable to commit") |
| 109 | } |
| 110 | txn = db.NewTransaction(true) |
| 111 | _ = txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])) |
| 112 | } |
| 113 | i++ |
| 114 | } |
| 115 | return txn.Commit() |
| 116 | } |
| 117 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 118 | func setupRouter() *gin.Engine { |
| 119 | r := gin.Default() |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 120 | r.LoadHTMLGlob("templates/*") |
| 121 | |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 122 | korapServer := os.Getenv("KORAP_SERVER") |
| 123 | if korapServer == "" { |
| 124 | korapServer = "https://korap.ids-mannheim.de" |
| 125 | } |
| 126 | |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 127 | var pluginManifest map[string]any |
| 128 | json.Unmarshal([]byte(`{ |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 129 | "name" : "External Resources", |
| 130 | "desc" : "Retrieve content from an external provider", |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 131 | "embed" : [{ |
| 132 | "panel" : "match", |
| 133 | "title" : "Full Text", |
| 134 | "classes" : ["plugin", "cart"], |
| 135 | "icon" : "\f07a", |
| 136 | "onClick" : { |
| 137 | "action" : "addWidget", |
| 138 | "template":"", |
| 139 | "permissions": [ |
| 140 | "scripts", |
| 141 | "popups" |
| 142 | ] |
| 143 | } |
| 144 | }] |
| 145 | }`), &pluginManifest) |
| 146 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 147 | externalResources := os.Getenv("KORAP_EXTERNAL_RESOURCES") |
| 148 | if externalResources == "" { |
| 149 | externalResources = "https://korap.ids-mannheim.de/plugin/external/" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 150 | } |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 151 | jsonpointer.Set(pluginManifest, "/embed/0/onClick/template", externalResources) |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 152 | |
Akron | 79d8348 | 2023-01-02 10:53:24 +0100 | [diff] [blame] | 153 | r.Use(func() gin.HandlerFunc { |
| 154 | return func(c *gin.Context) { |
| 155 | h := c.Writer.Header() |
| 156 | h.Set("Access-Control-Allow-Origin", "null") |
| 157 | h.Set("Access-Control-Allow-Credentials", "null") |
| 158 | h.Set("Vary", "Origin") |
| 159 | } |
| 160 | }(), |
| 161 | ) |
| 162 | |
Akron | 60e1160 | 2023-01-02 11:16:43 +0100 | [diff] [blame] | 163 | // Return widget page |
Akron | 6aee7fe | 2022-12-20 16:00:54 +0100 | [diff] [blame] | 164 | r.GET("/", func(c *gin.Context) { |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 165 | c.HTML(http.StatusOK, "main.html", gin.H{ |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 166 | "korapServer": korapServer, |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 167 | }) |
| 168 | }) |
| 169 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 170 | // Return resource information |
Akron | ce5186d | 2022-12-21 11:15:50 +0100 | [diff] [blame] | 171 | r.HEAD("/:corpus_id/:doc_id/:text_id", CheckSaleUrl) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 172 | r.GET("/:corpus_id/:doc_id/:text_id", CheckSaleUrl) |
Akron | 60e1160 | 2023-01-02 11:16:43 +0100 | [diff] [blame] | 173 | |
| 174 | // Return plugin manifest |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 175 | r.GET("/plugin.json", func(c *gin.Context) { |
| 176 | c.JSON(200, pluginManifest) |
| 177 | }) |
| 178 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 179 | return r |
| 180 | } |
| 181 | |
| 182 | func main() { |
Akron | 3c58d0d | 2023-01-02 13:35:41 +0100 | [diff] [blame] | 183 | if godotenv.Load() != nil { |
| 184 | log.Println(".env file not loaded.") |
| 185 | } |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 186 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 187 | InitDB("db") |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 188 | defer closeDB() |
| 189 | |
| 190 | // Index csv file |
| 191 | if len(os.Args) > 1 { |
| 192 | |
| 193 | file, err := os.Open(os.Args[1]) |
| 194 | if err != nil { |
| 195 | log.Fatal(err) |
| 196 | } |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 197 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 198 | fileExt := filepath.Ext(os.Args[1]) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 199 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 200 | if fileExt == ".gz" || fileExt == ".csvz" { |
| 201 | var gzipr io.Reader |
| 202 | gzipr, err = gzip.NewReader(file) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 203 | if err != nil { |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 204 | log.Fatal("Unable to open gzip file") |
| 205 | } else { |
| 206 | err = IndexDB(gzipr) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 207 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame^] | 208 | } else { |
| 209 | err = IndexDB(file) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 210 | } |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 211 | |
| 212 | if err != nil { |
| 213 | log.Fatal("Unable to commit") |
| 214 | } |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 215 | } |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 216 | r := setupRouter() |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 217 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 218 | port := os.Getenv("KORAP_EXTERNAL_RESOURCES_PORT") |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 219 | if port == "" { |
| 220 | port = "5722" |
| 221 | } |
| 222 | |
| 223 | log.Fatal(http.ListenAndServe(":"+port, r)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 224 | } |