Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 4 | "compress/gzip" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 5 | "encoding/csv" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 6 | "encoding/json" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 7 | "io" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 8 | "log" |
| 9 | "net/http" |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 10 | "os" |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 11 | "path/filepath" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 12 | |
| 13 | badger "github.com/dgraph-io/badger/v3" |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 14 | ginI18n "github.com/gin-contrib/i18n" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 15 | "github.com/gin-gonic/gin" |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 16 | "github.com/joho/godotenv" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 17 | "github.com/mattn/go-jsonpointer" |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 18 | "golang.org/x/text/language" |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 19 | ) |
| 20 | |
// db is the package-level Badger handle. It is assigned by InitDB and
// used by CheckSaleUrl, IndexDB and closeDB.
var db *badger.DB
| 22 | |
| 23 | func CheckSaleUrl(c *gin.Context) { |
| 24 | |
| 25 | corpusID := c.Param("corpus_id") |
| 26 | docID := c.Param("doc_id") |
| 27 | textID := c.Param("text_id") |
| 28 | |
| 29 | err := db.View(func(txn *badger.Txn) error { |
| 30 | |
| 31 | item, err := txn.Get([]byte(corpusID + "/" + docID + "/" + textID)) |
| 32 | |
| 33 | if err != nil { |
| 34 | c.String(http.StatusNotFound, "No entry found") |
Akron | 906a2c5 | 2022-12-21 11:35:28 +0100 | [diff] [blame] | 35 | return nil |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 36 | } |
| 37 | |
| 38 | err = item.Value(func(v []byte) error { |
| 39 | c.String(http.StatusOK, string(v)) |
| 40 | return nil |
| 41 | }) |
| 42 | |
| 43 | if err != nil { |
Akron | ce5186d | 2022-12-21 11:15:50 +0100 | [diff] [blame] | 44 | return err |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 45 | } |
| 46 | |
| 47 | return nil |
| 48 | }) |
| 49 | |
| 50 | if err != nil { |
| 51 | c.String(http.StatusNotFound, err.Error()) |
| 52 | } |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 53 | } |
| 54 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 55 | func add(dbx *badger.DB, corpusID, docID, textID string, provider string, url string) error { |
| 56 | err := dbx.Update(func(txn *badger.Txn) error { |
Akron | 6aee7fe | 2022-12-20 16:00:54 +0100 | [diff] [blame] | 57 | err := txn.Set([]byte(corpusID+"/"+docID+"/"+textID), []byte(provider+","+url)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 58 | return err |
| 59 | }) |
| 60 | return err |
| 61 | } |
| 62 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 63 | func InitDB(dir string) { |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 64 | if db != nil { |
| 65 | return |
| 66 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 67 | db = initDB(dir) |
| 68 | } |
| 69 | |
| 70 | func initDB(dir string) *badger.DB { |
| 71 | dbx, err := badger.Open(badger.DefaultOptions(dir)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 72 | if err != nil { |
| 73 | log.Fatal(err) |
| 74 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 75 | return dbx |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 76 | } |
| 77 | |
| 78 | func closeDB() { |
| 79 | db.Close() |
| 80 | } |
| 81 | |
// IndexDB reads CSV data from ri and adds it to the package-level
// database by delegating to indexDB. It returns the error reported by
// indexDB, if any.
func IndexDB(ri io.Reader) error {
	return indexDB(ri, db)
}
| 85 | |
| 86 | // indexDB reads in a csv file and adds |
| 87 | // information to the database |
| 88 | func indexDB(ri io.Reader, dbx *badger.DB) error { |
| 89 | |
| 90 | r := csv.NewReader(ri) |
| 91 | |
| 92 | txn := dbx.NewTransaction(true) |
| 93 | |
| 94 | i := 0 |
| 95 | |
| 96 | for { |
| 97 | record, err := r.Read() |
| 98 | if err == io.EOF { |
| 99 | break |
| 100 | } |
| 101 | if err != nil { |
Akron | f4d6add | 2024-11-14 15:36:14 +0100 | [diff] [blame] | 102 | log.Println(err) |
| 103 | continue; |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 104 | } |
| 105 | |
| 106 | if err := txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])); err == badger.ErrTxnTooBig { |
| 107 | log.Println("Commit", record[0], "after", i, "inserts") |
| 108 | i = 0 |
| 109 | err = txn.Commit() |
| 110 | if err != nil { |
| 111 | log.Fatal("Unable to commit") |
| 112 | } |
| 113 | txn = db.NewTransaction(true) |
| 114 | _ = txn.Set([]byte(record[0]), []byte(record[1]+","+record[2])) |
| 115 | } |
| 116 | i++ |
| 117 | } |
| 118 | return txn.Commit() |
| 119 | } |
| 120 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 121 | func setupRouter() *gin.Engine { |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 122 | gin.SetMode(gin.ReleaseMode) |
| 123 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 124 | r := gin.Default() |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 125 | |
| 126 | // apply i18n middleware |
| 127 | r.Use(ginI18n.Localize(ginI18n.WithBundle(&ginI18n.BundleCfg{ |
| 128 | RootPath: "./i18n", |
| 129 | AcceptLanguage: []language.Tag{language.German, language.English}, |
| 130 | DefaultLanguage: language.English, |
| 131 | UnmarshalFunc: json.Unmarshal, |
| 132 | FormatBundleFile: "json", |
| 133 | }))) |
| 134 | |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 135 | r.LoadHTMLGlob("templates/*") |
| 136 | |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 137 | korapServer := os.Getenv("KORAP_SERVER") |
| 138 | if korapServer == "" { |
| 139 | korapServer = "https://korap.ids-mannheim.de" |
| 140 | } |
| 141 | |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 142 | var pluginManifest map[string]any |
| 143 | json.Unmarshal([]byte(`{ |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 144 | "name" : "External Resources", |
| 145 | "desc" : "Retrieve content from an external provider", |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 146 | "embed" : [{ |
| 147 | "panel" : "match", |
| 148 | "title" : "Full Text", |
| 149 | "classes" : ["plugin", "cart"], |
| 150 | "icon" : "\f07a", |
| 151 | "onClick" : { |
| 152 | "action" : "addWidget", |
| 153 | "template":"", |
| 154 | "permissions": [ |
| 155 | "scripts", |
| 156 | "popups" |
| 157 | ] |
| 158 | } |
| 159 | }] |
| 160 | }`), &pluginManifest) |
| 161 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 162 | externalResources := os.Getenv("KORAP_EXTERNAL_RESOURCES") |
| 163 | if externalResources == "" { |
| 164 | externalResources = "https://korap.ids-mannheim.de/plugin/external/" |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 165 | } |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 166 | jsonpointer.Set(pluginManifest, "/embed/0/onClick/template", externalResources) |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 167 | |
Akron | 79d8348 | 2023-01-02 10:53:24 +0100 | [diff] [blame] | 168 | r.Use(func() gin.HandlerFunc { |
| 169 | return func(c *gin.Context) { |
| 170 | h := c.Writer.Header() |
| 171 | h.Set("Access-Control-Allow-Origin", "null") |
| 172 | h.Set("Access-Control-Allow-Credentials", "null") |
| 173 | h.Set("Vary", "Origin") |
| 174 | } |
| 175 | }(), |
| 176 | ) |
| 177 | |
Akron | 60e1160 | 2023-01-02 11:16:43 +0100 | [diff] [blame] | 178 | // Return widget page |
Akron | 6aee7fe | 2022-12-20 16:00:54 +0100 | [diff] [blame] | 179 | r.GET("/", func(c *gin.Context) { |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 180 | c.HTML(http.StatusOK, "main.html", gin.H{ |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 181 | "korapServer": korapServer, |
Akron | f9caa3b | 2023-08-22 16:01:59 +0200 | [diff] [blame] | 182 | "title": ginI18n.MustGetMessage(c, "fulltext"), |
| 183 | "noAccess": ginI18n.MustGetMessage(c, "noAccess"), |
| 184 | "fromProvider": ginI18n.MustGetMessage(c, "fromProvider"), |
Akron | 5e1252e | 2022-12-19 17:57:56 +0100 | [diff] [blame] | 185 | }) |
| 186 | }) |
| 187 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 188 | // Return resource information |
Akron | ce5186d | 2022-12-21 11:15:50 +0100 | [diff] [blame] | 189 | r.HEAD("/:corpus_id/:doc_id/:text_id", CheckSaleUrl) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 190 | r.GET("/:corpus_id/:doc_id/:text_id", CheckSaleUrl) |
Akron | 60e1160 | 2023-01-02 11:16:43 +0100 | [diff] [blame] | 191 | |
| 192 | // Return plugin manifest |
Akron | 65503a3 | 2023-01-02 12:38:32 +0100 | [diff] [blame] | 193 | r.GET("/plugin.json", func(c *gin.Context) { |
| 194 | c.JSON(200, pluginManifest) |
| 195 | }) |
| 196 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 197 | return r |
| 198 | } |
| 199 | |
| 200 | func main() { |
Akron | 3c58d0d | 2023-01-02 13:35:41 +0100 | [diff] [blame] | 201 | if godotenv.Load() != nil { |
| 202 | log.Println(".env file not loaded.") |
| 203 | } |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 204 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 205 | InitDB("db") |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 206 | defer closeDB() |
| 207 | |
| 208 | // Index csv file |
| 209 | if len(os.Args) > 1 { |
| 210 | |
| 211 | file, err := os.Open(os.Args[1]) |
| 212 | if err != nil { |
| 213 | log.Fatal(err) |
| 214 | } |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 215 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 216 | fileExt := filepath.Ext(os.Args[1]) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 217 | |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 218 | if fileExt == ".gz" || fileExt == ".csvz" { |
| 219 | var gzipr io.Reader |
| 220 | gzipr, err = gzip.NewReader(file) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 221 | if err != nil { |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 222 | log.Fatal("Unable to open gzip file") |
| 223 | } else { |
| 224 | err = IndexDB(gzipr) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 225 | } |
Akron | a1f100a | 2023-03-07 11:03:01 +0100 | [diff] [blame] | 226 | } else { |
| 227 | err = IndexDB(file) |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 228 | } |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 229 | |
| 230 | if err != nil { |
| 231 | log.Fatal("Unable to commit") |
| 232 | } |
Akron | a99d90d | 2024-03-19 11:56:28 +0100 | [diff] [blame] | 233 | log.Println("indexation successfull") |
| 234 | return |
Akron | 3890fcc | 2022-12-20 15:26:36 +0100 | [diff] [blame] | 235 | } |
Akron | 83b77a7 | 2023-03-17 13:37:06 +0100 | [diff] [blame] | 236 | |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 237 | r := setupRouter() |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 238 | |
Akron | de33179 | 2023-01-10 11:36:15 +0100 | [diff] [blame] | 239 | port := os.Getenv("KORAP_EXTERNAL_RESOURCES_PORT") |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 240 | if port == "" { |
| 241 | port = "5722" |
| 242 | } |
| 243 | |
Akron | 80a3d27 | 2023-03-20 15:24:37 +0100 | [diff] [blame] | 244 | log.Println("Starting server on port " + port) |
| 245 | |
Akron | ce286af | 2022-12-28 13:58:26 +0100 | [diff] [blame] | 246 | log.Fatal(http.ListenAndServe(":"+port, r)) |
Akron | e216156 | 2022-12-19 17:05:39 +0100 | [diff] [blame] | 247 | } |