blob: 16ff59682120f1187c67bf158f6704c58f3bd5ef [file] [log] [blame]
Akronfc3bd272025-04-04 16:15:44 +02001package termmapper
2
3/*
4$( => PUNCT PunctType=Brck ``, '', *RRB*, *LRB*, -
5$, => PUNCT PunctType=Comm ,
6$. => PUNCT PunctType=Peri ., :, ?, ;, !
7ADJA => ADJ _ neuen, neue, deutschen, ersten, anderen
8ADJD => ADJ Variant=Short gut, rund, knapp, deutlich, möglich
9ADV => ADV _ auch, nur, noch, so, aber
10APPO => ADP AdpType=Post zufolge, nach, gegenüber, wegen, über
11APPR => ADP AdpType=Prep in, von, mit, für, auf
12APPRART => ADP AdpType=Prep|PronType=Art im, am, zum, zur, vom
13APZR => ADP AdpType=Circ an, hinaus, aus, her, heraus
14ART => DET PronType=Art der, die, den, des, das
15CARD => NUM NumType=Card 000, zwei, drei, vier, fünf
16FM => X Foreign=Yes New, of, de, Times, the
17ITJ => INTJ _ naja, Ach, äh, Na, piep
18KOKOM => CCONJ ConjType=Comp als, wie, denn, wir
19KON => CCONJ _ und, oder, sondern, sowie, aber
20KOUI => SCONJ _ um, ohne, statt, anstatt, Ums
21KOUS => SCONJ _ daß, wenn, weil, ob, als
22NE => PROPN _ SPD, Deutschland, USA, dpa, Bonn
23NN => NOUN _ Prozent, Mark, Millionen, November, Jahren
24PAV => ADV PronType=Dem
25PDAT => DET PronType=Dem dieser, diese, diesem, dieses, diesen
26PDS => PRON PronType=Dem das, dies, die, diese, der
27PIAT => DET PronType=Ind,Neg,Tot keine, mehr, alle, kein, beiden
28PIDAT => DET AdjType=Pdt|PronType=Ind,Neg,Tot
29PIS => PRON PronType=Ind,Neg,Tot man, allem, nichts, alles, mehr
30PPER => PRON PronType=Prs es, sie, er, wir, ich
31PPOSAT => DET Poss=Yes|PronType=Prs ihre, seine, seiner, ihrer, ihren
32PPOSS => PRON Poss=Yes|PronType=Prs ihren, Seinen, seinem, unsrigen, meiner
33PRELAT => DET PronType=Rel deren, dessen, die
34PRELS => PRON PronType=Rel die, der, das, dem, denen
35PRF => PRON PronType=Prs|Reflex=Yes sich, uns, mich, mir, dich
36PTKA => PART _ zu, am, allzu, Um
37PTKANT => PART PartType=Res nein, ja, bitte, Gewiß, Also
38PTKNEG => PART Polarity=Neg nicht
39PTKVZ => ADP PartType=Vbp an, aus, ab, vor, auf
40PTKZU => PART PartType=Inf zu, zur, zum
41PWAT => DET PronType=Int welche, welchen, welcher, wie, welchem
42PWAV => ADV PronType=Int wie, wo, warum, wobei, wonach
43PWS => PRON PronType=Int was, wer, wem, wen, welches
44TRUNC => X Hyph=Yes Staats-, Industrie-, Finanz-, Öl-, Lohn-
45VAFIN => AUX Mood=Ind|VerbForm=Fin ist, hat, wird, sind, sei
46VAIMP => AUX Mood=Imp|VerbForm=Fin Seid, werde, Sei
47VAINF => AUX VerbForm=Inf werden, sein, haben, worden, Dabeisein
48VAPP => AUX Aspect=Perf|VerbForm=Part worden, gewesen, geworden, gehabt, werden
49VMFIN => VERB Mood=Ind|VerbForm=Fin|VerbType=Mod kann, soll, will, muß, sollen
50VMINF => VERB VerbForm=Inf|VerbType=Mod können, müssen, wollen, dürfen, sollen
51VMPP => VERB Aspect=Perf|VerbForm=Part|VerbType=Mod gewollt
52VVFIN => VERB Mood=Ind|VerbForm=Fin sagte, gibt, geht, steht, kommt
53VVIMP => VERB Mood=Imp|VerbForm=Fin siehe, sprich, schauen, Sagen, gestehe
54VVINF => VERB VerbForm=Inf machen, lassen, bleiben, geben, bringen
55VVIZU => VERB VerbForm=Inf einzusetzen, durchzusetzen, aufzunehmen, abzubauen, umzusetzen
56VVPP => VERB Aspect=Perf|VerbForm=Part gemacht, getötet, gefordert, gegeben, gestellt
57XY => X _ dpa, ap, afp, rtr, wb
58*/
59
60import (
61 "strconv"
62 "strings"
63
64 "github.com/rs/zerolog/log"
65 "github.com/tidwall/gjson"
66 "github.com/tidwall/sjson"
67)
68
69/*
70import (
71 "encoding/json"
72 "fmt"
73 "log"
74 "strings"
75)
76
77var mapping = map[string]string{
78 "$(":"PUNCT",
79}
80
81// Recursive function to turn the UPos query into a STTS query
82func koralRewriteUpos2Stts(koralquery interface{}) interface{} {
83 switch v := koralquery.(type) {
84 case map[string]interface{}:
85 // Check for '@type' key and act accordingly
86 if typ, ok := v["@type"].(string); ok {
87 switch typ {
88 case "koral:term":
89
90 // Modify the key to use STTS
91// This may require to turn object into a koral:token with terms like:
92
93
94 if key, ok := v["key"].(string); ok {
95 v["key"] = "hallo-" + key
96 }
97 case "operation":
98 // Handle the 'operators' key by recursively modifying each operator
99 if operators, ok := v["operators"].([]interface{}); ok {
100 for i, operator := range operators {
101 operators[i] = modifyJSON(operator)
102 }
103 v["operators"] = operators
104 }
105 }
106 }
107 // Recursively modify any nested maps
108 for k, val := range v {
109 v[k] = modifyJSON(val)
110 }
111 return v
112 case []interface{}:
113 // Recursively modify elements of arrays
114 for i, item := range v {
115 v[i] = modifyJSON(item)
116 }
117 return v
118 }
119 return koralquery
120}
121
122func main() {
123 // Sample JSON input string
124 jsonStr := `{
125 "@type": "operation",
126 "operators": [
127 {
128 "@type": "term",
129 "key": "example1"
130 },
131 {
132 "@type": "term",
133 "key": "example2"
134 },
135 {
136 "@type": "operation",
137 "operators": [
138 {
139 "@type": "term",
140 "key": "nested"
141 }
142 ]
143 }
144 ]
145 }`
146
147 // Parse the JSON string into a generic interface{}
148 var data interface{}
149 err := json.Unmarshal([]byte(jsonStr), &data)
150 if err != nil {
151 log.Fatal("Error unmarshaling JSON:", err)
152 }
153
154 // Modify the JSON structure recursively
155 modifiedData := modifyJSON(data)
156
157 // Marshal the modified data back into a JSON string
158 modifiedJSON, err := json.MarshalIndent(modifiedData, "", " ")
159 if err != nil {
160 log.Fatal("Error marshaling JSON:", err)
161 }
162
163 // Output the modified JSON string
164 fmt.Println(string(modifiedJSON))
165}
166
167
168
169func turnupostostts(json string, targetFoundry string, targetLayer string) {
170 if targetLayer == "" {
171 targetLayer = "p"
172 }
173
174 ldType := "@type"
175
176 if ldType == "koral:span" {
177 next
178 }
179 if ldType == "koral:term" {
180 if foundry == if layer === key -> rewrite
181 }
182
	// Iterate through the query and, whenever a term is requested without a foundry
	// and either without a layer or with layer p, change the key following the mapping
185
186
187}
188
189func addupostooutput(json string, reffoundry string, foundry string) {
190 // https://universaldependencies.org/tagset-conversion/de-stts-uposf.html
191 // Iterate through all matches and add to all xml snippets a line of foundry
192
193}
194
195*/
196
// Term bundles the three string components that term() serializes into a
// koral:term object: the foundry, the layer and the key.
//
// The field order (Foundry, Layer, Key) is kept stable so that positional
// literals such as Term{foundry, layer, key} keep their meaning.
type Term struct {
	Foundry, Layer, Key string
}
202
Akronfc3bd272025-04-04 16:15:44 +0200203func Map2(json []byte) string {
204 /*
205 result := gjson.GetBytes(json, "query")
206 var raw []byte
207 if result.Index > 0 {
208 raw = json[result.Index:result.Index+len(result.Raw)]
209 } else {
210 raw = []byte(result.Raw)
211 }
212
213 if result.IsObject() {
214 koralType := gjson.GetBytes(raw, "@type").String()
215 switch koralType {
216 case "koral:term":
217
218 }
219 }
220 */
221
222 koralObj := gjson.ParseBytes(json)
223
224 switch koralObj.Get("@type").String() {
225 case "koral:term":
226 {
227 if koralObj.Get("value").String() == "KOKOM" {
228 // TODO: Turn this in a token, if it isn't already!
229 newJson, _ := sjson.Set(string(json), "value", "CCONJ")
230 return newJson
231 }
232 }
233
234 case "koral:operation":
235 {
236
237 }
238
239 }
240 /*
241
242 var raw []byte
243 if result.Index > 0 {
244 raw = json[result.Index:result.Index+len(result.Raw)]
245 } else {
246 raw = []byte(result.Raw)
247 }
248 */
249 return "jj"
250}
251
252// token writes a token to the string builder
Akronf4614232025-04-08 11:17:53 +0200253func token(strBuilder *strings.Builder, terms []Term, positive bool) {
Akronfc3bd272025-04-04 16:15:44 +0200254 strBuilder.WriteString(`{"@type":"koral:token","wrap":`)
Akronf4614232025-04-08 11:17:53 +0200255 if len(terms) > 1 {
256 termGroup(strBuilder, terms, positive)
Akronfc3bd272025-04-04 16:15:44 +0200257 } else {
Akronf4614232025-04-08 11:17:53 +0200258 term(strBuilder, terms[0], positive)
Akronfc3bd272025-04-04 16:15:44 +0200259 }
260 strBuilder.WriteString(`}`)
261}
262
263// termGroup writes a termGroup to the string builder
Akronf4614232025-04-08 11:17:53 +0200264func termGroup(strBuilder *strings.Builder, terms []Term, positive bool) {
Akron08aa2632025-04-07 17:38:28 +0200265 strBuilder.WriteString(`{"@type":"koral:termGroup",`)
266
267 if positive {
268 strBuilder.WriteString(`"relation":"relation:and","operation":"operation:and",`)
269 } else {
270 strBuilder.WriteString(`"relation":"relation:or","operation":"operation:or",`)
271 }
272
273 strBuilder.WriteString(`"operands":[`)
Akronf4614232025-04-08 11:17:53 +0200274 for i, t := range terms {
275 term(strBuilder, t, positive)
Akron08aa2632025-04-07 17:38:28 +0200276 if i < len(terms)-1 {
277 strBuilder.WriteString(",")
278 }
279 }
280 strBuilder.WriteString(`]}`)
281}
282
Akronfc3bd272025-04-04 16:15:44 +0200283// term writes a term to the string builder
Akronf4614232025-04-08 11:17:53 +0200284func term(strBuilder *strings.Builder, term Term, match bool) {
Akronfc3bd272025-04-04 16:15:44 +0200285
Akron08aa2632025-04-07 17:38:28 +0200286 strBuilder.WriteString(`{"@type":"koral:term","match":"match:`)
287 if match {
288 strBuilder.WriteString("eq")
289 } else {
290 strBuilder.WriteString("ne")
291 }
292 strBuilder.WriteString(`","foundry":"`)
293 strBuilder.WriteString(term.Foundry)
294 strBuilder.WriteString(`","layer":"`)
295 strBuilder.WriteString(term.Layer)
296 strBuilder.WriteString(`","key":"`)
297 strBuilder.WriteString(term.Key)
298 strBuilder.WriteString(`"}`)
299}
300
// flatten is a stub and currently does nothing.
//
// Planned behavior, taken from the original notes:
//   - if a termGroup is an operand in a termGroup with the same
//     relation/operation, flatten the termGroup into the parent termGroup
//   - if a termGroup has only a single term, remove the group
func flatten() {
}
308
Akron08aa2632025-04-07 17:38:28 +0200309func replaceWrappedTerms(jsonString string, terms []Term) string {
Akronfc3bd272025-04-04 16:15:44 +0200310 var err error
Akron08aa2632025-04-07 17:38:28 +0200311
312 if len(terms) == 1 {
313 jsonString, err = sjson.Set(jsonString, "foundry", terms[0].Foundry)
314 if err != nil {
315 log.Error().Err(err).Msg("Error setting foundry")
316 }
317 jsonString, err = sjson.Set(jsonString, "layer", terms[0].Layer)
318 if err != nil {
319 log.Error().Err(err).Msg("Error setting layer")
320 }
321 jsonString, err = sjson.Set(jsonString, "key", terms[0].Key)
322 if err != nil {
323 log.Error().Err(err).Msg("Error setting key")
324 }
325
326 return jsonString
Akronfc3bd272025-04-04 16:15:44 +0200327 }
Akron08aa2632025-04-07 17:38:28 +0200328
329 matchop := gjson.Get(jsonString, "match").String()
330
331 /*
332 foundry := gjson.Get(jsonString, "foundry").String()
333 layer := gjson.Get(jsonString, "layer").String()
334 key := gjson.Get(jsonString, "key").String()
335 term := Term{foundry, layer, key}
336
337
338 terms = append(terms, term)
339 */
340
341 var strBuilder strings.Builder
342 if matchop == "match:ne" {
Akronf4614232025-04-08 11:17:53 +0200343 termGroup(&strBuilder, terms, false)
Akron08aa2632025-04-07 17:38:28 +0200344 } else {
Akronf4614232025-04-08 11:17:53 +0200345 termGroup(&strBuilder, terms, true)
Akronfc3bd272025-04-04 16:15:44 +0200346 }
Akron08aa2632025-04-07 17:38:28 +0200347
348 return strBuilder.String()
349
Akronfc3bd272025-04-04 16:15:44 +0200350}
351
352func replaceGroupedTerm(jsonString string, op []int, foundry string, layer string, key string) string {
353 var err error
354
355 strInt := "operands." + strconv.Itoa(op[0]) + "."
356 jsonString, err = sjson.Set(jsonString, strInt+"foundry", foundry)
357 if err != nil {
358 log.Error().Err(err).Msg("Error setting foundry")
359 }
360 jsonString, err = sjson.Set(jsonString, strInt+"layer", layer)
361 if err != nil {
362 log.Error().Err(err).Msg("Error setting layer")
363 }
364 jsonString, err = sjson.Set(jsonString, strInt+"key", key)
365 if err != nil {
366 log.Error().Err(err).Msg("Error setting key")
367 }
368
369 if len(op) > 1 {
370 for i := 1; i < len(op); i++ {
371 jsonString, err = sjson.Delete(jsonString, "operands."+strconv.Itoa(op[i]))
372 if err != nil {
373 log.Error().Err(err).Msg("Error deleting operand")
374 }
375 }
376 }
377
378 return jsonString
379}
380
381/*
382func replaceTermWithToken(jsonString string) string {
383 // Replace the term with the token
384 replacedString, err := sjson.Set(jsonString, "wrap.operands.0", token())
385 if err != nil {
386 return jsonString // Return the original string in case of an error
387 }
388 return replacedString
389
390// case1: 1 -> 1 the term is an operand in a termGroup with the same relation/operation
391// case2: 1 -> 1 the term is wrapped
392// case3: 1 -> 1 the term is an operand in a termGroup with a different relation/operation
393// case4: n -> 1 the term is an operand in a termGroup with the same relation/operation
394// case5: n -> 1 the term is wrapped
395// case6: n -> 1 the term is an operand in a termGroup with a different relation/operation
396// case7: 1 -> n the term is an operand in a termGroup with the same relation/operation
397// case8: 1 -> n the term is wrapped
398// case9: 1 -> n the term is an operand in a termGroup with a different relation/operation
399 }
400*/
401
402func Map(jsonStr string) string {
403
404 obj := gjson.Get(jsonStr, "query")
405
406 // value := gjson.Get(json, "name.last")
407
408 /*
409
410 // Modify the JSON structure recursively
411 modifiedData := modifyJSON(ast.NewAny(data))
412
413 // Marshal the modified data back into a JSON string
414 modifiedJSON, err := sonic.MarshalString(modifiedData)
415
416 // Parse the JSON string into a generic interface{}
417 var data interface{}
418
419 err := sonic.UnmarshalString(jsonStr, data)
420
421 if err != nil {
422 log.Fatal("Error unmarshaling JSON:", err)
423 return ""
424 }
425
426
427
428 if err != nil {
429 log.Fatal("Error marshaling JSON:", err)
430 }
431 */
432 // Output the modified JSON string
433 return obj.String() //modifyJSON(obj)
434}
435
436// Recursive function to modify JSON using Sonic library
437//func modifyJSON(data gjson.Result) string {
438
439// Check if data is a map
440// if data.IsObject() {
441/*
442 dataMap := data.Map()
443
444 koralType := dataMap["@type"].String()
445
446 // Look for @type key
447
448 switch koralType {
449 case "koral:term":
450 // Modify the key by adding 'hallo-' prefix
451
452 // sjson.SetRaw(data.String())
453 sjson.Set(data.Path(data.Bytes()), "key", "hallo-"+dataMap["key"].String())
454
455 dataMap["key"] = "hallo-" + dataMap["key"].String()
456 /*
457 if key, found := data.GetString("key"); found {
458 data.Set("key", "hallo-"+key)
459 }
460*/
461/*
462 case "koral:operation":
463 // Handle the 'operators' key by recursively modifying each operator
464 if operators, found := data.GetArray("operators"); found {
465 for i := range operators {
466 operators[i] = modifyJSON(operators[i])
467 }
468 data.Set("operators", operators)
469 }
470 }*/
471/*
472 // Recursively modify any nested objects
473 data.ForEach(func(k string, v sonic.Any) {
474 data.Set(k, modifyJSON(v))
475 })
476*/
477//}
478// Handle arrays by modifying elements recursively
479/*
480 if data.IsArray() {
481 for i := range data.GetArray() {
482 data.Set(i, modifyJSON(data.GetArray()[i]))
483 }
484 }
485*/
486/*
487 return data
488}
489*/