blob: 791c1125a81c7f2f75d7ff3faab3e2f785c5b7ab [file] [log] [blame]
Akronfc3bd272025-04-04 16:15:44 +02001package termmapper
2
3/*
4$( => PUNCT PunctType=Brck ``, '', *RRB*, *LRB*, -
5$, => PUNCT PunctType=Comm ,
6$. => PUNCT PunctType=Peri ., :, ?, ;, !
7ADJA => ADJ _ neuen, neue, deutschen, ersten, anderen
8ADJD => ADJ Variant=Short gut, rund, knapp, deutlich, möglich
9ADV => ADV _ auch, nur, noch, so, aber
10APPO => ADP AdpType=Post zufolge, nach, gegenüber, wegen, über
11APPR => ADP AdpType=Prep in, von, mit, für, auf
12APPRART => ADP AdpType=Prep|PronType=Art im, am, zum, zur, vom
13APZR => ADP AdpType=Circ an, hinaus, aus, her, heraus
14ART => DET PronType=Art der, die, den, des, das
15CARD => NUM NumType=Card 000, zwei, drei, vier, fünf
16FM => X Foreign=Yes New, of, de, Times, the
17ITJ => INTJ _ naja, Ach, äh, Na, piep
18KOKOM => CCONJ ConjType=Comp als, wie, denn, wir
19KON => CCONJ _ und, oder, sondern, sowie, aber
20KOUI => SCONJ _ um, ohne, statt, anstatt, Ums
21KOUS => SCONJ _ daß, wenn, weil, ob, als
22NE => PROPN _ SPD, Deutschland, USA, dpa, Bonn
23NN => NOUN _ Prozent, Mark, Millionen, November, Jahren
24PAV => ADV PronType=Dem
25PDAT => DET PronType=Dem dieser, diese, diesem, dieses, diesen
26PDS => PRON PronType=Dem das, dies, die, diese, der
27PIAT => DET PronType=Ind,Neg,Tot keine, mehr, alle, kein, beiden
28PIDAT => DET AdjType=Pdt|PronType=Ind,Neg,Tot
29PIS => PRON PronType=Ind,Neg,Tot man, allem, nichts, alles, mehr
Akron8251bae2025-05-16 14:13:50 +020030
31
32PIS => PRON & PronType=[Ind|Neg|Tot]
33
Akronfc3bd272025-04-04 16:15:44 +020034PPER => PRON PronType=Prs es, sie, er, wir, ich
35PPOSAT => DET Poss=Yes|PronType=Prs ihre, seine, seiner, ihrer, ihren
36PPOSS => PRON Poss=Yes|PronType=Prs ihren, Seinen, seinem, unsrigen, meiner
37PRELAT => DET PronType=Rel deren, dessen, die
38PRELS => PRON PronType=Rel die, der, das, dem, denen
39PRF => PRON PronType=Prs|Reflex=Yes sich, uns, mich, mir, dich
40PTKA => PART _ zu, am, allzu, Um
41PTKANT => PART PartType=Res nein, ja, bitte, Gewiß, Also
42PTKNEG => PART Polarity=Neg nicht
43PTKVZ => ADP PartType=Vbp an, aus, ab, vor, auf
44PTKZU => PART PartType=Inf zu, zur, zum
45PWAT => DET PronType=Int welche, welchen, welcher, wie, welchem
46PWAV => ADV PronType=Int wie, wo, warum, wobei, wonach
47PWS => PRON PronType=Int was, wer, wem, wen, welches
48TRUNC => X Hyph=Yes Staats-, Industrie-, Finanz-, Öl-, Lohn-
49VAFIN => AUX Mood=Ind|VerbForm=Fin ist, hat, wird, sind, sei
50VAIMP => AUX Mood=Imp|VerbForm=Fin Seid, werde, Sei
51VAINF => AUX VerbForm=Inf werden, sein, haben, worden, Dabeisein
52VAPP => AUX Aspect=Perf|VerbForm=Part worden, gewesen, geworden, gehabt, werden
53VMFIN => VERB Mood=Ind|VerbForm=Fin|VerbType=Mod kann, soll, will, muß, sollen
54VMINF => VERB VerbForm=Inf|VerbType=Mod können, müssen, wollen, dürfen, sollen
55VMPP => VERB Aspect=Perf|VerbForm=Part|VerbType=Mod gewollt
56VVFIN => VERB Mood=Ind|VerbForm=Fin sagte, gibt, geht, steht, kommt
57VVIMP => VERB Mood=Imp|VerbForm=Fin siehe, sprich, schauen, Sagen, gestehe
58VVINF => VERB VerbForm=Inf machen, lassen, bleiben, geben, bringen
59VVIZU => VERB VerbForm=Inf einzusetzen, durchzusetzen, aufzunehmen, abzubauen, umzusetzen
60VVPP => VERB Aspect=Perf|VerbForm=Part gemacht, getötet, gefordert, gegeben, gestellt
61XY => X _ dpa, ap, afp, rtr, wb
62*/
63
import (
	"sort"
	"strconv"
	"strings"

	"github.com/rs/zerolog/log"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)
72
73/*
74import (
75 "encoding/json"
76 "fmt"
77 "log"
78 "strings"
79)
80
81var mapping = map[string]string{
82 "$(":"PUNCT",
83}
84
85// Recursive function to turn the UPos query into a STTS query
86func koralRewriteUpos2Stts(koralquery interface{}) interface{} {
87 switch v := koralquery.(type) {
88 case map[string]interface{}:
89 // Check for '@type' key and act accordingly
90 if typ, ok := v["@type"].(string); ok {
91 switch typ {
92 case "koral:term":
93
94 // Modify the key to use STTS
95// This may require to turn object into a koral:token with terms like:
96
97
98 if key, ok := v["key"].(string); ok {
99 v["key"] = "hallo-" + key
100 }
101 case "operation":
102 // Handle the 'operators' key by recursively modifying each operator
103 if operators, ok := v["operators"].([]interface{}); ok {
104 for i, operator := range operators {
105 operators[i] = modifyJSON(operator)
106 }
107 v["operators"] = operators
108 }
109 }
110 }
111 // Recursively modify any nested maps
112 for k, val := range v {
113 v[k] = modifyJSON(val)
114 }
115 return v
116 case []interface{}:
117 // Recursively modify elements of arrays
118 for i, item := range v {
119 v[i] = modifyJSON(item)
120 }
121 return v
122 }
123 return koralquery
124}
125
126func main() {
127 // Sample JSON input string
128 jsonStr := `{
129 "@type": "operation",
130 "operators": [
131 {
132 "@type": "term",
133 "key": "example1"
134 },
135 {
136 "@type": "term",
137 "key": "example2"
138 },
139 {
140 "@type": "operation",
141 "operators": [
142 {
143 "@type": "term",
144 "key": "nested"
145 }
146 ]
147 }
148 ]
149 }`
150
151 // Parse the JSON string into a generic interface{}
152 var data interface{}
153 err := json.Unmarshal([]byte(jsonStr), &data)
154 if err != nil {
155 log.Fatal("Error unmarshaling JSON:", err)
156 }
157
158 // Modify the JSON structure recursively
159 modifiedData := modifyJSON(data)
160
161 // Marshal the modified data back into a JSON string
162 modifiedJSON, err := json.MarshalIndent(modifiedData, "", " ")
163 if err != nil {
164 log.Fatal("Error marshaling JSON:", err)
165 }
166
167 // Output the modified JSON string
168 fmt.Println(string(modifiedJSON))
169}
170
171
172
173func turnupostostts(json string, targetFoundry string, targetLayer string) {
174 if targetLayer == "" {
175 targetLayer = "p"
176 }
177
178 ldType := "@type"
179
180 if ldType == "koral:span" {
181 next
182 }
183 if ldType == "koral:term" {
184 if foundry == if layer === key -> rewrite
185 }
186
187 // Iterate through the query and whenever a term is requested without a foundry, and without a layer or with layer p,
188 // change the key following the mapping
189
190
191}
192
193func addupostooutput(json string, reffoundry string, foundry string) {
194 // https://universaldependencies.org/tagset-conversion/de-stts-uposf.html
195 // Iterate through all matches and add to all xml snippets a line of foundry
196
197}
198
199*/
200
// Term holds the foundry, layer and key values that are serialized into the
// "foundry", "layer" and "key" members of a koral:term object.
type Term struct {
	Foundry, Layer, Key string
}
206
Akronfc3bd272025-04-04 16:15:44 +0200207func Map2(json []byte) string {
208 /*
209 result := gjson.GetBytes(json, "query")
210 var raw []byte
211 if result.Index > 0 {
212 raw = json[result.Index:result.Index+len(result.Raw)]
213 } else {
214 raw = []byte(result.Raw)
215 }
216
217 if result.IsObject() {
218 koralType := gjson.GetBytes(raw, "@type").String()
219 switch koralType {
220 case "koral:term":
221
222 }
223 }
224 */
225
226 koralObj := gjson.ParseBytes(json)
227
228 switch koralObj.Get("@type").String() {
229 case "koral:term":
230 {
231 if koralObj.Get("value").String() == "KOKOM" {
232 // TODO: Turn this in a token, if it isn't already!
233 newJson, _ := sjson.Set(string(json), "value", "CCONJ")
234 return newJson
235 }
236 }
237
238 case "koral:operation":
239 {
240
241 }
242
243 }
244 /*
245
246 var raw []byte
247 if result.Index > 0 {
248 raw = json[result.Index:result.Index+len(result.Raw)]
249 } else {
250 raw = []byte(result.Raw)
251 }
252 */
253 return "jj"
254}
255
256// token writes a token to the string builder
Akronf4614232025-04-08 11:17:53 +0200257func token(strBuilder *strings.Builder, terms []Term, positive bool) {
Akronfc3bd272025-04-04 16:15:44 +0200258 strBuilder.WriteString(`{"@type":"koral:token","wrap":`)
Akronf4614232025-04-08 11:17:53 +0200259 if len(terms) > 1 {
Akron8251bae2025-05-16 14:13:50 +0200260 termGroup(strBuilder, terms, true, positive)
Akronfc3bd272025-04-04 16:15:44 +0200261 } else {
Akronf4614232025-04-08 11:17:53 +0200262 term(strBuilder, terms[0], positive)
Akronfc3bd272025-04-04 16:15:44 +0200263 }
264 strBuilder.WriteString(`}`)
265}
266
267// termGroup writes a termGroup to the string builder
Akron8251bae2025-05-16 14:13:50 +0200268func termGroup(strBuilder *strings.Builder, terms []Term, operationAnd bool, positive bool) {
Akron08aa2632025-04-07 17:38:28 +0200269 strBuilder.WriteString(`{"@type":"koral:termGroup",`)
270
Akron8251bae2025-05-16 14:13:50 +0200271 if operationAnd {
Akron08aa2632025-04-07 17:38:28 +0200272 strBuilder.WriteString(`"relation":"relation:and","operation":"operation:and",`)
273 } else {
274 strBuilder.WriteString(`"relation":"relation:or","operation":"operation:or",`)
275 }
276
277 strBuilder.WriteString(`"operands":[`)
Akronf4614232025-04-08 11:17:53 +0200278 for i, t := range terms {
279 term(strBuilder, t, positive)
Akron08aa2632025-04-07 17:38:28 +0200280 if i < len(terms)-1 {
281 strBuilder.WriteString(",")
282 }
283 }
284 strBuilder.WriteString(`]}`)
285}
286
Akronfc3bd272025-04-04 16:15:44 +0200287// term writes a term to the string builder
Akron8251bae2025-05-16 14:13:50 +0200288func term(strBuilder *strings.Builder, term Term, positive bool) {
Akron08aa2632025-04-07 17:38:28 +0200289 strBuilder.WriteString(`{"@type":"koral:term","match":"match:`)
Akron8251bae2025-05-16 14:13:50 +0200290 if positive {
Akron08aa2632025-04-07 17:38:28 +0200291 strBuilder.WriteString("eq")
292 } else {
293 strBuilder.WriteString("ne")
294 }
295 strBuilder.WriteString(`","foundry":"`)
296 strBuilder.WriteString(term.Foundry)
297 strBuilder.WriteString(`","layer":"`)
298 strBuilder.WriteString(term.Layer)
299 strBuilder.WriteString(`","key":"`)
300 strBuilder.WriteString(term.Key)
301 strBuilder.WriteString(`"}`)
302}
303
// flatten is an unimplemented placeholder: it takes no arguments, returns
// nothing and has an empty body. The comments below record the planned rules.
func flatten() {

	// If a termGroup is an operand in a termGroup with the same relation/operation:
	// flatten the termGroup into the parent termGroup.

	// If a termGroup has only a single term, remove the group.
}
311
Akron8251bae2025-05-16 14:13:50 +0200312// replaceWrappedTerm replaces the wrapped term with the new term group
Akron08aa2632025-04-07 17:38:28 +0200313func replaceWrappedTerms(jsonString string, terms []Term) string {
Akronfc3bd272025-04-04 16:15:44 +0200314 var err error
Akron08aa2632025-04-07 17:38:28 +0200315
Akron8251bae2025-05-16 14:13:50 +0200316 // Replace with a single term
Akron08aa2632025-04-07 17:38:28 +0200317 if len(terms) == 1 {
318 jsonString, err = sjson.Set(jsonString, "foundry", terms[0].Foundry)
319 if err != nil {
320 log.Error().Err(err).Msg("Error setting foundry")
321 }
322 jsonString, err = sjson.Set(jsonString, "layer", terms[0].Layer)
323 if err != nil {
324 log.Error().Err(err).Msg("Error setting layer")
325 }
326 jsonString, err = sjson.Set(jsonString, "key", terms[0].Key)
327 if err != nil {
328 log.Error().Err(err).Msg("Error setting key")
329 }
330
331 return jsonString
Akronfc3bd272025-04-04 16:15:44 +0200332 }
Akron08aa2632025-04-07 17:38:28 +0200333
334 matchop := gjson.Get(jsonString, "match").String()
335
336 /*
337 foundry := gjson.Get(jsonString, "foundry").String()
338 layer := gjson.Get(jsonString, "layer").String()
339 key := gjson.Get(jsonString, "key").String()
340 term := Term{foundry, layer, key}
341
342
343 terms = append(terms, term)
344 */
345
346 var strBuilder strings.Builder
347 if matchop == "match:ne" {
Akron8251bae2025-05-16 14:13:50 +0200348 // ! Make or-Group with nes
349 termGroup(&strBuilder, terms, false, false)
Akron08aa2632025-04-07 17:38:28 +0200350 } else {
Akron8251bae2025-05-16 14:13:50 +0200351 termGroup(&strBuilder, terms, true, true)
Akronfc3bd272025-04-04 16:15:44 +0200352 }
Akron08aa2632025-04-07 17:38:28 +0200353
354 return strBuilder.String()
355
Akronfc3bd272025-04-04 16:15:44 +0200356}
357
358func replaceGroupedTerm(jsonString string, op []int, foundry string, layer string, key string) string {
359 var err error
360
361 strInt := "operands." + strconv.Itoa(op[0]) + "."
362 jsonString, err = sjson.Set(jsonString, strInt+"foundry", foundry)
363 if err != nil {
364 log.Error().Err(err).Msg("Error setting foundry")
365 }
366 jsonString, err = sjson.Set(jsonString, strInt+"layer", layer)
367 if err != nil {
368 log.Error().Err(err).Msg("Error setting layer")
369 }
370 jsonString, err = sjson.Set(jsonString, strInt+"key", key)
371 if err != nil {
372 log.Error().Err(err).Msg("Error setting key")
373 }
374
375 if len(op) > 1 {
376 for i := 1; i < len(op); i++ {
377 jsonString, err = sjson.Delete(jsonString, "operands."+strconv.Itoa(op[i]))
378 if err != nil {
379 log.Error().Err(err).Msg("Error deleting operand")
380 }
381 }
382 }
383
384 return jsonString
385}
386
Akron8251bae2025-05-16 14:13:50 +0200387func replaceGroupedTerms(jsonString string, op []int, terms []Term) string {
388 var err error
389
390 positive := true
391 operationAnd := true
392
393 operation := gjson.Get(jsonString, "operation")
394 if operation.String() == "operation:or" {
395 operationAnd = false
396 }
397
398 // TODO:
399 // matchop := gjson.Get(jsonString, strInt).String()
400
401 if len(op) == 1 {
402 strInt := "operands." + strconv.Itoa(op[0]) + ".match"
403
404 matchop := gjson.Get(jsonString, strInt).String()
405
406 if matchop == "match:ne" {
407 positive = false
408 }
409
410 // Delete the first term
411 jsonString, err = sjson.Delete(jsonString, strInt)
412
413 if err != nil {
414 log.Error().Err(err).Msg("Error deleting match")
415 }
416 }
417
418 for i := 0; i < len(op); i++ {
419 jsonString, err = sjson.Delete(jsonString, "operands."+strconv.Itoa(op[i]))
420
421 if err != nil {
422 log.Error().Err(err).Msg("Error deleting operand")
423 }
424 }
425
426 // TODO:
427 // Check if the group has only a single operand!
428
429 // TODO:
430 // All terms in the group require the same match!
431 // It's not possible to deal with !a & b
432 /*
433 jsonString, err = sjson.Set(jsonString, strInt+"foundry", foundry)
434 if err != nil {
435 log.Error().Err(err).Msg("Error setting foundry")
436 }
437 jsonString, err = sjson.Set(jsonString, strInt+"layer", layer)
438 if err != nil {
439 log.Error().Err(err).Msg("Error setting layer")
440 }
441 jsonString, err = sjson.Set(jsonString, strInt+"key", key)
442 if err != nil {
443 log.Error().Err(err).Msg("Error setting key")
444 }
445
446 if len(op) > 1 {
447 for i := 1; i < len(op); i++ {
448 jsonString, err = sjson.Delete(jsonString, "operands."+strconv.Itoa(op[i]))
449 if err != nil {
450 log.Error().Err(err).Msg("Error deleting operand")
451 }
452 }
453 }
454 */
455
456 var strBuilder strings.Builder
457 // Embed a new termGroup
458 if !operationAnd {
459 termGroup(&strBuilder, terms, true, false)
460 jsonString, err = sjson.SetRaw(jsonString, "operands.-1", strBuilder.String())
461 if err != nil {
462 log.Error().Err(err).Msg("Error adding termGroup")
463 }
464 strBuilder.Reset()
465 } else if !positive {
466 termGroup(&strBuilder, terms, false, false)
467 jsonString, err = sjson.SetRaw(jsonString, "operands.-1", strBuilder.String())
468 if err != nil {
469 log.Error().Err(err).Msg("Error adding termGroup")
470 }
471 strBuilder.Reset()
472 } else {
473 for i := 0; i < len(terms); i++ {
474 term(&strBuilder, terms[i], positive)
475 jsonString, err = sjson.SetRaw(jsonString, "operands.-1", strBuilder.String())
476
477 if err != nil {
478 log.Error().Err(err).Msg("Error adding term")
479 }
480 strBuilder.Reset()
481 }
482 }
483
484 return jsonString
485}
486
Akronfc3bd272025-04-04 16:15:44 +0200487/*
488func replaceTermWithToken(jsonString string) string {
489 // Replace the term with the token
490 replacedString, err := sjson.Set(jsonString, "wrap.operands.0", token())
491 if err != nil {
492 return jsonString // Return the original string in case of an error
493 }
494 return replacedString
495
496// case1: 1 -> 1 the term is an operand in a termGroup with the same relation/operation
497// case2: 1 -> 1 the term is wrapped
498// case3: 1 -> 1 the term is an operand in a termGroup with a different relation/operation
499// case4: n -> 1 the term is an operand in a termGroup with the same relation/operation
500// case5: n -> 1 the term is wrapped
501// case6: n -> 1 the term is an operand in a termGroup with a different relation/operation
502// case7: 1 -> n the term is an operand in a termGroup with the same relation/operation
503// case8: 1 -> n the term is wrapped
504// case9: 1 -> n the term is an operand in a termGroup with a different relation/operation
505 }
506*/
507
508func Map(jsonStr string) string {
509
510 obj := gjson.Get(jsonStr, "query")
511
512 // value := gjson.Get(json, "name.last")
513
514 /*
515
516 // Modify the JSON structure recursively
517 modifiedData := modifyJSON(ast.NewAny(data))
518
519 // Marshal the modified data back into a JSON string
520 modifiedJSON, err := sonic.MarshalString(modifiedData)
521
522 // Parse the JSON string into a generic interface{}
523 var data interface{}
524
525 err := sonic.UnmarshalString(jsonStr, data)
526
527 if err != nil {
528 log.Fatal("Error unmarshaling JSON:", err)
529 return ""
530 }
531
532
533
534 if err != nil {
535 log.Fatal("Error marshaling JSON:", err)
536 }
537 */
538 // Output the modified JSON string
539 return obj.String() //modifyJSON(obj)
540}
541
542// Recursive function to modify JSON using Sonic library
543//func modifyJSON(data gjson.Result) string {
544
545// Check if data is a map
546// if data.IsObject() {
547/*
548 dataMap := data.Map()
549
550 koralType := dataMap["@type"].String()
551
552 // Look for @type key
553
554 switch koralType {
555 case "koral:term":
556 // Modify the key by adding 'hallo-' prefix
557
558 // sjson.SetRaw(data.String())
559 sjson.Set(data.Path(data.Bytes()), "key", "hallo-"+dataMap["key"].String())
560
561 dataMap["key"] = "hallo-" + dataMap["key"].String()
562 /*
563 if key, found := data.GetString("key"); found {
564 data.Set("key", "hallo-"+key)
565 }
566*/
567/*
568 case "koral:operation":
569 // Handle the 'operators' key by recursively modifying each operator
570 if operators, found := data.GetArray("operators"); found {
571 for i := range operators {
572 operators[i] = modifyJSON(operators[i])
573 }
574 data.Set("operators", operators)
575 }
576 }*/
577/*
578 // Recursively modify any nested objects
579 data.ForEach(func(k string, v sonic.Any) {
580 data.Set(k, modifyJSON(v))
581 })
582*/
583//}
584// Handle arrays by modifying elements recursively
585/*
586 if data.IsArray() {
587 for i := range data.GetArray() {
588 data.Set(i, modifyJSON(data.GetArray()[i]))
589 }
590 }
591*/
592/*
593 return data
594}
595*/