blob: 5ddb6b80f0fff08e000171030481621a9e9b252a [file] [log] [blame]
Akronfc3bd272025-04-04 16:15:44 +02001package termmapper
2
3/*
4$( => PUNCT PunctType=Brck ``, '', *RRB*, *LRB*, -
5$, => PUNCT PunctType=Comm ,
6$. => PUNCT PunctType=Peri ., :, ?, ;, !
7ADJA => ADJ _ neuen, neue, deutschen, ersten, anderen
8ADJD => ADJ Variant=Short gut, rund, knapp, deutlich, möglich
9ADV => ADV _ auch, nur, noch, so, aber
10APPO => ADP AdpType=Post zufolge, nach, gegenüber, wegen, über
11APPR => ADP AdpType=Prep in, von, mit, für, auf
12APPRART => ADP AdpType=Prep|PronType=Art im, am, zum, zur, vom
13APZR => ADP AdpType=Circ an, hinaus, aus, her, heraus
14ART => DET PronType=Art der, die, den, des, das
15CARD => NUM NumType=Card 000, zwei, drei, vier, fünf
16FM => X Foreign=Yes New, of, de, Times, the
17ITJ => INTJ _ naja, Ach, äh, Na, piep
18KOKOM => CCONJ ConjType=Comp als, wie, denn, wir
19KON => CCONJ _ und, oder, sondern, sowie, aber
20KOUI => SCONJ _ um, ohne, statt, anstatt, Ums
21KOUS => SCONJ _ daß, wenn, weil, ob, als
22NE => PROPN _ SPD, Deutschland, USA, dpa, Bonn
23NN => NOUN _ Prozent, Mark, Millionen, November, Jahren
24PAV => ADV PronType=Dem
25PDAT => DET PronType=Dem dieser, diese, diesem, dieses, diesen
26PDS => PRON PronType=Dem das, dies, die, diese, der
27PIAT => DET PronType=Ind,Neg,Tot keine, mehr, alle, kein, beiden
28PIDAT => DET AdjType=Pdt|PronType=Ind,Neg,Tot
29PIS => PRON PronType=Ind,Neg,Tot man, allem, nichts, alles, mehr
30PPER => PRON PronType=Prs es, sie, er, wir, ich
31PPOSAT => DET Poss=Yes|PronType=Prs ihre, seine, seiner, ihrer, ihren
32PPOSS => PRON Poss=Yes|PronType=Prs ihren, Seinen, seinem, unsrigen, meiner
33PRELAT => DET PronType=Rel deren, dessen, die
34PRELS => PRON PronType=Rel die, der, das, dem, denen
35PRF => PRON PronType=Prs|Reflex=Yes sich, uns, mich, mir, dich
36PTKA => PART _ zu, am, allzu, Um
37PTKANT => PART PartType=Res nein, ja, bitte, Gewiß, Also
38PTKNEG => PART Polarity=Neg nicht
39PTKVZ => ADP PartType=Vbp an, aus, ab, vor, auf
40PTKZU => PART PartType=Inf zu, zur, zum
41PWAT => DET PronType=Int welche, welchen, welcher, wie, welchem
42PWAV => ADV PronType=Int wie, wo, warum, wobei, wonach
43PWS => PRON PronType=Int was, wer, wem, wen, welches
44TRUNC => X Hyph=Yes Staats-, Industrie-, Finanz-, Öl-, Lohn-
45VAFIN => AUX Mood=Ind|VerbForm=Fin ist, hat, wird, sind, sei
46VAIMP => AUX Mood=Imp|VerbForm=Fin Seid, werde, Sei
47VAINF => AUX VerbForm=Inf werden, sein, haben, worden, Dabeisein
48VAPP => AUX Aspect=Perf|VerbForm=Part worden, gewesen, geworden, gehabt, werden
49VMFIN => VERB Mood=Ind|VerbForm=Fin|VerbType=Mod kann, soll, will, muß, sollen
50VMINF => VERB VerbForm=Inf|VerbType=Mod können, müssen, wollen, dürfen, sollen
51VMPP => VERB Aspect=Perf|VerbForm=Part|VerbType=Mod gewollt
52VVFIN => VERB Mood=Ind|VerbForm=Fin sagte, gibt, geht, steht, kommt
53VVIMP => VERB Mood=Imp|VerbForm=Fin siehe, sprich, schauen, Sagen, gestehe
54VVINF => VERB VerbForm=Inf machen, lassen, bleiben, geben, bringen
55VVIZU => VERB VerbForm=Inf einzusetzen, durchzusetzen, aufzunehmen, abzubauen, umzusetzen
56VVPP => VERB Aspect=Perf|VerbForm=Part gemacht, getötet, gefordert, gegeben, gestellt
57XY => X _ dpa, ap, afp, rtr, wb
58*/
59
60import (
61 "strconv"
62 "strings"
63
64 "github.com/rs/zerolog/log"
65 "github.com/tidwall/gjson"
66 "github.com/tidwall/sjson"
67)
68
69/*
70import (
71 "encoding/json"
72 "fmt"
73 "log"
74 "strings"
75)
76
77var mapping = map[string]string{
78 "$(":"PUNCT",
79}
80
81// Recursive function to turn the UPos query into a STTS query
82func koralRewriteUpos2Stts(koralquery interface{}) interface{} {
83 switch v := koralquery.(type) {
84 case map[string]interface{}:
85 // Check for '@type' key and act accordingly
86 if typ, ok := v["@type"].(string); ok {
87 switch typ {
88 case "koral:term":
89
90 // Modify the key to use STTS
91// This may require to turn object into a koral:token with terms like:
92
93
94 if key, ok := v["key"].(string); ok {
95 v["key"] = "hallo-" + key
96 }
97 case "operation":
98 // Handle the 'operators' key by recursively modifying each operator
99 if operators, ok := v["operators"].([]interface{}); ok {
100 for i, operator := range operators {
101 operators[i] = modifyJSON(operator)
102 }
103 v["operators"] = operators
104 }
105 }
106 }
107 // Recursively modify any nested maps
108 for k, val := range v {
109 v[k] = modifyJSON(val)
110 }
111 return v
112 case []interface{}:
113 // Recursively modify elements of arrays
114 for i, item := range v {
115 v[i] = modifyJSON(item)
116 }
117 return v
118 }
119 return koralquery
120}
121
122func main() {
123 // Sample JSON input string
124 jsonStr := `{
125 "@type": "operation",
126 "operators": [
127 {
128 "@type": "term",
129 "key": "example1"
130 },
131 {
132 "@type": "term",
133 "key": "example2"
134 },
135 {
136 "@type": "operation",
137 "operators": [
138 {
139 "@type": "term",
140 "key": "nested"
141 }
142 ]
143 }
144 ]
145 }`
146
147 // Parse the JSON string into a generic interface{}
148 var data interface{}
149 err := json.Unmarshal([]byte(jsonStr), &data)
150 if err != nil {
151 log.Fatal("Error unmarshaling JSON:", err)
152 }
153
154 // Modify the JSON structure recursively
155 modifiedData := modifyJSON(data)
156
157 // Marshal the modified data back into a JSON string
158 modifiedJSON, err := json.MarshalIndent(modifiedData, "", " ")
159 if err != nil {
160 log.Fatal("Error marshaling JSON:", err)
161 }
162
163 // Output the modified JSON string
164 fmt.Println(string(modifiedJSON))
165}
166
167
168
169func turnupostostts(json string, targetFoundry string, targetLayer string) {
170 if targetLayer == "" {
171 targetLayer = "p"
172 }
173
174 ldType := "@type"
175
176 if ldType == "koral:span" {
177 next
178 }
179 if ldType == "koral:term" {
180 if foundry == if layer === key -> rewrite
181 }
182
	// Iterate through the query and whenever a term is requested without a foundry, and without a layer or with layer p,
184 // change the key following the mapping
185
186
187}
188
189func addupostooutput(json string, reffoundry string, foundry string) {
190 // https://universaldependencies.org/tagset-conversion/de-stts-uposf.html
191 // Iterate through all matches and add to all xml snippets a line of foundry
192
193}
194
195*/
196
// Term holds the foundry, layer and key values that term2 serialises into
// the "foundry", "layer" and "key" members of a koral:term object.
type Term struct {
	Foundry, Layer, Key string
}
202
// Hui is a placeholder that always returns the string "test".
func Hui() string {
	const placeholder = "test"
	return placeholder
}
206
207func Map2(json []byte) string {
208 /*
209 result := gjson.GetBytes(json, "query")
210 var raw []byte
211 if result.Index > 0 {
212 raw = json[result.Index:result.Index+len(result.Raw)]
213 } else {
214 raw = []byte(result.Raw)
215 }
216
217 if result.IsObject() {
218 koralType := gjson.GetBytes(raw, "@type").String()
219 switch koralType {
220 case "koral:term":
221
222 }
223 }
224 */
225
226 koralObj := gjson.ParseBytes(json)
227
228 switch koralObj.Get("@type").String() {
229 case "koral:term":
230 {
231 if koralObj.Get("value").String() == "KOKOM" {
232 // TODO: Turn this in a token, if it isn't already!
233 newJson, _ := sjson.Set(string(json), "value", "CCONJ")
234 return newJson
235 }
236 }
237
238 case "koral:operation":
239 {
240
241 }
242
243 }
244 /*
245
246 var raw []byte
247 if result.Index > 0 {
248 raw = json[result.Index:result.Index+len(result.Raw)]
249 } else {
250 raw = []byte(result.Raw)
251 }
252 */
253 return "jj"
254}
255
256// token writes a token to the string builder
257func token(strBuilder *strings.Builder, foundry string, layer string, keys []string) {
258 strBuilder.WriteString(`{"@type":"koral:token","wrap":`)
259 if len(keys) > 1 {
260 termGroup(strBuilder, foundry, layer, keys)
261 } else {
262 term(strBuilder, foundry, layer, keys[0], true)
263 }
264 strBuilder.WriteString(`}`)
265}
266
267// termGroup writes a termGroup to the string builder
268func termGroup(strBuilder *strings.Builder, foundry string, layer string, keys []string) {
269 strBuilder.WriteString(`{"@type":"koral:termGroup","relation":"relation:and","operation":"operation:and","operands":[`)
270 for i, key := range keys {
271 term(strBuilder, foundry, layer, key, true) // temporary
272 if i < len(keys)-1 {
273 strBuilder.WriteString(",")
274 }
275 }
276 strBuilder.WriteString(`]}`)
277}
278
Akron08aa2632025-04-07 17:38:28 +0200279// termGroup2 writes a termGroup to the string builder
280func termGroup2(strBuilder *strings.Builder, terms []Term, positive bool) {
281 strBuilder.WriteString(`{"@type":"koral:termGroup",`)
282
283 if positive {
284 strBuilder.WriteString(`"relation":"relation:and","operation":"operation:and",`)
285 } else {
286 strBuilder.WriteString(`"relation":"relation:or","operation":"operation:or",`)
287 }
288
289 strBuilder.WriteString(`"operands":[`)
290 for i, term := range terms {
291 term2(strBuilder, term, positive)
292 if i < len(terms)-1 {
293 strBuilder.WriteString(",")
294 }
295 }
296 strBuilder.WriteString(`]}`)
297}
298
// term writes a koral:term object to the string builder.
//
// match selects the match operator: "match:eq" when true, "match:ne" when
// false. foundry, layer and key are written as JSON string values and are
// escaped, so that a quote, backslash or control character in any of them
// cannot break the surrounding JSON.
func term(strBuilder *strings.Builder, foundry string, layer string, key string, match bool) {
	strBuilder.WriteString(`{"@type":"koral:term","match":"match:`)
	if match {
		strBuilder.WriteString("eq")
	} else {
		strBuilder.WriteString("ne")
	}
	strBuilder.WriteString(`","foundry":"`)
	writeJSONEscaped(strBuilder, foundry)
	strBuilder.WriteString(`","layer":"`)
	writeJSONEscaped(strBuilder, layer)
	strBuilder.WriteString(`","key":"`)
	writeJSONEscaped(strBuilder, key)
	strBuilder.WriteString(`"}`)
}

// writeJSONEscaped writes s to the builder as the content of a JSON string
// literal (without the surrounding quotes). Only '"', '\\' and bytes below
// 0x20 are rewritten; all other bytes, including multi-byte UTF-8
// sequences, are copied unchanged.
func writeJSONEscaped(b *strings.Builder, s string) {
	const hexDigits = "0123456789abcdef"

	start := 0 // beginning of the pending run of bytes needing no escape
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= 0x20 && c != '"' && c != '\\' {
			continue
		}
		b.WriteString(s[start:i])
		switch c {
		case '"', '\\':
			b.WriteByte('\\')
			b.WriteByte(c)
		case '\n':
			b.WriteString(`\n`)
		case '\r':
			b.WriteString(`\r`)
		case '\t':
			b.WriteString(`\t`)
		default:
			// Remaining control characters use the generic \u00XX form.
			b.WriteString(`\u00`)
			b.WriteByte(hexDigits[c>>4])
			b.WriteByte(hexDigits[c&0x0f])
		}
		start = i + 1
	}
	b.WriteString(s[start:])
}
317
Akron08aa2632025-04-07 17:38:28 +0200318// term writes a term to the string builder
319func term2(strBuilder *strings.Builder, term Term, match bool) {
320
321 // TODO: May have ne!!!!
322 strBuilder.WriteString(`{"@type":"koral:term","match":"match:`)
323 if match {
324 strBuilder.WriteString("eq")
325 } else {
326 strBuilder.WriteString("ne")
327 }
328 strBuilder.WriteString(`","foundry":"`)
329 strBuilder.WriteString(term.Foundry)
330 strBuilder.WriteString(`","layer":"`)
331 strBuilder.WriteString(term.Layer)
332 strBuilder.WriteString(`","key":"`)
333 strBuilder.WriteString(term.Key)
334 strBuilder.WriteString(`"}`)
335}
336
// flatten is an unimplemented placeholder; it currently does nothing.
//
// Planned behaviour, as noted by the original author:
//   - if a termGroup is an operand in a termGroup with the same
//     relation/operation, flatten it into the parent termGroup;
//   - if a termGroup has only a single term, remove the group.
func flatten() {
	// TODO: not implemented yet.
}
344
Akron08aa2632025-04-07 17:38:28 +0200345func replaceWrappedTerms(jsonString string, terms []Term) string {
Akronfc3bd272025-04-04 16:15:44 +0200346 var err error
Akron08aa2632025-04-07 17:38:28 +0200347
348 if len(terms) == 1 {
349 jsonString, err = sjson.Set(jsonString, "foundry", terms[0].Foundry)
350 if err != nil {
351 log.Error().Err(err).Msg("Error setting foundry")
352 }
353 jsonString, err = sjson.Set(jsonString, "layer", terms[0].Layer)
354 if err != nil {
355 log.Error().Err(err).Msg("Error setting layer")
356 }
357 jsonString, err = sjson.Set(jsonString, "key", terms[0].Key)
358 if err != nil {
359 log.Error().Err(err).Msg("Error setting key")
360 }
361
362 return jsonString
Akronfc3bd272025-04-04 16:15:44 +0200363 }
Akron08aa2632025-04-07 17:38:28 +0200364
365 matchop := gjson.Get(jsonString, "match").String()
366
367 /*
368 foundry := gjson.Get(jsonString, "foundry").String()
369 layer := gjson.Get(jsonString, "layer").String()
370 key := gjson.Get(jsonString, "key").String()
371 term := Term{foundry, layer, key}
372
373
374 terms = append(terms, term)
375 */
376
377 var strBuilder strings.Builder
378 if matchop == "match:ne" {
379 termGroup2(&strBuilder, terms, false)
380 } else {
381 termGroup2(&strBuilder, terms, true)
Akronfc3bd272025-04-04 16:15:44 +0200382 }
Akron08aa2632025-04-07 17:38:28 +0200383
384 return strBuilder.String()
385
Akronfc3bd272025-04-04 16:15:44 +0200386}
387
388func replaceGroupedTerm(jsonString string, op []int, foundry string, layer string, key string) string {
389 var err error
390
391 strInt := "operands." + strconv.Itoa(op[0]) + "."
392 jsonString, err = sjson.Set(jsonString, strInt+"foundry", foundry)
393 if err != nil {
394 log.Error().Err(err).Msg("Error setting foundry")
395 }
396 jsonString, err = sjson.Set(jsonString, strInt+"layer", layer)
397 if err != nil {
398 log.Error().Err(err).Msg("Error setting layer")
399 }
400 jsonString, err = sjson.Set(jsonString, strInt+"key", key)
401 if err != nil {
402 log.Error().Err(err).Msg("Error setting key")
403 }
404
405 if len(op) > 1 {
406 for i := 1; i < len(op); i++ {
407 jsonString, err = sjson.Delete(jsonString, "operands."+strconv.Itoa(op[i]))
408 if err != nil {
409 log.Error().Err(err).Msg("Error deleting operand")
410 }
411 }
412 }
413
414 return jsonString
415}
416
417/*
418func replaceTermWithToken(jsonString string) string {
419 // Replace the term with the token
420 replacedString, err := sjson.Set(jsonString, "wrap.operands.0", token())
421 if err != nil {
422 return jsonString // Return the original string in case of an error
423 }
424 return replacedString
425
426// case1: 1 -> 1 the term is an operand in a termGroup with the same relation/operation
427// case2: 1 -> 1 the term is wrapped
428// case3: 1 -> 1 the term is an operand in a termGroup with a different relation/operation
429// case4: n -> 1 the term is an operand in a termGroup with the same relation/operation
430// case5: n -> 1 the term is wrapped
431// case6: n -> 1 the term is an operand in a termGroup with a different relation/operation
432// case7: 1 -> n the term is an operand in a termGroup with the same relation/operation
433// case8: 1 -> n the term is wrapped
434// case9: 1 -> n the term is an operand in a termGroup with a different relation/operation
435 }
436*/
437
438func Map(jsonStr string) string {
439
440 obj := gjson.Get(jsonStr, "query")
441
442 // value := gjson.Get(json, "name.last")
443
444 /*
445
446 // Modify the JSON structure recursively
447 modifiedData := modifyJSON(ast.NewAny(data))
448
449 // Marshal the modified data back into a JSON string
450 modifiedJSON, err := sonic.MarshalString(modifiedData)
451
452 // Parse the JSON string into a generic interface{}
453 var data interface{}
454
455 err := sonic.UnmarshalString(jsonStr, data)
456
457 if err != nil {
458 log.Fatal("Error unmarshaling JSON:", err)
459 return ""
460 }
461
462
463
464 if err != nil {
465 log.Fatal("Error marshaling JSON:", err)
466 }
467 */
468 // Output the modified JSON string
469 return obj.String() //modifyJSON(obj)
470}
471
472// Recursive function to modify JSON using Sonic library
473//func modifyJSON(data gjson.Result) string {
474
475// Check if data is a map
476// if data.IsObject() {
477/*
478 dataMap := data.Map()
479
480 koralType := dataMap["@type"].String()
481
482 // Look for @type key
483
484 switch koralType {
485 case "koral:term":
486 // Modify the key by adding 'hallo-' prefix
487
488 // sjson.SetRaw(data.String())
489 sjson.Set(data.Path(data.Bytes()), "key", "hallo-"+dataMap["key"].String())
490
491 dataMap["key"] = "hallo-" + dataMap["key"].String()
492 /*
493 if key, found := data.GetString("key"); found {
494 data.Set("key", "hallo-"+key)
495 }
496*/
497/*
498 case "koral:operation":
499 // Handle the 'operators' key by recursively modifying each operator
500 if operators, found := data.GetArray("operators"); found {
501 for i := range operators {
502 operators[i] = modifyJSON(operators[i])
503 }
504 data.Set("operators", operators)
505 }
506 }*/
507/*
508 // Recursively modify any nested objects
509 data.ForEach(func(k string, v sonic.Any) {
510 data.Set(k, modifyJSON(v))
511 })
512*/
513//}
514// Handle arrays by modifying elements recursively
515/*
516 if data.IsArray() {
517 for i := range data.GetArray() {
518 data.Set(i, modifyJSON(data.GetArray()[i]))
519 }
520 }
521*/
522/*
523 return data
524}
525*/