)]}'
{
  "commit": "2173013a1ef865a231026ca4fd81f041732c769f",
  "tree": "63c9029b5d691871a7141ac73fd843103df0c45e",
  "parents": [
    "9ef5dec549c076b6d4ef028f862ed6954133a2be"
  ],
  "author": {
    "name": "Marc Kupietz",
    "email": "kupietz@ids-mannheim.de",
    "time": "Sat Feb 07 12:29:13 2026 +0100"
  },
  "committer": {
    "name": "Marc Kupietz",
    "email": "kupietz@ids-mannheim.de",
    "time": "Sat Feb 07 12:32:10 2026 +0100"
  },
  "message": "Add lookahead to noun gender endings to prevent false matches\n\nForms like Innenminister/Innenministerinnen were incorrectly being\nparsed as a single gender-marked token (Innenminister/Innen +\nministerinnen).\n\nNow gender noun endings (:in, :innen, /in, /innen, (in), /frau, etc.)\nuse a lookahead character to verify the ending is NOT followed by a\nletter - ensuring they are at a word boundary.\n\nThis correctly tokenizes:\n- Innenminister/Innenministerinnen → Innenminister / Innenministerinnen\n- Nutzer/in → Nutzer/in (valid gender form, unchanged)\n- Kaufmann/frau → Kaufmann/frau (valid gender form, unchanged)\n\nAdds test cases for both split and don\u0027t-split scenarios.\n\nChange-Id: I509a6f12ec1bb5678b1d8e8a063d0164498de5de\n",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "ada404cd989568a7357cbc4b96be835e925bfae9",
      "old_mode": 33188,
      "old_path": "Readme.md",
      "new_id": "90535cdcdb6b637e536a1a5b2091c48748b55ba7",
      "new_mode": 33188,
      "new_path": "Readme.md"
    },
    {
      "type": "modify",
      "old_id": "6219a012dcd85e6a4f5d31819c5178373dff233c",
      "old_mode": 33188,
      "old_path": "src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex",
      "new_id": "342c6682095ea19f7eedb315de4fd9c293e60271",
      "new_mode": 33188,
      "new_path": "src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex"
    },
    {
      "type": "modify",
      "old_id": "4b7173169e0314ad6224b5ba7cfc753375b60a41",
      "old_mode": 33188,
      "old_path": "src/test/resources/tokenizer/dontsplit.txt",
      "new_id": "1997f6e0954ea32cf7c3495ad4067a554bbb984b",
      "new_mode": 33188,
      "new_path": "src/test/resources/tokenizer/dontsplit.txt"
    },
    {
      "type": "modify",
      "old_id": "99650b4c9fda5a9245770ad1593da40cffd81b9e",
      "old_mode": 33188,
      "old_path": "src/test/resources/tokenizer/split.txt",
      "new_id": "14a0e370a41c79abf75fc8e2c3147d18304fadab",
      "new_mode": 33188,
      "new_path": "src/test/resources/tokenizer/split.txt"
    }
  ]
}
