Added benchmark chart regarding performance
Change-Id: I8f8cb4a2ad0ee04c418093da761c8dfb717c8fcc
diff --git a/Readme.md b/Readme.md
index c7b53cd..ee3a9c8 100644
--- a/Readme.md
+++ b/Readme.md
@@ -13,6 +13,14 @@
- [german](testdata/tokenizer_de.matok)
- [english](testdata/tokenizer_en.matok)
+## Performance
+
+![Speed comparison of German tokenizers](https://raw.githubusercontent.com/KorAP/Datok/master/misc/benchmarks.svg)
+
+Speed comparison (in tokens per millisecond) of different tokenizers and sentence splitters for German.
+For further benchmarks, especially regarding the quality of tokenization,
+see Diewald/Kupietz/Lüngen (2022).
+
## Tokenization
```
diff --git a/misc/benchmarks.svg b/misc/benchmarks.svg
new file mode 100644
index 0000000..3433119
--- /dev/null
+++ b/misc/benchmarks.svg
@@ -0,0 +1,981 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+ id="svg320"
+ version="1.1"
+ viewBox="0 0 552.66882 351.913"
+ height="351.91299pt"
+ width="552.66882pt"
+ class="svglite"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <metadata
+ id="metadata324">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs4">
+ <style
+ id="style2"
+ type="text/css"><![CDATA[
+ .svglite line, .svglite polyline, .svglite polygon, .svglite path, .svglite rect, .svglite circle {
+ fill: none;
+ stroke: #000000;
+ stroke-linecap: round;
+ stroke-linejoin: round;
+ stroke-miterlimit: 10.00;
+ }
+ ]]></style>
+ </defs>
+ <defs
+ id="defs11">
+ <clipPath
+ id="cpMC4wMHw1NjQuNTd8MC4wMHw0MDMuMjc=">
+ <rect
+ style="fill:none;stroke:#000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10"
+ id="rect8"
+ height="403.26999"
+ width="564.57001"
+ y="0"
+ x="0" />
+ </clipPath>
+ </defs>
+ <defs
+ id="defs20">
+ <clipPath
+ id="cpMTExLjkzfDU1OS4wOXw1LjQ4fDMzMS4xMg==">
+ <rect
+ style="fill:none;stroke:#000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10"
+ id="rect17"
+ height="325.64001"
+ width="447.16"
+ y="5.48"
+ x="111.93" />
+ </clipPath>
+ </defs>
+ <g
+ id="g1350">
+ <g
+ clip-path="url(#cpMTExLjkzfDU1OS4wOXw1LjQ4fDMzMS4xMg==)"
+ id="g176"
+ transform="translate(-6.4211765,-5.48)">
+ <polyline
+ points="111.93,323.94 559.09,323.94 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline22" />
+ <polyline
+ points="111.93,311.96 559.09,311.96 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline24" />
+ <polyline
+ points="111.93,299.99 559.09,299.99 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline26" />
+ <polyline
+ points="111.93,288.02 559.09,288.02 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline28" />
+ <polyline
+ points="111.93,276.05 559.09,276.05 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline30" />
+ <polyline
+ points="111.93,264.08 559.09,264.08 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline32" />
+ <polyline
+ points="111.93,252.10 559.09,252.10 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline34" />
+ <polyline
+ points="111.93,240.13 559.09,240.13 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline36" />
+ <polyline
+ points="111.93,228.16 559.09,228.16 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline38" />
+ <polyline
+ points="111.93,216.19 559.09,216.19 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline40" />
+ <polyline
+ points="111.93,204.22 559.09,204.22 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline42" />
+ <polyline
+ points="111.93,192.24 559.09,192.24 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline44" />
+ <polyline
+ points="111.93,180.27 559.09,180.27 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline46" />
+ <polyline
+ points="111.93,168.30 559.09,168.30 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline48" />
+ <polyline
+ points="111.93,156.33 559.09,156.33 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline50" />
+ <polyline
+ points="111.93,144.36 559.09,144.36 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline52" />
+ <polyline
+ points="111.93,132.38 559.09,132.38 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline54" />
+ <polyline
+ points="111.93,120.41 559.09,120.41 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline56" />
+ <polyline
+ points="111.93,108.44 559.09,108.44 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline58" />
+ <polyline
+ points="111.93,96.47 559.09,96.47 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline60" />
+ <polyline
+ points="111.93,84.49 559.09,84.49 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline62" />
+ <polyline
+ points="111.93,72.52 559.09,72.52 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline64" />
+ <polyline
+ points="111.93,60.55 559.09,60.55 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline66" />
+ <polyline
+ points="111.93,48.58 559.09,48.58 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline68" />
+ <polyline
+ points="111.93,36.61 559.09,36.61 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline70" />
+ <polyline
+ points="111.93,24.63 559.09,24.63 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline72" />
+ <polyline
+ points="111.93,12.66 559.09,12.66 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ id="polyline74" />
+ <polyline
+ points="132.26,331.12 132.26,5.48 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:1.42, 4.27"
+ id="polyline76" />
+ <polyline
+ points="268.82,331.12 268.82,5.48 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:1.42, 4.27"
+ id="polyline78" />
+ <polyline
+ points="405.38,331.12 405.38,5.48 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:1.42, 4.27"
+ id="polyline80" />
+ <polyline
+ points="541.94,331.12 541.94,5.48 "
+ style="fill:none;stroke:#e0e0e3;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:1.42, 4.27"
+ id="polyline82" />
+ <rect
+ x="132.25999"
+ y="79.110001"
+ width="27.209999"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect84" />
+ <rect
+ x="132.25999"
+ y="84.489998"
+ width="9.96"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect86" />
+ <rect
+ x="132.25999"
+ y="19.25"
+ width="338.48999"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect88" />
+ <rect
+ x="132.25999"
+ y="24.629999"
+ width="114.42"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect90" />
+ <rect
+ x="132.25999"
+ y="7.2800002"
+ width="406.51001"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect92" />
+ <rect
+ x="132.25999"
+ y="12.66"
+ width="187.25"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect94" />
+ <rect
+ x="132.25999"
+ y="43.189999"
+ width="231.84"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect96" />
+ <rect
+ x="132.25999"
+ y="48.580002"
+ width="58.98"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect98" />
+ <rect
+ x="132.25999"
+ y="31.219999"
+ width="260.67999"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect100" />
+ <rect
+ x="132.25999"
+ y="36.610001"
+ width="56.959999"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect102" />
+ <rect
+ x="132.25999"
+ y="288.01999"
+ width="0.052000001"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect104" />
+ <rect
+ x="132.25999"
+ y="127"
+ width="16.01"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect106" />
+ <rect
+ x="132.25999"
+ y="132.38"
+ width="4.2600002"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect108" />
+ <rect
+ x="132.25999"
+ y="55.16"
+ width="181.66"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect110" />
+ <rect
+ x="132.25999"
+ y="60.549999"
+ width="39.700001"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect112" />
+ <rect
+ x="132.25999"
+ y="103.05"
+ width="19.809999"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect114" />
+ <rect
+ x="132.25999"
+ y="108.44"
+ width="10.19"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect116" />
+ <rect
+ x="132.25999"
+ y="67.139999"
+ width="116.49"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect118" />
+ <rect
+ x="132.25999"
+ y="72.519997"
+ width="33.849998"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect120" />
+ <rect
+ x="132.25999"
+ y="234.74001"
+ width="1.15"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect122" />
+ <rect
+ x="132.25999"
+ y="240.13"
+ width="1.11"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect124" />
+ <rect
+ x="132.25999"
+ y="198.83"
+ width="5.4499998"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect126" />
+ <rect
+ x="132.25999"
+ y="204.22"
+ width="3.73"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect128" />
+ <rect
+ x="132.25999"
+ y="186.86"
+ width="6.0599999"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect130" />
+ <rect
+ x="132.25999"
+ y="192.24001"
+ width="2.6900001"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect132" />
+ <rect
+ x="132.25999"
+ y="216.19"
+ width="2.3099999"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect134" />
+ <rect
+ x="132.25999"
+ y="252.10001"
+ width="0.67000002"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect136" />
+ <rect
+ x="132.25999"
+ y="264.07999"
+ width="0.31"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect138" />
+ <rect
+ x="132.25999"
+ y="91.080002"
+ width="21.34"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect140" />
+ <rect
+ x="132.25999"
+ y="96.470001"
+ width="10.31"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect142" />
+ <rect
+ x="132.25999"
+ y="150.94"
+ width="12.5"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect144" />
+ <rect
+ x="132.25999"
+ y="156.33"
+ width="6.4099998"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect146" />
+ <rect
+ x="132.25999"
+ y="174.88"
+ width="8.3400002"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect148" />
+ <rect
+ x="132.25999"
+ y="180.27"
+ width="8.1499996"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect150" />
+ <rect
+ x="132.25999"
+ y="138.97"
+ width="14.8"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect152" />
+ <rect
+ x="132.25999"
+ y="144.36"
+ width="14.19"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect154" />
+ <rect
+ x="132.25999"
+ y="115.02"
+ width="19.790001"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect156" />
+ <rect
+ x="132.25999"
+ y="120.41"
+ width="19.26"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect158" />
+ <rect
+ x="132.25999"
+ y="222.77"
+ width="1.1900001"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect160" />
+ <rect
+ x="132.25999"
+ y="228.16"
+ width="1.17"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect162" />
+ <rect
+ x="132.25999"
+ y="162.91"
+ width="9.9700003"
+ height="5.3899999"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect164" />
+ <rect
+ x="132.25999"
+ y="168.3"
+ width="9.5500002"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect166" />
+ <rect
+ x="132.25999"
+ y="323.94"
+ width="0.034000002"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect168" />
+ <rect
+ x="132.25999"
+ y="311.95999"
+ width="0.037"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect170" />
+ <rect
+ x="132.25999"
+ y="299.98999"
+ width="0.039999999"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect172" />
+ <rect
+ x="132.25999"
+ y="276.04999"
+ width="0.12"
+ height="5.3899999"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ id="rect174" />
+ </g>
+ <text
+ id="text178"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="101.52"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="327.72"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Deep-EOS (bi-lstm-de)</text>
+ <text
+ id="text180"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="85.480003"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="315.75"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Deep-EOS (cnn-de)</text>
+ <text
+ id="text182"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="88.830002"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="303.78"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Deep-EOS (lstm-de)</text>
+ <text
+ id="text184"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="27.809999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="291.81"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Cutter</text>
+ <text
+ id="text186"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="33.169998"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="279.82999"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">NNsplit</text>
+ <text
+ id="text188"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="91.93"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="267.85999"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SpaCy (Dependency)</text>
+ <text
+ id="text190"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="81.169998"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="255.89"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SpaCy (Statistical)</text>
+ <text
+ id="text192"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="57.360001"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="243.92"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SoMaJo (P=1)</text>
+ <text
+ id="text194"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="39.939999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="231.95"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Elephant</text>
+ <text
+ id="text196"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="88.889999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="219.97"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SpaCy (Sentencizer)</text>
+ <text
+ id="text198"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="58.560001"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="208"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SoMaJo (P=8)</text>
+ <text
+ id="text200"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="79.900002"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="196.03"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">SpaCy (Tokenizer)</text>
+ <text
+ id="text202"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="87.620003"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="184.06"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Syntok (segmenter)</text>
+ <text
+ id="text204"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="49.900002"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="172.09"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">TreeTagger</text>
+ <text
+ id="text206"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="71.18"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="160.11"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Stanford (T,S,M)</text>
+ <text
+ id="text208"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="81.889999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="148.14"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Syntok (tokenizer)</text>
+ <text
+ id="text210"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="19.01"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="136.17"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">JTok</text>
+ <text
+ id="text212"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="27.360001"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="124.2"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Waste</text>
+ <text
+ id="text214"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="94"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="112.23"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">OpenNLP (Tokenizer)</text>
+ <text
+ id="text216"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="39.139999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="100.25"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Stanford</text>
+ <text
+ id="text218"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="74.669998"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="88.279999"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">KorAP-Tokenizer</text>
+ <text
+ id="text220"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="98.43"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="76.309998"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">OpenNLP (SentenceD)</text>
+ <text
+ id="text222"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="81.25"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="64.339996"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">OpenNLP (Simple)</text>
+ <text
+ id="text224"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="84.309998"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="52.369999"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">BlingFire (wbd.bin)</text>
+ <text
+ id="text226"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="81.800003"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="40.389999"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">BlingFire (sbd.bin)</text>
+ <text
+ id="text228"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="61.189999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="28.42"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Datok (datok)</text>
+ <text
+ id="text230"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="63.75"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:end;fill:#4d4d4d"
+ y="16.450001"
+ x="107"
+ transform="translate(-6.4211765,-5.48)">Datok (matok)</text>
+ <polyline
+ id="polyline232"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,323.94 111.93,323.94 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline234"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,311.96 111.93,311.96 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline236"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,299.99 111.93,299.99 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline238"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,288.02 111.93,288.02 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline240"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,276.05 111.93,276.05 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline242"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,264.08 111.93,264.08 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline244"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,252.10 111.93,252.10 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline246"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,240.13 111.93,240.13 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline248"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,228.16 111.93,228.16 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline250"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,216.19 111.93,216.19 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline252"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,204.22 111.93,204.22 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline254"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,192.24 111.93,192.24 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline256"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,180.27 111.93,180.27 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline258"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,168.30 111.93,168.30 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline260"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,156.33 111.93,156.33 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline262"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,144.36 111.93,144.36 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline264"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,132.38 111.93,132.38 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline266"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,120.41 111.93,120.41 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline268"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,108.44 111.93,108.44 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline270"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,96.47 111.93,96.47 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline272"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,84.49 111.93,84.49 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline274"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,72.52 111.93,72.52 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline276"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,60.55 111.93,60.55 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline278"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,48.58 111.93,48.58 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline280"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,36.61 111.93,36.61 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline282"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,24.63 111.93,24.63 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline284"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="109.19,12.66 111.93,12.66 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline286"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="132.26,333.86 132.26,331.12 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline288"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="268.82,333.86 268.82,331.12 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline290"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="405.38,333.86 405.38,331.12 "
+ transform="translate(-6.4211765,-5.48)" />
+ <polyline
+ id="polyline292"
+ style="fill:none;stroke:#333333;stroke-width:1.07000005;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:10"
+ points="541.94,333.86 541.94,331.12 "
+ transform="translate(-6.4211765,-5.48)" />
+ <text
+ id="text294"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="5.73"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:middle;fill:#4d4d4d"
+ y="343.62"
+ x="132.25999"
+ transform="translate(-6.4211765,-5.48)">0</text>
+ <text
+ id="text296"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="21.48"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:middle;fill:#4d4d4d"
+ y="343.62"
+ x="268.82001"
+ transform="translate(-6.4211765,-5.48)">1000</text>
+ <text
+ id="text298"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="22.049999"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:middle;fill:#4d4d4d"
+ y="343.62"
+ x="405.38"
+ transform="translate(-6.4211765,-5.48)">2000</text>
+ <text
+ id="text300"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="22.15"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:middle;fill:#4d4d4d"
+ y="343.62"
+ x="541.94"
+ transform="translate(-6.4211765,-5.48)">3000</text>
+ <text
+ id="text302"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="55.400002"
+ style="font-size:11px;font-family:'Fira Sans Condensed';text-anchor:middle;fill:#404043"
+ y="356.26001"
+ x="335.51001"
+ transform="translate(-6.4211765,-5.48)">Tokens / ms</text>
+ <rect
+ id="rect304"
+ style="fill:#ffffff;stroke:none;stroke-width:1.07000005;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10"
+ height="28.24"
+ width="133.39"
+ y="268.06998"
+ x="380.89883" />
+ <rect
+ id="rect306"
+ style="fill:none;stroke:none;stroke-width:1.07000005;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10"
+ height="17.280001"
+ width="17.280001"
+ y="273.54999"
+ x="392.34882" />
+ <rect
+ id="rect308"
+ style="fill:#f18700;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ height="15.86"
+ width="15.86"
+ y="274.25998"
+ x="393.05884" />
+ <rect
+ id="rect310"
+ style="fill:none;stroke:none;stroke-width:1.07000005;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10"
+ height="17.280001"
+ width="17.280001"
+ y="273.54999"
+ x="450.69882" />
+ <rect
+ id="rect312"
+ style="fill:#1e2662;stroke:none;stroke-width:1.07000005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:10"
+ height="15.86"
+ width="15.86"
+ y="274.25998"
+ x="451.40881" />
+ <text
+ id="text314"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="29.120001"
+ style="font-size:11px;font-family:'Fira Sans Condensed'"
+ y="387.45999"
+ x="303.53"
+ transform="translate(112.07882,-101.48)">1 × Effi</text>
+ <text
+ id="text316"
+ lengthAdjust="spacingAndGlyphs"
+ textLength="34.849998"
+ style="font-size:11px;font-family:'Fira Sans Condensed'"
+ y="387.45999"
+ x="361.88"
+ transform="translate(112.07882,-101.48)">10 × Effi</text>
+ </g>
+</svg>