Add topic domain classification in XSLT pass2

The classification is generated with MALLET, based on the old training data
in /vol/work/TE, by calling a Java function from XSLT.

Resolves #6
diff --git a/Makefile b/Makefile
index 9b38bed..674c69a 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@
 MAKE ?= make -j $(shell nproc)
 KORAPXML2CONLLU_HEAP ?= $(shell echo "$$(($(MAX_THREADS) * 2100))")
 KORAPXML2CONLLU ?= java -Xmx$(KORAPXML2CONLLU_HEAP)m -jar lib/korapxml2conllu.jar
-SAXON ?= java -cp lib/saxon9ee.jar:lib/xml-resolver-1.2.jar net.sf.saxon.Transform -expand:off -catalog:"lib/dtds/xhtml11/xhtmlcatalog.xml;lib/dtds/xhtml/dtd/xhtmlcatalog.xml"
+SAXON ?= java -cp lib/saxon9ee.jar:lib/xml-resolver-1.2.jar:lib/textclassifier.jar net.sf.saxon.Transform -expand:off -catalog:"lib/dtds/xhtml11/xhtmlcatalog.xml;lib/dtds/xhtml/dtd/xhtmlcatalog.xml"
 
 .DELETE_ON_ERROR:
 
@@ -92,6 +92,10 @@
 	mkdir -p models
 	curl -sL -o $@  https://corpora.ids-mannheim.de/tools/$@
 
+models/dereko_domains_s.classifier: # model file loaded by xslt/pass2.xsl; curl -f turns HTTP errors into a failing recipe instead of saving the error page as the model
+	mkdir -p models
+	curl -fsSL -o $@ https://corpora.ids-mannheim.de/tools/$@
+
 %.marmot-malt.zip: %.zip models/de.marmot models/german.mco
 	$(KORAPXML2CONLLU) -T $(MAX_THREADS) -t marmot:models/de.marmot -P malt:models/german.mco $< | conllu2korapxml > $@
 
diff --git a/lib/textclassifier.jar b/lib/textclassifier.jar
new file mode 100644
index 0000000..ef38851
--- /dev/null
+++ b/lib/textclassifier.jar
Binary files differ
diff --git a/xslt/pass2.xsl b/xslt/pass2.xsl
index 697c108..087fb78 100644
--- a/xslt/pass2.xsl
+++ b/xslt/pass2.xsl
@@ -1,14 +1,28 @@
 <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
                 xmlns:saxon="http://saxon.sf.net/"
-                exclude-result-prefixes="saxon">
+                xmlns:xs="http://www.w3.org/2001/XMLSchema"
+                xmlns:TextClassifier="java:de.ids_mannheim.TextClassifier"
+                exclude-result-prefixes="saxon xs TextClassifier">
 
     <xsl:output method="xml" indent="yes" saxon:line-length="1000"
     doctype-public="-//IDS//DTD IDS-I5 1.0//EN"
     doctype-system="http://corpora.ids-mannheim.de/I5/DTD/i5.dtd"
     />
 
+    <xsl:variable name="domainClassifier" select="TextClassifier:new('models/dereko_domains_s.classifier')"/> <!-- Classifier object constructed from the model file (same path as the Makefile download target) and passed to topicDomainsFromText in the textClass template. NOTE(review): the relative path presumably resolves against the working directory of the Java process; confirm. -->
+
     <xsl:mode on-no-match="shallow-copy"/>
 
+    <!-- Rebuilds every textClass element from the classifier output, a ';'-separated string read as: n1;target1;n2;target2. The second catRef is written only when n2 exceeds 0.0000001. -->
+    <xsl:template match="textClass">
+        <xsl:variable name="rawDomains" select="TextClassifier:topicDomainsFromText($domainClassifier, ../../../text)"/>
+        <xsl:variable name="fields" select="tokenize($rawDomains, ';')"/>
+        <textClass>
+            <catRef n="{$fields[1]}" target="{$fields[2]}" scheme="topic"/>
+            <xsl:if test="xs:decimal($fields[3]) gt 0.0000001"><catRef n="{$fields[3]}" target="{$fields[4]}" scheme="topic"/></xsl:if>
+        </textClass>
+    </xsl:template>
+
     <xsl:template match="p[not(normalize-space())]" priority="1.0"/>
 
     <xsl:template match="div[not(normalize-space())]" priority="1.0"/>