Importing WformServices and GlemmServices
Change-Id: Ifa95576d69e0d3863f63d3fdedb48c2c21cf64bc
diff --git a/GlemmServices/.classpath b/GlemmServices/.classpath
new file mode 100644
index 0000000..d72ac2e
--- /dev/null
+++ b/GlemmServices/.classpath
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry exported="true" kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry combineaccessrules="false" exported="true" kind="src" path="/JsonTraverse"/>
+ <classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v9.0 (2)">
+ <attributes>
+ <attribute name="owner.project.facets" value="jst.web"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.web.container"/>
+ <classpathentry combineaccessrules="false" kind="src" path="/utils"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/GlemmServices/.project b/GlemmServices/.project
new file mode 100644
index 0000000..acd73d2
--- /dev/null
+++ b/GlemmServices/.project
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>GlemmServices</name>
+ <comment></comment>
+ <projects>
+ <project>JsonTraverse</project>
+ <project>Utils</project>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.wst.common.project.facet.core.builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.wst.validation.validationbuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ <nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
+ <nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
+ <nature>org.eclipse.wst.common.project.facet.core.nature</nature>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.wst.jsdt.core.jsNature</nature>
+ </natures>
+</projectDescription>
diff --git a/GlemmServices/.settings/.jsdtscope b/GlemmServices/.settings/.jsdtscope
new file mode 100644
index 0000000..92e666d
--- /dev/null
+++ b/GlemmServices/.settings/.jsdtscope
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry excluding="**/bower_components/*|**/node_modules/*|**/*.min.js" kind="src" path="WebContent"/>
+ <classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.WebProject">
+ <attributes>
+ <attribute name="hide" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.baseBrowserLibrary"/>
+ <classpathentry kind="output" path=""/>
+</classpath>
diff --git a/GlemmServices/.settings/org.eclipse.core.runtime.prefs b/GlemmServices/.settings/org.eclipse.core.runtime.prefs
new file mode 100644
index 0000000..5a0ad22
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.core.runtime.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+line.separator=\n
diff --git a/GlemmServices/.settings/org.eclipse.jdt.core.prefs b/GlemmServices/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..4e4a3ad
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,9 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/GlemmServices/.settings/org.eclipse.m2e.core.prefs b/GlemmServices/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/GlemmServices/.settings/org.eclipse.wst.common.component b/GlemmServices/.settings/org.eclipse.wst.common.component
new file mode 100644
index 0000000..4355618
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.common.component
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
+ <wb-module deploy-name="GlemmServices2-0.0.1-SNAPSHOT">
+ <wb-resource deploy-path="/" source-path="/target/m2e-wtp/web-resources"/>
+ <wb-resource deploy-path="/" source-path="/WebContent" tag="defaultRootSource"/>
+ <wb-resource deploy-path="/WEB-INF/classes" source-path="/src"/>
+ <dependent-module archiveName="utils-0.1-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/utils/utils">
+ <dependency-type>uses</dependency-type>
+ </dependent-module>
+ <dependent-module archiveName="JsonTraverse-0.1-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/JsonTraverse/JsonTraverse">
+ <dependency-type>uses</dependency-type>
+ </dependent-module>
+ <property name="java-output-path" value="/GlemmServices/build/classes"/>
+ <property name="context-root" value="GlemmServices"/>
+ </wb-module>
+</project-modules>
diff --git a/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml b/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml
new file mode 100644
index 0000000..cc81385
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml
@@ -0,0 +1,7 @@
+<root>
+ <facet id="jst.jaxrs">
+ <node name="libprov">
+ <attribute name="provider-id" value="jaxrs-no-op-library-provider"/>
+ </node>
+ </facet>
+</root>
diff --git a/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.xml b/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.xml
new file mode 100644
index 0000000..3c4e8c4
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.common.project.facet.core.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<faceted-project>
+ <runtime name="Apache Tomcat v9.0 (2)"/>
+ <fixed facet="java"/>
+ <fixed facet="jst.web"/>
+ <fixed facet="wst.jsdt.web"/>
+ <installed facet="java" version="1.8"/>
+ <installed facet="wst.jsdt.web" version="1.0"/>
+ <installed facet="jst.jaxrs" version="2.1"/>
+ <installed facet="jst.web" version="4.0"/>
+</faceted-project>
diff --git a/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.container b/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.container
new file mode 100644
index 0000000..3bd5d0a
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.container
@@ -0,0 +1 @@
+org.eclipse.wst.jsdt.launching.baseBrowserLibrary
\ No newline at end of file
diff --git a/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.name b/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.name
new file mode 100644
index 0000000..05bd71b
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.jsdt.ui.superType.name
@@ -0,0 +1 @@
+Window
\ No newline at end of file
diff --git a/GlemmServices/.settings/org.eclipse.wst.validation.prefs b/GlemmServices/.settings/org.eclipse.wst.validation.prefs
new file mode 100644
index 0000000..04cad8c
--- /dev/null
+++ b/GlemmServices/.settings/org.eclipse.wst.validation.prefs
@@ -0,0 +1,2 @@
+disabled=06target
+eclipse.preferences.version=1
diff --git a/GlemmServices/WebContent/META-INF/MANIFEST.MF b/GlemmServices/WebContent/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..254272e
--- /dev/null
+++ b/GlemmServices/WebContent/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Class-Path:
+
diff --git a/GlemmServices/WebContent/WEB-INF/GlemmServices.properties b/GlemmServices/WebContent/WEB-INF/GlemmServices.properties
new file mode 100644
index 0000000..f205c7c
--- /dev/null
+++ b/GlemmServices/WebContent/WEB-INF/GlemmServices.properties
@@ -0,0 +1,32 @@
+# GlemmServices properties:
+# expected in /WEB-INF/.
+
+## General properties:
+GS_WorkingPath = /home/bodmer/KorAP/GlemmServices/work-live
+
+# GlemmDB properties:
+# DerbyArchiveDB = database for the wordform (wform) list of a whole C2-Archive:
+fnameDB = /home/bodmer/KorAP/GlemmServices/Dbs/DerbyArchiveDB
+#fnameDB = /home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB2
+serverHost = //localhost:1527/
+
+## Glemm: properties for lemmatizing:
+
+# fnameScript = GS_WorkingPath + doGlemm
+fnameScript = doGlemm
+# fnameWfs = GS_WorkingPath + "/gl_wfs_in.txt";
+fnameWfs = gl_wfs_in.txt
+# fnameScript = GS_WorkingPath + "/doGlemm";
+fnameScript = doGlemm
+# fnameLemata = GS_WorkingPath + "/tmp/lem3.utf8";
+# lem3 expected to be ISO8859-1 encoded.
+fnameLemata = tmp/lem3
+
+## Logging:
+
+# fnameOut = GS_WorkingPath + "/GlemmServices.log";
+#fnameOut = GlemmServices-t.log
+fnameOut = GlemmServices-t2.log
+# fnameErr = GS_WorkingPath + "/GlemmServices.err";
+#fnameErr = GlemmServices-t.err
+fnameErr = GlemmServices-t2.err
diff --git a/GlemmServices/WebContent/WEB-INF/web.xml b/GlemmServices/WebContent/WEB-INF/web.xml
new file mode 100644
index 0000000..eb2c72c
--- /dev/null
+++ b/GlemmServices/WebContent/WEB-INF/web.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- copied from https://blog.dejavu.sk/registering-resources-and-providers-in-jersey-2/ , 09.12.19/FB -->
+<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://java.sun.com/xml/ns/javaee" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" version="2.5">
+ <servlet>
+ <servlet-name>GlemmServices mit Jersey REST</servlet-name>
+ <servlet-class>org.glassfish.jersey.servlet.ServletContainer</servlet-class>
+ <init-param>
+ <param-name>jersey.config.server.provider.packages</param-name>
+ <param-value>de.korap.services</param-value>
+ </init-param>
+ <init-param>
+ <param-name>jersey.config.server.provider.classnames</param-name>
+ <param-value>de.korap.services</param-value>
+ </init-param>
+ <init-param>
+ <param-name>jersey.config.server.tracing</param-name>
+ <param-value>ALL</param-value>
+ </init-param>
+ <load-on-startup>1</load-on-startup>
+ </servlet>
+ <servlet-mapping>
+ <servlet-name>GlemmServices mit Jersey REST</servlet-name>
+ <url-pattern>/*</url-pattern>
+ </servlet-mapping>
+</web-app>
\ No newline at end of file
diff --git a/GlemmServices/pom.xml b/GlemmServices/pom.xml
new file mode 100644
index 0000000..91838d0
--- /dev/null
+++ b/GlemmServices/pom.xml
@@ -0,0 +1,86 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>de.korap.services</groupId>
+ <artifactId>GlemmServices</artifactId>
+ <version>0.2-Test</version>
+ <!-- live configuration: <version>0.2-live</version> -->
+
+ <packaging>war</packaging>
+ <name>GlemmServices (Test-Konfiguration)</name>
+
+ <description>RESTful GlemmServices für KorAP (Test-Konfiguration)</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
+ </dependency>
+ <!-- https://mvnrepository.com/artifact/javax.ws.rs/javax.ws.rs-api -->
+ <dependency>
+ <groupId>javax.ws.rs</groupId>
+ <artifactId>javax.ws.rs-api</artifactId>
+ <version>2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>2.9.6</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>2.9.6</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>2.9.6</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>de.korap.services</groupId>
+ <artifactId>utils</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>de.korap.json</groupId>
+ <artifactId>JsonTraverse</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <sourceDirectory>src</sourceDirectory>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.7.0</version>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-war-plugin</artifactId>
+ <version>3.2.1</version>
+ <configuration>
+ <failOnMissingWebXml>false</failOnMissingWebXml>
+ <warSourceDirectory>WebContent</warSourceDirectory>
+ </configuration>
+ </plugin>
+ </plugins>
+ <finalName>GlemmServices</finalName>
+ </build>
+
+</project>
\ No newline at end of file
diff --git a/GlemmServices/src/de/korap/services/GlemmDB.java b/GlemmServices/src/de/korap/services/GlemmDB.java
new file mode 100644
index 0000000..ef88019
--- /dev/null
+++ b/GlemmServices/src/de/korap/services/GlemmDB.java
@@ -0,0 +1,815 @@
+package de.korap.services;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.time.LocalDateTime;
+import java.util.Properties;
+
+import de.korap.services.utils.Utils;
+
+public class GlemmDB
+
+{
+ // Name of the table all queries of this class operate on:
+ static public String dbTable = "GlemmLemmata";
+ // NOTE(review): presumably the column names of table GlemmLemmata; they are
+ // not referenced by the SQL strings built in this part of the class - confirm.
+ static String dbLemma = "lemma";
+ static String dbWf = "wf";
+ static String dbProp1 = "prop1";
+ static String dbProp2 = "prop2";
+
+ // Derby Database access (overwritten by GlemmServices.properties):
+ // fnameDB and serverHost are re-read by set_ConstantsFromProperties(), which
+ // also rebuilds derbyDBURL; importGlemmLemmata() replaces derbyDBURL once more
+ // with a URL built from its dbpath argument.
+ private static String fnameDB = "/home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB";
+ private static String serverHost = "//localhost:1527/";
+ private static String derbyDBURL = "jdbc:derby:" + serverHost + fnameDB;
+ // Derby Database access (final assignments):
+ // NOTE(review): both driver names are instance fields and are not used by the
+ // static methods visible in this part of the class - confirm they are needed.
+ final private String derbyDriverEmb = "org.apache.derby.jdbc.EmbeddedDriver";
+ final private String derbyDriverClient = "org.apache.derby.jdbc.ClientDriver";
+
+ final static boolean
+ usePreparedStmt = true; // true = writeDBLemmata() inserts via PreparedStatement, false = via plain Statement.
+
+ // values to be inserted: lemma, wf, prop1, prop2, spec.
+ // insertQuery is filled with String.format(), insertPreparedQuery via setString()/setBoolean().
+ static String insertQuery = "INSERT into " + dbTable + " VALUES ('%s', '%s', '%s', '%s', '%s')";
+ static String insertPreparedQuery = "INSERT into " + dbTable + " VALUES (?, ?, ?, ?, ?)";
+
+ // Constants of table GlemmLemmata:
+ static final int maxLemmaSize = 64; // max. no. of chars, as declared in table GlemmLemmata.
+ static final int maxWfSize = 64; // max. no. of chars, as declared in table GlemmLemmata.
+
+ /* variables */
+ // Both counters are only used to print the stack trace of the first
+ // exception of each kind and to stay silent for all following ones.
+ static int nExceptStmt = 0; // count no. of exceptions for preparedStmt.
+ static int nExceptInsert = 0; // count no. of exceptions for Inserts.
+
+ /*
+  * set_ConstantsFromProperties
+  *
+  * Refreshes the static Derby access settings of this class (fnameDB and
+  * serverHost) from the submitted properties and rebuilds derbyDBURL
+  * from the two values.
+  *
+  * Arguments:
+  *  props : Properties loaded by the GlemmServices.
+  * Notes:
+  *  - only the constants needed in this class are looked up.
+  *  - the built-in values are handed to Utils.getConstantFromProperties
+  *    as third argument (presumably the fallback for a missing key).
+  * 15.10.21/FB
+  */
+
+ private static void set_ConstantsFromProperties(Properties props)
+
+ {
+     final String defaultDbFile = "/home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB";
+     final String defaultHost   = "//localhost:1527/";
+
+     fnameDB    = Utils.getConstantFromProperties(props, "fnameDB", defaultDbFile);
+     serverHost = Utils.getConstantFromProperties(props, "serverHost", defaultHost);
+
+     // the JDBC URL is always derived from the two values just read:
+     derbyDBURL = new StringBuilder("jdbc:derby:")
+                     .append(serverHost)
+                     .append(fnameDB)
+                     .toString();
+
+ } // set_ConstantsFromProperties
+
+ /* checkDerbyConnection:
+  *
+  * Checks if the Derby Network Server is available by opening a
+  * connection to the currently configured derbyDBURL and closing it
+  * again right away.
+  * Returns: 0 (=OK) or -1 (when the connection cannot be opened).
+  * Notes:
+  *  - a failure while closing the probe connection is only logged,
+  *    it does not change the return value.
+  * 05.11.21/FB
+  */
+
+ public static int checkDerbyConnection()
+
+ {
+     final String func = "checkDerbyConnection";
+     Connection probe = null;
+     int status = 0; // OK, unless connecting fails.
+
+     // try to open a Connection to the Derby Database:
+     try {
+         GlemmServices.fout.printf("Debug: %s: connecting to '%s'...\n", func, derbyDBURL);
+         probe = DriverManager.getConnection(derbyDBURL);
+         GlemmServices.fout.printf("Debug: %s: connected.\n", func);
+     }
+     catch (SQLException connectFailure) {
+         GlemmServices.ferr.printf("Error: %s: connecting failed!\n", func);
+         connectFailure.printStackTrace(GlemmServices.ferr);
+         status = -1;
+     }
+     finally {
+         // the probe connection is not needed any more:
+         if( probe != null )
+         {
+             try {
+                 probe.close();
+             }
+             catch (SQLException closeFailure) {
+                 closeFailure.printStackTrace(GlemmServices.ferr);
+             }
+         }
+     }
+
+     return status;
+
+ } // checkDerbyConnection
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+  * importGlemmLemmata
+  *
+  * Reads the GLEMM output file and stores its lemma/wordform pairs into
+  * the Derby database addressed by dbpath.
+  *
+  * Parameters:
+  *  fn     : absolute file name of GLEMM output [ISO8859-1] expected, not UTF-8!
+  *           The file is therefore decoded explicitly as ISO-8859-1 and no
+  *           longer with the platform default charset.
+  *  props  : Properties from GlemmServices.properties to set constants needed
+  *           in this class.
+  *  dbpath : location + name of database where to store lemmata+wfs.
+  * Returns:
+  *    0 : OK,
+  *   -1 : the file could not be opened/closed, or connection error to Derby Server,
+  *   -2 : SQL error when creating the SQL statement,
+  *   -3 : I/O error when reading lemma/wfs,
+  *  > 0 : no. of lemmata not stored.
+  * Side effects:
+  *  - overwrites the static derbyDBURL with a URL built from dbpath.
+  *
+  * 10.01.20/FB
+  * 15.10.21/FB dynamically loading constants from GlemmServices.properties.
+  * 04.11.21/FB dbpath.
+  *
+  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ public static int importGlemmLemmata(String fn, Properties props, final String dbpath)
+
+ {
+     final String
+         func = "importGlemmLemmata";
+     int
+         nErrors;
+
+     // load the constants of this class from the GlemmServices properties:
+     set_ConstantsFromProperties(props);
+
+     // when indexing data into the database, select the submitted dbpath,
+     // (not the one stored in the GlemmServices.properties):
+     derbyDBURL = "jdbc:derby:" + serverHost + dbpath;
+
+     // open reader on Glemm Lemmata; try-with-resources closes it on every
+     // path, also when writeDBLemmata() ends with an unchecked exception:
+     GlemmServices.fout.printf("Debug: %s: reading from '%s'.\n", func, fn);
+     try (BufferedReader in = new BufferedReader(
+              new java.io.InputStreamReader(
+                  new java.io.FileInputStream(fn),
+                  java.nio.charset.StandardCharsets.ISO_8859_1)))
+     {
+         nErrors = writeDBLemmata(in);
+     }
+     catch (IOException e) {
+         // file missing/unreadable, or closing the reader failed:
+         GlemmServices.ferr.printf("Error: %s: while reading from '%s'!\n", func, fn);
+         e.printStackTrace(GlemmServices.ferr);
+         return -1;
+     }
+
+     // translate the result of writeDBLemmata() into a log message:
+     if( nErrors == -1 )
+         GlemmServices.ferr.printf("Error: %s: connection error to Derby Server!\n", func);
+     else if( nErrors == -2 )
+         GlemmServices.ferr.printf("Error: %s: SQL error when creating SQL statement!\n", func);
+     else if( nErrors == -3 )
+         GlemmServices.ferr.printf("Error: %s: I/O error when reading lemma/wfs!\n", func);
+     else if( nErrors == 0 )
+         GlemmServices.fout.printf("Debug: %s: Import done successfully.\n", func);
+     else
+         GlemmServices.ferr.printf("Debug: %s: Import: %d Errors!\n", func, nErrors);
+
+     return nErrors; // 0=OK, otherwise see header.
+
+ } // importGlemmLemmata
+
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+  *
+  * writeDBLemmata()
+  *
+  * stores list of lemmata + infos into Derby db.
+  * File format: "Lemma Decomposition Wf",
+  *        e.g.: "Häuser +- Haus".
+  * Parameters:
+  *  in : reader on the GLEMM output; it is read line by line and is NOT
+  *       closed here.
+  * Returns:  0: OK or
+  *          -1: connection error to Derby Server, or
+  *          -2: SQL error, or
+  *          -3: I/O error when reading wfs, or
+  *         > 0: no. of lemmata not stored.
+  * Notes:
+  *  - everything inserted so far is committed on every exit path, as long
+  *    as a connection exists; without a connection there is nothing to
+  *    commit or close, so the documented -1 reaches the caller.
+  *  - the statements are closed before the connection, and each close is
+  *    attempted independently of the others.
+  * 09.01.20/FB
+  * 04.11.21/FB turn autocommit off to accelerate huge amount of insertions.
+  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ private static int writeDBLemmata(BufferedReader in)
+
+ {
+     final String
+         func = "writeDBLemmata";
+     Connection
+         con = null;
+     PreparedStatement
+         stmtPrep = null;
+     Statement
+         stmt = null;
+     String
+         line;
+     String[]
+         lemmaInfo;
+     int
+         nErrors   = 0,
+         nInserted = 0, // no. of lemma/wf pairs imported.
+         ret       = 0; // OK.
+     boolean
+         containsSpec;
+
+     // open Connection to Derby Database,
+     // set Autocommit = false to accelerate insertions:
+     try {
+         GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
+         con = DriverManager.getConnection(derbyDBURL);
+         con.setAutoCommit(false);
+         GlemmServices.fout.printf("Debug: %s: connected (autoCommit=off).\n", func);
+
+         if( usePreparedStmt )
+             stmtPrep = con.prepareStatement(insertPreparedQuery);
+         else
+             stmt = con.createStatement();
+
+         while( (line = in.readLine()) != null )
+         {
+             // expected components: [0]=lemma, [1]=decomposition, [2]=wordform.
+             lemmaInfo = line.split("\\s");
+             if( lemmaInfo.length != 3 )
+             {
+                 GlemmServices.ferr.printf("Error: %s: '%s' has %d components! Expected = 3! Cannot be stored!\n", func, line, lemmaInfo.length);
+                 nErrors++;
+                 continue;
+             }
+
+             // Get Info, if wordform contains special chars like '-', interpunction etc.:
+             containsSpec = LemmaInfo.isWfSpecialCase(lemmaInfo[2]);
+
+             // store into DB:
+             if( usePreparedStmt )
+                 ret = writeDBLemmaInfos_prepared(stmtPrep, lemmaInfo[0], lemmaInfo[2], lemmaInfo[1], containsSpec);
+             else
+                 ret = writeDBLemmaInfos(stmt, lemmaInfo[0], lemmaInfo[2], lemmaInfo[1], containsSpec);
+
+             if( ret != 0 )
+             {
+                 GlemmServices.ferr.printf("Error: %s: cannot store lemma '%s' into DB! (ret=%d)\n", func, line, ret);
+                 nErrors++;
+             }
+             else
+                 nInserted++;
+         } // while
+     }
+     catch (IOException e) {
+         GlemmServices.ferr.printf("Error: %s: while reading Glemm lemmata+infos from file! Exit!\n", func);
+         e.printStackTrace(GlemmServices.ferr);
+         return -3;
+     }
+     catch (SQLException e) {
+         if( con == null )
+             GlemmServices.ferr.printf("Error: %s: connection failed!\n", func);
+         else
+             GlemmServices.ferr.printf("Error: %s: cannot create stmt!\n", func);
+         e.printStackTrace(GlemmServices.ferr);
+         return con == null ? -1 : -2;
+     }
+     finally
+     {
+         // important: now commit, as autoCommit is deactivated,
+         // this also removes database locks.
+         // Without a connection there is nothing to commit; calling
+         // commit() on null would replace the pending return value by a
+         // NullPointerException.
+         if( con != null )
+         {
+             try {
+                 con.commit();
+                 GlemmServices.fout.printf("Debug: %s: commit successfull.\n", func);
+             }
+             catch (SQLException e) {
+                 GlemmServices.ferr.printf("Error: %s: commit at the end of the wfs list failed!\n", func);
+                 e.printStackTrace(GlemmServices.ferr);
+             }
+         }
+
+         // release the statements first, the connection last; a failing
+         // close must not prevent the remaining resources from being closed:
+         try {
+             if( stmtPrep != null )
+                 stmtPrep.close();
+         }
+         catch (SQLException e) {
+             e.printStackTrace(GlemmServices.ferr);
+         }
+         try {
+             if( stmt != null )
+                 stmt.close();
+         }
+         catch (SQLException e) {
+             e.printStackTrace(GlemmServices.ferr);
+         }
+         try {
+             if( con != null )
+                 con.close();
+         }
+         catch (SQLException e) {
+             e.printStackTrace(GlemmServices.ferr);
+         }
+     }
+
+     GlemmServices.fout.printf("Info : %s: no. of lemmata/wf pairs inserted = %d.\n", func, nInserted);
+
+     return nErrors; // 0=OK, >0 = no. of errors.
+ } // writeDBLemmata
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * *
+  * writeDBLemmaInfos_prepared:
+  * write Lemma + Wf + Decomposition rules into Derby DB.
+  * Notes:
+  *  - this is the version with a PreparedQuery.
+  *  - the stack trace of an SQLException is printed for the first
+  *    occurrence only (see nExceptStmt / nExceptInsert).
+  * Parameters:
+  *  stmt          : PreparedStatement built from insertPreparedQuery.
+  *  lemma         : lemma, at most maxLemmaSize chars.
+  *  wf            : wordform, at most maxWfSize chars.
+  *  decomposition : two flags, '+' at position 0 sets prop1, '+' at
+  *                  position 1 sets prop2, e.g. "+-".
+  *  spec          : if true: lemma contains special chars like hyphen etc.
+  * Returns:
+  *  0 = OK,
+  *  1 = filling the prepared statement failed,
+  *  2 = the insert did not affect exactly one row,
+  *  3 = SQL error while inserting,
+  *  4 = lemma longer than maxLemmaSize,
+  *  5 = wf longer than maxWfSize,
+  *  6 = decomposition is missing or has less than 2 flags.
+  * 17.01.20/FB
+  * 05.1.21/FB reject lemma/wf > 64 chars.
+  * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ private static int writeDBLemmaInfos_prepared(
+         PreparedStatement stmt,
+         String lemma,
+         String wf,
+         String decomposition,
+         boolean spec)
+
+ {
+     final String
+         func = "writeDBLemmaInfos_prepared";
+     final boolean
+         bLog = false;
+     final boolean
+         prop1, prop2;
+     int
+         n;
+
+     // check size of wf and lemma against field size in table GlemmLemmata;
+     // a value of exactly the declared size still fits, only longer ones
+     // are rejected (as stated in the messages below):
+     if( lemma.length() > GlemmDB.maxLemmaSize )
+     {
+         GlemmServices.ferr.printf("Error: %s: lemma rejected (> %d): '%s'!\n",
+                 func, maxLemmaSize, lemma);
+         return 4;
+     }
+     if( wf.length() > GlemmDB.maxWfSize )
+     {
+         GlemmServices.ferr.printf("Error: %s: wf rejected (> %d): '%s'!\n",
+                 func, maxWfSize, wf);
+         return 5;
+     }
+
+     // both flags are read with charAt(), so a shorter decomposition has to be
+     // rejected here instead of aborting the whole import with an exception:
+     if( decomposition == null || decomposition.length() < 2 )
+     {
+         GlemmServices.ferr.printf("Error: %s: decomposition rejected (< 2 flags): '%s' for lemma='%s' wf='%s'!\n",
+                 func, decomposition, lemma, wf);
+         return 6;
+     }
+
+     prop1 = decomposition.charAt(0) == '+';
+     prop2 = decomposition.charAt(1) == '+';
+
+     // for loging only; the text is not built at all while logging is off,
+     // as this function is called once per imported line:
+     if( bLog )
+         GlemmServices.fout.printf("Debug: %s: sqlInsert='%s'.\n", func,
+                 String.format(insertQuery, lemma, wf, prop1, prop2, spec));
+
+     // now insert new values into PreparedStatement:
+     try {
+         stmt.setString( 1, lemma);
+         stmt.setString( 2, wf);
+         stmt.setBoolean(3, prop1);
+         stmt.setBoolean(4, prop2);
+         stmt.setBoolean(5, spec);
+     }
+     catch (SQLException e1) {
+         GlemmServices.ferr.printf( "Error: %s: while filling prepared statement with lemma='%s' wf='%s' prop1='%b' prop2='%b' spec='%b'!\n",
+                 func, lemma, wf, prop1, prop2, spec);
+         if( ++nExceptStmt <= 1 )
+             e1.printStackTrace(GlemmServices.ferr);
+         return 1;
+     }
+
+     try {
+         // execute the filled prepared statement:
+         n = stmt.executeUpdate();
+         if( n != 1 )
+         {
+             GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b! (n=%d)\n",
+                     func, lemma, wf, prop1, prop2, spec, n);
+             return 2;
+         }
+     }
+     catch (SQLException e) {
+         GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
+                 func, lemma, wf, prop1, prop2, spec);
+         if( ++nExceptInsert <= 1 )
+             e.printStackTrace(GlemmServices.ferr);
+         return 3;
+     }
+
+     return 0; // OK
+ } // writeDBLemmaInfos_prepared
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * *
+  * writeDBLemmaInfos:
+  * write Lemma + Wf + Decomposition into Derby DB.
+  * Notes:
+  *  - this is the version with a plain Statement: the INSERT is built
+  *    as text from insertQuery. Single quotes inside lemma and wf are
+  *    doubled, so that a value like "geht's" neither breaks nor alters
+  *    the SQL statement.
+  * Parameters:
+  *  stmt          : Statement used to execute the INSERT.
+  *  lemma, wf     : values to be stored.
+  *  decomposition : two flags, '+' at position 0 sets prop1, '+' at
+  *                  position 1 sets prop2, e.g. "+-".
+  *  spec          : if true: lemma contains special chars like hyphen etc.
+  * returns 0 = OK, 1 = error (invalid decomposition or failed insert).
+  * 10.01.20/FB
+  * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ private static int writeDBLemmaInfos(Statement stmt, String lemma, String wf, String decomposition, boolean spec)
+
+ {
+     final String
+         func = "writeDBLemmaInfos";
+     final String
+         sqlInsert;
+     final boolean
+         prop1, prop2;
+     int
+         n;
+
+     // both flags are read with charAt(), reject anything shorter:
+     if( decomposition == null || decomposition.length() < 2 )
+     {
+         GlemmServices.ferr.printf("Error: %s: decomposition rejected (< 2 flags): '%s' for lemma='%s' wf='%s'!\n",
+                 func, decomposition, lemma, wf);
+         return 1;
+     }
+
+     prop1 = decomposition.charAt(0) == '+';
+     prop2 = decomposition.charAt(1) == '+';
+
+     // inside an SQL string literal a single quote is written as two single quotes:
+     sqlInsert = String.format(insertQuery,
+                     String.valueOf(lemma).replace("'", "''"),
+                     String.valueOf(wf).replace("'", "''"),
+                     prop1, prop2, spec);
+
+     GlemmServices.fout.printf("Debug: %s: sqlInsert='%s'.\n", func, sqlInsert);
+
+     try {
+         n = stmt.executeUpdate(sqlInsert);
+         if( n != 1 )
+         {
+             GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
+                     func, lemma, wf, prop1, prop2, spec);
+             return 1;
+         }
+     }
+     catch (SQLException e) {
+         GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
+                 func, lemma, wf, prop1, prop2, spec);
+         e.printStackTrace(GlemmServices.ferr);
+         return 1;
+     }
+
+     return 0; // OK
+ } // writeDBLemmaInfos
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * * *
+  * readDBLemmaData
+  *
+  * Reads wordforms for one lemma from the GlemmDB.
+  * Input: lemma + GLEMM_OPTS_xxx (lemmatization rules).
+  *
+  * Opens a connection and a statement, stores both in lemmaInfo
+  * (con, stmt) and lets readDBLemma() run the query.
+  * Neither of them is closed here.
+  * Returns 0 = OK, 1 = connection, statement or query failed.
+  *
+  * 14.01.20/FB
+  * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ static int readDBLemmaData(LemmaInfo lemmaInfo, Properties props)
+
+ {
+     final String func = "readDBLemmaData";
+     int status = 1; // pessimistic: only a successful query resets it to 0.
+
+     // load the constants of this class from the GlemmServices properties:
+     set_ConstantsFromProperties(props);
+
+     try {
+         GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
+         lemmaInfo.con = DriverManager.getConnection(derbyDBURL);
+         GlemmServices.fout.printf("Debug: %s: connected.\n", func);
+
+         lemmaInfo.stmt = lemmaInfo.con.createStatement();
+
+         if( readDBLemma(lemmaInfo.stmt, lemmaInfo) == 0 )
+         {
+             status = 0;
+         }
+         else
+         {
+             GlemmServices.ferr.printf("Error: %s: cannot get data for lemma='%s' options='%s'!\n",
+                     func, lemmaInfo.lemma, lemmaInfo.optionstoString());
+         }
+     }
+     catch (SQLException dbFailure) {
+         // without a connection object the failure happened while connecting:
+         final String reason = lemmaInfo.con == null
+                 ? "Error: %s: connection failed!\n"
+                 : "Error: %s: cannot create stmt!\n";
+         GlemmServices.ferr.printf(reason, func);
+         dbFailure.printStackTrace(GlemmServices.ferr);
+     }
+
+     return status;
+
+ } // readDBLemmaData
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * * *
+  * readDBLemma
+  *
+  * Reads wordforms for one lemma by accessing the GlemmDB.
+  * Input : lemma + prop1+2 (lemmatization rules) + spec:
+  *         for each of prop1, prop2 and spec the value 0 restricts
+  *         the query to 'false', 1 to 'true', any other value adds
+  *         no restriction.
+  * Output:
+  *  - lemmaInfo.res contains a ResultSet for fetching the wordforms.
+  *    This ResultSet will have to be closed with LemmaInfo.close().
+  *    The no. of found wordforms is not known till we
+  *    fetch all of them from ResultSet!
+  *  - due to SELECT DISTINCT only unique wfs are returned.
+  * Returns 0 = OK, 1 = the query failed.
+  * Notes:
+  *  - the lemma is embedded as SQL string literal; single quotes in it
+  *    are doubled, so that a lemma containing an apostrophe neither
+  *    breaks nor alters the query.
+  *  - all sql stuff is closed outside this function, so nothing is
+  *    closed here!
+  *
+  * 14.01.20/FB
+  * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ static int readDBLemma(Statement stmt, LemmaInfo lemmaInfo)
+
+ {
+     final String
+         func = "readDBLemma";
+     final String
+         lemmaLiteral,
+         sql;
+
+     // inside an SQL string literal a single quote is written as two single quotes:
+     lemmaLiteral = String.valueOf(lemmaInfo.lemma).replace("'", "''");
+
+     sql = String.format("SELECT DISTINCT wf from %s WHERE lemma='%s' %s %s %s",
+             dbTable,
+             lemmaLiteral,
+             lemmaInfo.prop1 == 0 ? "AND prop1='false'" :
+             lemmaInfo.prop1 == 1 ? "AND prop1='true'"  : "",
+             lemmaInfo.prop2 == 0 ? "AND prop2='false'" :
+             lemmaInfo.prop2 == 1 ? "AND prop2='true'"  : "",
+             lemmaInfo.spec  == 1 ? "AND spec='true'"   :
+             lemmaInfo.spec  == 0 ? "AND spec='false'"  : "");
+
+     GlemmServices.fout.printf("Debug: %s: sql = '%s'.\n", func, sql);
+
+     try {
+         lemmaInfo.res = stmt.executeQuery(sql);
+     }
+     catch (SQLException e) {
+         GlemmServices.ferr.printf("Error: %s: when executing sql='%s'!\n", func, sql);
+         e.printStackTrace(GlemmServices.ferr);
+         return 1;
+     }
+
+     return 0; // OK
+
+ } // readDBLemma
+
+ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+  * readDbInfos
+  * Returns General Infos about the data stored in the Glemm Database.
+  * Parameters:
+  *  props : Properties from GlemmServices.properties to set constants
+  *          needed in this class.
+  * Returns: the infos collected by readDbLemmataInfos(), or null when
+  *          connecting, creating the statement or one of the queries failed.
+  * Notes:
+  *  - statement and connection are closed here; the statement is closed
+  *    first and a failing close does not prevent the other one.
+  * 10.01.20/FB
+  * 15.10.21/FB read GlemmServices.properties.
+  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+ public static GlemmDBInfo readDbInfos(Properties props)
+
+ {
+     final String
+         func = "readDbInfos";
+     Connection
+         con = null;
+     Statement
+         stmt = null;
+     GlemmDBInfo
+         dbInfo = null;
+
+     // load the constants of this class from the GlemmServices properties:
+     set_ConstantsFromProperties(props);
+
+     // open Connection to Derby Database:
+     try {
+         GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
+         con = DriverManager.getConnection(derbyDBURL);
+         GlemmServices.fout.printf("Debug: %s: connected.\n", func);
+
+         stmt = con.createStatement();
+
+         dbInfo = readDbLemmataInfos(stmt);
+
+         if( dbInfo == null )
+         {
+             GlemmServices.ferr.printf("Error: %s: cannot read General Infos from Glemm Database!\n", func);
+         }
+     }
+     catch (SQLException e) {
+         if( con == null )
+             GlemmServices.ferr.printf("Error: %s: connection failed!\n", func);
+         else
+             GlemmServices.ferr.printf("Error: %s: cannot create stmt!\n", func);
+         e.printStackTrace(GlemmServices.ferr);
+         return null;
+     }
+     finally {
+         // release the statement before its connection; each close gets its
+         // own try block, so that one failure cannot leak the other resource:
+         try {
+             if( stmt != null ) stmt.close();
+         }
+         catch (SQLException e) {
+             e.printStackTrace(GlemmServices.ferr);
+         }
+         try {
+             if( con != null ) con.close();
+         }
+         catch (SQLException e) {
+             e.printStackTrace(GlemmServices.ferr);
+         }
+     }
+
+     return dbInfo; // may be null.
+
+ } // readDbInfos
+
+ /* * * * * * * * * * * * * * * * * *
+  * readDbLemmataInfos:
+  * reads and returns general infos about the content
+  * of the Glemm Database.
+  * Parameters:
+  *  stmt : open Statement used for all count queries; it is not
+  *         closed here.
+  * Returns: a GlemmDBInfo whose counts[] are filled in this order:
+  *   [0] no. of distinct lemmata,
+  *   [1] total no. of stored wordforms,
+  *   [2] prop1=false, prop2=false,
+  *   [3] prop1=true,  prop2=false,
+  *   [4] prop1=false, prop2=true,
+  *   [5] prop1=true,  prop2=true;
+  *  a count that cannot be extracted is stored as -1.
+  *  Returns null, when one of the queries fails.
+  * 10.01.20/FB
+  */
+
+ private static GlemmDBInfo readDbLemmataInfos(Statement stmt)
+
+ {
+     final String
+         func = "readDbLemmaInfos";
+     final String[]
+         sql = { "SELECT COUNT(distinct lemma) from " + dbTable , // counts no. of distinct lemmata.
+                 "SELECT COUNT(*) from " + dbTable,               // counts total no. of stored wordforms.
+                 "SELECT COUNT(*) from " + dbTable + " WHERE prop1=false and prop2=false",
+                 "SELECT COUNT(*) from " + dbTable + " WHERE prop1=true and prop2=false",
+                 "SELECT COUNT(*) from " + dbTable + " WHERE prop1=false and prop2=true",
+                 "SELECT COUNT(*) from " + dbTable + " WHERE prop1=true and prop2=true"};
+     ResultSet
+         res = null;
+     int
+         i = 0; // declared outside the loop: the catch block reports sql[i].
+     final GlemmDBInfo
+         dbInfo = new GlemmDBInfo();
+
+     GlemmServices.fout.printf("Debug: %s: extracting agregated infos about Database contents...\n", func);
+
+     // extract DB infos and return them:
+     try {
+         for(i=0; i<sql.length; i++)
+         {
+             res = stmt.executeQuery(sql[i]);
+             if( res.next() )
+                 dbInfo.counts[i] = res.getInt(1); // count is in column 1.
+             else
+             {
+                 GlemmServices.ferr.printf("Error: %s: cannot extract count from '%s'!\n", func, sql[i]);
+                 dbInfo.counts[i] = -1;
+             }
+         }
+     }
+     catch (SQLException sqlExcept)
+     {
+         GlemmServices.ferr.printf("Error: %s: sql='%s' failed!\n", func, sql[i]);
+         sqlExcept.printStackTrace(GlemmServices.ferr);
+         return null;
+     }
+     finally {
+         // res is still null when already the first query failed; closing it
+         // unconditionally would replace the pending return value by a
+         // NullPointerException:
+         if( res != null )
+         {
+             try {
+                 res.close();
+             }
+             catch (SQLException e) {
+                 e.printStackTrace(GlemmServices.ferr);
+             }
+         }
+     }
+
+     return dbInfo; // OK
+ } // readDbLemmataInfos
+
+ /******************** Test functions ****************************************/
+
+ /* * * * * * * * * * *
+ * readDbInfos_test
+ * This is debug code written to test if we can access the Derby
+ * Database from an external Java Programm.
+ * 08.01.20/FB
+ * * * * * * * * * * */
+
+ public static int readDbInfos_test()
+
+ {
+ final String
+ func = "readDbInfos";
+ Connection
+ con = null;
+ Statement
+ stmt = null;
+ String
+ insertStmt = "";
+
+ // Connect to Derby database:
+ GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
+
+ // open Connection to Derby Database:
+ try {
+ con = DriverManager.getConnection(derbyDBURL);
+ GlemmServices.fout.printf("Debug: %s: connected.\n", func);
+
+ // extract DB infos and return them:
+ try
+ {
+ stmt = con.createStatement();
+ String
+ sql = "SELECT * FROM " + dbTable;
+ ResultSet
+ result = stmt.executeQuery(sql);
+
+ GlemmServices.fout.printf("Debug: %s: table '%s':\n", func, dbTable);
+
+ while (result.next()) {
+ GlemmServices.fout.printf("'%s' : '%s' : '%s' : '%s' .\n", result.getString(1),
+ result.getString(2),
+ result.getString(3),
+ result.getString(4));
+ insertStmt = insertStmt + "<p>" + result.getString(1) + " : " + result.getString(2) + " : " +
+ result.getString(3) + " : " + result.getString(4) + "</p>";
+ }
+ }
+ catch (SQLException sqlExcept)
+ {
+ GlemmServices.ferr.printf("Error: %s: insertStmt='%s' failed!.\n", func, stmt);
+ sqlExcept.printStackTrace(GlemmServices.ferr);
+ con.close();
+ return 1;
+ }
+ }
+ catch (SQLException e)
+ {
+ e.printStackTrace();
+ GlemmServices.ferr.printf("Error: %s: connecting '%s failed!\n", func, derbyDBURL);
+ return 1;
+ }
+
+ // closing connection:
+ try {
+ con.close();
+ }
+ catch (SQLException e) {
+ e.printStackTrace(GlemmServices.ferr);
+ }
+
+ return 0; // OK
+
+ } // readDbInfos_test
+
+ /* * * * * * * * * * *
+ * importGlemmLemata_test
+ * Test Version.
+ *
+ * Reads the file 'fn' line by line, splits every line at single
+ * whitespace characters and logs the three expected components
+ * (lemma, decomposition, wordform); lines with another number of
+ * components are reported to GlemmServices.ferr. Afterwards the
+ * Derby connection is checked with readDbInfos_test().
+ *
+ * Parameters:
+ *   fn : name of the file to read.
+ * Returns 0 = OK, 1 = file could not be opened or read, otherwise the
+ * return code of readDbInfos_test().
+ * Notes:
+ *   - the reader is closed on every path (try-with-resources); an
+ *     IOException raised while closing is reported like a read error.
+ *   - NOTE(review): FileReader decodes with the platform default
+ *     charset - confirm that this matches the encoding of 'fn'.
+ * 12.12.19/FB
+ * * * * * * * * * * */
+
+ public static int importGlemmLemmata_test(String fn)
+
+ {
+     final String func = "importGlemmLemmata_test";
+
+     /*
+      * Read Lemmata + Info from file:
+      */
+     GlemmServices.fout.printf("Debug: %s: reading from '%s'.\n", func, fn);
+
+     try (BufferedReader reader = new BufferedReader( new FileReader(fn) ))
+     {
+         String line;
+
+         while( (line = reader.readLine()) != null )
+         {
+             GlemmServices.fout.printf("Debug: import: '%s'.\n", line);
+             final String[] lemmaInfo = line.split("\\s");
+             if( lemmaInfo.length == 3 )
+                 GlemmServices.fout.printf("Debug: Import: lem='%s' Decomp='%s' Wf='%s'.\n", lemmaInfo[0], lemmaInfo[1], lemmaInfo[2]);
+             else
+                 GlemmServices.ferr.printf("Error: %s: '%s' has %d components!\n", func, line, lemmaInfo.length);
+         }
+     }
+     catch (FileNotFoundException e)
+     {
+         e.printStackTrace(GlemmServices.ferr);
+         GlemmServices.ferr.printf("Error: %s: cannot open '%s'!\n", func, fn);
+         return 1;
+     }
+     catch (IOException e) {
+         e.printStackTrace(GlemmServices.ferr);
+         return 1;
+     }
+
+     /* Check connection to Derby Database:
+      */
+     return readDbInfos_test(); // 0 = OK
+
+ } // importGlemmLemmata_test
+
+}
\ No newline at end of file
diff --git a/GlemmServices/src/de/korap/services/GlemmDBInfo.java b/GlemmServices/src/de/korap/services/GlemmDBInfo.java
new file mode 100644
index 0000000..14fc7a9
--- /dev/null
+++ b/GlemmServices/src/de/korap/services/GlemmDBInfo.java
@@ -0,0 +1,43 @@
+package de.korap.services;
+
+/*
+ * GlemmDBInfo
+ * Plain data holder for the aggregated figures of the Glemm Database.
+ * counts[i] holds the value that belongs to countLabels[i]; every slot
+ * starts at -1 until a value has been stored.
+ */
+public class GlemmDBInfo
+
+{
+    public int nDistLemmata;   // no. of distinct lemmata in DB.
+    public int nWfs;           // no. of wordforms in DB.
+    public int nFlexions;      // no. of flexions.
+    public int nCompounds;     // no. of compounds.
+    public int nOthers;        // no. of other word formations ("sonstige Wortbildungsformen").
+    public int nSpec;          // no. of special cases.
+
+    // one slot per entry of countLabels, -1 = not set:
+    public int[] counts = { -1, -1, -1, -1, -1, -1 };
+
+    // German display labels, index-aligned with counts:
+    final public String[] countLabels = {
+        "eindeutige Lemmata",
+        "Wortformen",
+        "Flexionsformen",
+        "Komposita",
+        "sonstige Wortbildungsformen",
+        "Komposita mit sonst. Wortbildungsformen"
+    };
+
+    /* GlemmDBInfo constructor
+     * stores the two submitted totals; all other fields keep their defaults.
+     * 13.01.20/FB
+     */
+
+    GlemmDBInfo(int nDistLemmata, int nWfs)
+
+    {
+        this.nWfs = nWfs;
+        this.nDistLemmata = nDistLemmata;
+    }
+
+    /* GlemmDBInfo constructor
+     * leaves every field at its default.
+     * 13.01.20/FB
+     */
+
+    GlemmDBInfo()
+
+    {
+    }
+
+}
\ No newline at end of file
diff --git a/GlemmServices/src/de/korap/services/GlemmServices.java b/GlemmServices/src/de/korap/services/GlemmServices.java
new file mode 100644
index 0000000..557225b
--- /dev/null
+++ b/GlemmServices/src/de/korap/services/GlemmServices.java
@@ -0,0 +1,990 @@
+package de.korap.services;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
+import java.io.PrintStream;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.Files;
+import java.time.LocalDateTime;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import javax.servlet.ServletContext;
+import javax.servlet.http.HttpServletRequest;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import de.korap.services.utils.Utils;
+
+/* 0.1 : 17.06.20/FB
+ * 0.2 : 25.10.21/FB
+ * logging of the API Requests.
+ * loading constants from GlemmServices.properties.
+ * correcting calling of lemma + prop1/prop2/spec.
+ * handling of lemmata with 'ß'.
+ *
+ * General comments to the encoding of derivations from lemma to wfs and how they are encoded
+ * in the Derby database:
+ *
+ * GLEMM encoding:
+ * _ _ : +Flex Öle -> Öl
+ * + _ : +Comp Öllampe -> Öl or Lampe (compound).
+ * _ + : +other Antiturnschuh -> Anti- + Turnschuh (no compound for 'Turnschuh', only prefix).
+ * + + : +Comp+other Antiturnschuh -> Anti- + Turn + Schuh (compound for 'Schuh' + Prefix).
+ * C2 lemmatization options:
+ * Flex (identical to Glemm)
+ * Comp (identical to Glemm, includes all Flexions)
+ * Other (identical to Glemm, includes all Flexions).
+ * Spec (only C2: includes all wfs with hyphen and/or some non-alphabetic ISO8859-1 chars)
+ * E.g. Öl-Exporte, "Marken"-Öle.
+ * Derby Encoding:
+ * prop1 prop2 spec meaning in C2:
+ * 0 0 - Flex
+ * 1 0 - Comp
+ * 0 1 - Other (prefix)
+ * 1 1 - Comp + Other
+ * - - 0 -Spec
+ * - - 1 +Spec
+ *
+ * Examples:
+ * &Schuh : Schuh, Schuhe, ... only flexions
+ * &Schuh+comp : Schuhe, Turnschuhe... flexions and compounds.
+ * &Schuh+other : Schuhe, Antischuhe... flexions and prefixed wfs (no compounds).
+ * &Schuh+comp+other: Schuhe, Antischuhe, Turnschuhe, Antiturnschuhe... flexions and compounds and
+ * prefixed single wfs (Antischuhe) and prefixed compounds.
+ * &Schuh+spec : "Anti"-Schuhe, 68ger-Schuhe... may be combined with every options: adds
+ * wfs with hyphen and/or non-alphabetic chars.
+ *
+ * Conversion rules:
+ *
+ * &schließen -> internal search for 'schliessen',
+ * because Glemm outputs 'ss' for 'ß'.
+ *
+ * Character Encoding:
+ *
+ * - When importing wfs to the Derby Database with doCurlGlemmServices/indexLemmata:
+ *
+ * INPUT: wordform list in [UTF-8] : UTF-8 is then converted to Lat1 for Glemm,
+ * the output of Glemm [Lat1] is returned to the GlemmServices without reconversion
+ * to UTF8 and imported into Derby Database unchanged.
+ *
+ */
+
+@Path("/index")
+
+public class GlemmServices {
+
+ // version infos, written to the log by log_Request() and shown in the HTML responses:
+ final private String versionDate = "05.11.21";
+ final private String version = "0.2";
+
+ // location of the properties file, resolved through the ServletContext:
+ final private String fnameProps = "/WEB-INF/GlemmServices.properties";
+
+ // the following constants are read from GlemmServices.properties (these are the default values):
+ // (they are overwritten by set_ConstantsFromProperties() on every request.)
+ private String GS_WorkingPath = "/home/bodmer/KorAP/GlemmServices/Tests";
+ private String fnameWfs = GS_WorkingPath + "/gl_wfs_in.txt";
+ private String fnameOut = GS_WorkingPath + "/GlemmServices.log";
+ private String fnameErr = GS_WorkingPath + "/GlemmServices.err";
+ private String fnameScript = GS_WorkingPath + "/doGlemm";
+ // NOTE(review): the default here ends in "/tmp/lem3.utf8", whereas set_ConstantsFromProperties()
+ // falls back to "tmp/lem3" - confirm which of the two is intended.
+ private String fnameLemata = GS_WorkingPath + "/tmp/lem3.utf8";
+
+ // request and servlet context, injected by the JAX-RS runtime:
+ @Context
+ private HttpServletRequest httpServletRequest;
+ @Context
+ private ServletContext servletContext;
+
+ // codes returned by getAcceptedFormat() (it returns 0 when no 'accept' header is found):
+ final private int ACC_FORMAT_TEXT = 1;
+ final private int ACC_FORMAT_JSON = 2;
+ final private int ACC_FORMAT_HTML = 3;
+
+ // log streams used instead of System.out/System.err; opened by init_and_log_Request()
+ // and closed + reset to null by closeLogStreams(). Also used from GlemmDB.
+ // NOTE(review): both are static and re-assigned on every request, so overlapping
+ // requests would share and close each other's streams - confirm that requests
+ // are never handled concurrently.
+ public static PrintStream fout = null;
+ public static PrintStream ferr = null;
+
+ /*
+ *
+ * - after loadProperties, sets the class variable with loaded property values.
+ *
+ * 15.10.21/FB
+ */
+
+ private void set_ConstantsFromProperties(Properties props)
+
+ {
+ GS_WorkingPath = Utils.getConstantFromProperties(props, "GS_WorkingPath", "/home/bodmer/KorAP/GlemmServices/Tests");
+ fnameWfs = Utils.getConstantFromProperties(props, "fnameWfs", "gl_wfs_in.txt");
+ fnameWfs = GS_WorkingPath + "/" + fnameWfs;
+ fnameOut = Utils.getConstantFromProperties(props, "fnameOut", "GlemmServices.log");
+ fnameOut = GS_WorkingPath + "/" + fnameOut;
+ fnameErr = Utils.getConstantFromProperties(props, "fnameErr", "GlemmServices.err");
+ fnameErr = GS_WorkingPath + "/" + fnameErr;
+ fnameScript = Utils.getConstantFromProperties(props, "fnameScript", "doGlemm");
+ fnameScript = GS_WorkingPath + "/" + fnameScript;
+ fnameLemata = Utils.getConstantFromProperties(props, "fnameLemata", "tmp/lem3");
+ fnameLemata = GS_WorkingPath + "/" + fnameLemata;
+ } // set_ConstantsFromProperties
+
+ /*
+ * log_Request:
+ *
+ * - write info about API Request.
+ * Parameters:
+ * requestPath : path of webapp addressed by the request.
+ * func : name of the function which implements the request.
+ *
+ * 19.10.21/FB
+ */
+
+ private void log_Request(String requestPath, String func)
+
+ {
+ // transform "dateTtime" -> "date time":
+
+ fout.printf("###\n");
+ fout.printf("### %s: %s : vers='%s', vdate='%s' request='%s'.\n", func, requestPath, version, versionDate,
+ LocalDateTime.now().toString().replace("T", " "));
+ fout.printf("###\n");
+ } // log_Request
+
+ /* formatDbName
+ *
+ * - gets name of Database used by Derby from props
+ * and format it to HTML.
+ *
+ * 19.10.21/FB
+ */
+
+ private String formatDbName(Properties props)
+
+ {
+ String
+ dbName;
+ String
+ dbPath = Utils.getConstantFromProperties(props, "fnameDB", "/home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB");
+ int
+ j = dbPath.lastIndexOf('/');
+
+ if( j > 0 )
+ dbName = dbPath.substring(j+1);
+ else
+ dbName = dbPath;
+
+ return dbName; // "<p><b>Database: " + dbName + "</b></p>\n";
+ } // formatDbName
+
+ /*
+ * closeLogStreams:
+ *
+ * - close the fout/ferr log streams.
+ *
+ * 26.10.21/FB
+ */
+
+ private void closeLogStreams()
+
+ {
+ if( fout != null)
+ fout.close();
+ fout = null;
+
+ if( ferr != null )
+ ferr.close();
+ ferr = null;
+ } // closeLogStreams
+
+ /* init_and_log_Request
+ *
+ * - load GlemmServices.properties, sets GlemmServices constants
+ * and logs the request.
+ * - load properties and set constants before logging anything, because
+ * logging will be done into the last stdout/stderr opened in glassfish's JVM,
+ * e.g. in the log of another Service.
+ * 19.10.21/FB
+ */
+
+ private Properties init_and_log_Request(String func, String requestPath)
+
+ {
+ String
+ realPath = servletContext.getRealPath(fnameProps);
+
+ // load GlemmServices properties and log them (if last param = true):
+ Properties
+ props = Utils.loadProperties(servletContext.getResourceAsStream(fnameProps), fnameProps, realPath, false);
+
+ set_ConstantsFromProperties(props);
+
+ // open fout and ferr as replacement for stdout/stderr for use in GlemmServices.
+ // do not use or redirect System.out and System.err, as they will be redirected
+ // by other Services in the same glassfish JVM, e.g. by the WformServices.
+ fout = Utils.setLogStream(fnameOut);
+ ferr = Utils.setLogStream(fnameErr);
+
+ //Utils.setStdFileStreams(fnameErr, fnameOut);
+
+ log_Request(requestPath, func);
+
+ return props;
+ } // init_and_log_Request
+
+ /* * * * * * * * * * * * * * * * * *
+ *
+ * doGET_status() on /index/status.
+ *
+ * GET status of Glemm Services
+ *
+ * Returns a small HTML page with: version, version date, state of the
+ * Derby connection (GlemmDB.checkDerbyConnection() == 0 means connected),
+ * request time, state of the properties file (not found / empty /
+ * loaded) and the name of the database.
+ *
+ * The stream opened to test whether the properties file exists is
+ * closed again before returning.
+ *
+ * 17.06.20/FB
+ *
+ * * * * * * * * * * * * * * * * * *
+ */
+
+ @Path("status")
+ @GET
+ @Produces("text/html")
+
+ public String doGET_status()
+
+ {
+     final String func = "doGET_status";
+     final Properties props = init_and_log_Request(func, "/index/status/");
+     final boolean bDerbyServerConnected = GlemmDB.checkDerbyConnection() == 0;
+
+     // probe for the properties resource; try-with-resources accepts a null
+     // resource and closes the stream when one was returned.
+     boolean bPropsFound = false;
+     try (InputStream probe = servletContext.getResourceAsStream(fnameProps))
+     {
+         bPropsFound = probe != null;
+     }
+     catch (IOException e) {
+         // only close() can fail here; bPropsFound already holds the answer.
+         e.printStackTrace(ferr);
+     }
+
+     final String propsState;
+     if( !bPropsFound )
+         propsState = "not found";
+     else if( props.isEmpty() )
+         propsState = "empty";
+     else
+         propsState = "loaded";
+
+     final String status = String.format( "<html>\n" +
+                     " <h3>Glemm Services</h3>\n" +
+                     " <p><b>Version</b> : %s</p>" +
+                     " <p><b>Version date</b> : %s</p>" +
+                     " <p><b>Services status</b> : ready.</p>" +
+                     " <p><b>Derby Server status</b> : %s.</p>" +
+                     " <p><b>Request time</b>: %s</p>" +
+                     " <p><b>properties</b>: %s </p>" +
+                     " <p><b>database</b>: %s </p>" +
+                     "</html>",
+                     version,
+                     versionDate,
+                     bDerbyServerConnected ? "connected" : "not available",
+                     LocalDateTime.now().toString().replace("T", " "),
+                     propsState,
+                     formatDbName(props)
+                     );
+
+     fout.printf("Debug: %s: done.\n", func);
+     closeLogStreams();
+     return status;
+
+ } // doGET_status
+
+ /* ***********************************************************
+ *
+ * doPOST_importLemmaData:
+ *
+ * POST: /index/add/{databasename}: text/plain
+ *
+ * Parameters:
+ * list : the list of wfs sent in the body of the POST request.
+ * dbpathEncoded: the location of the database where the lemma+wfs are to be stored.
+ * encoded as ".home.user.KorAP....", must be converted to
+ * "/home/user/KorAP/..."
+ * IMPORTANT: do not use a '.' for encoding anything else than a '/'.
+ * Notes :
+ * - adds a list of new wordforms to the GlemmService's Database located at 'databasename'.
+ * - The wfs are lemmatized and the morphological data stored into a Derby database.
+ * Input : wordform data expected in [UTF-8].
+ * 25.11.19/FB
+ * 04.11.21/FB
+ * - submitted database path.
+ */
+
+ @Path("add/{databasepath}")
+ @POST
+ @Consumes("text/plain")
+ @Produces("text/html")
+
+ public String doPOST_importLemmaData
+ (
+ String list,
+ @PathParam(value="databasepath") final String dbpathEncoded
+ )
+
+ {
+ final String
+ func = "doPOST_importLemmaData";
+ String[]
+ arList = list.split("\n");
+ String
+ dbpath;
+ FileSystem
+ fs = FileSystems.getDefault();
+ java.nio.file.Path
+ p = null;
+ OutputStream
+ os;
+ boolean
+ bDone = false;
+ String
+ msg;
+ int
+ nErrors = 0;
+ Properties
+ props;
+
+ props = init_and_log_Request(func, "/index/add/{dbpath}");
+
+ dbpath = dbpathEncoded.replace('.', '/');
+
+ fout.printf("Info : %s: selected database path = '%s'.\n", func, dbpath);
+ fout.printf("Info : %s: working directory = '%s'.\n", func, GS_WorkingPath);
+
+ // set file and paths after GS_WorkingPath has been initialised:
+ p = fs.getPath(fnameWfs);
+
+ // Write submitted list of wordforms to file:
+ try
+ {
+ os = Files.newOutputStream(p);
+ os.write(list.getBytes(StandardCharsets.ISO_8859_1));
+ os.close();
+ bDone = true;
+ fout.printf("Debug: %s: file '%s' beschrieben.\n", func, fnameWfs);
+ }
+ catch(IOException e)
+ {
+ e.printStackTrace(ferr);
+ }
+
+ if( bDone )
+ msg = "<p>List written to file: " + p.toString() + "</p>";
+ else
+ msg = "<p><b>File Error: </b> cannot write to " + p.toString() + "!</p>";
+
+ /*
+ * execute Glemm shell script:
+ */
+ String
+ msgGlemmScript, msgImport;
+ int
+ r = Utils.executeShellScript(fnameScript, fnameWfs, fout, ferr);
+
+ if( r == 0 )
+ msgGlemmScript = "<p><b>Glemm Script:</b> Ausführung OK.</p>";
+ else
+ msgGlemmScript = "<p><b>Glemm Script:</b> Fehler aufgetreten!";
+
+ // Build a reponse and return it:
+ if( r != 0 )
+ return
+ "<html>" +
+ "<h1>GlemmServices: POST on /index/add: add List of wordforms.</h1>" +
+ "<p>Version vom " + versionDate + "</p>" +
+ "<p>Consumes = text/plain</p>" +
+ "<p>List length =" + arList.length +
+ msg + msgGlemmScript +
+ "</html>";
+
+
+ // Read lemmatized data and store them into database:
+ nErrors = GlemmDB.importGlemmLemmata(fnameLemata, props, dbpath);
+
+ fout.printf("Debug: %s: importGlemmLemmata returns r=%d.\n", func, r);
+
+ if( nErrors == 0 )
+ msgImport = "<p><b>Import:</b> " + fnameLemata + ": OK.</p>";
+ else
+ msgImport = "<p><b>Import:</b> " + fnameLemata + ": " + nErrors + " errors!</p>";
+
+ fout.printf("Debug: %s: ending time: %s.\n", func, LocalDateTime.now().toString().replace("T", " "));
+
+ closeLogStreams();
+
+ // Return Infos:
+ return
+ "<html>" +
+ "<h1>GlemmServices: POST on /index/add: add List of wordforms.</h1>" +
+ "<p>Version vom " + versionDate + "</p>" +
+ "<p>Consumes = text/plain</p>" +
+ "<p>List length =" + arList.length +
+ msg + msgGlemmScript + msgImport +
+ "</html>";
+
+ } // doPOST_importLemmaData
+
+ /* doGET_viewDbInfo:
+ *
+ * GET /index/info/: text/plain > text/html.
+ * Returns general infos about the current content of the
+ * Glemm Lemma Database: one table row per entry of GlemmDBInfo.counts,
+ * numbers formatted with the German locale. When the database cannot
+ * be read, an error paragraph replaces the table.
+ * 10.01.20/FB
+ */
+ @GET @Path("info") @Produces("text/html")
+
+ public String doGET_viewDbInfo()
+
+ {
+     final String func = "doGET_viewDbInfo";
+     final String alignRight = "align='right'";
+     final StringBuilder body = new StringBuilder();
+
+     final Properties props = init_and_log_Request(func, "/index/info/");
+     final GlemmDBInfo dbInfo = GlemmDB.readDbInfos(props);
+
+     if( dbInfo != null )
+     {
+         body.append("<table border=\"1\" cellpadding=\"2\" cellspacing=\"2\">\n");
+
+         int row = 0;
+         while( row < dbInfo.counts.length )
+         {
+             body.append(String.format(Locale.GERMAN, " <tr><td><b>Anzahl %s</b></td><td %s>%,d</td></tr>\n",
+                                       dbInfo.countLabels[row], alignRight, dbInfo.counts[row]));
+             row++;
+         }
+         body.append("</table>");
+     }
+     else
+     {
+         body.append("<p><b>Fehler:</b> Inhalt der Datebank kann nicht gelesen werden!</p>");
+     }
+
+     // nothing is logged from here on:
+     closeLogStreams();
+
+     final StringBuilder page = new StringBuilder();
+
+     page.append("<html><head>\n" +
+                 " <meta charset=\"utf-8\">\n" +
+                 "</head>\n" +
+                 "<h1>GlemmServices: GET on /index/info</h1>\n" +
+                 "<h4>(vers = " + versionDate + ")</h4>\n" +
+                 "<h2>Current state of the Database</h2>\n");
+     page.append(String.format("<p><b>database</b>: %s</p>", formatDbName(props)));
+     page.append(body);
+     page.append("</html>");
+
+     return page.toString();
+ } // doGET_viewDBInfo
+
+
+ /* doGET_viewLemmaInfoHTML:
+ *
+ * GET /index/info/lemma/{lemma}?opts=o1+o2+o3...
+ * where: oi... = (flex)(+comp)(+other)(+spec) in any order.
+ * As opts is optional, flex is default.
+ *
+ * E.g. /index/info/lemma/strahlen?opts=flex+other
+ *
+ * Returns: text/html.
+ * Returns wordforms for a submitted lemma.
+ *
+ * 13.01.20/FB
+ */
+
+ @GET
+ @Path("info/lemma/{lemma}")
+ @Produces("text/html")
+
+ public String doGET_viewLemmaInfoHTML(
+ @PathParam(value="lemma") final String lemma,
+ @QueryParam("opts") @DefaultValue("undef") final String options,
+ @QueryParam("prop1") @DefaultValue("3") final int prop1,
+ @QueryParam("prop2") @DefaultValue("3") final int prop2,
+ @QueryParam("spec") @DefaultValue("3") final int spec )
+
+ {
+ final String
+ func = "doGET_viewLemmaInfoHTML";
+ int
+ i, r;
+ final String
+ returnHeader = "<html><head>\n" +
+ " <meta charset=\"utf-8\">\n" +
+ "</head>\n" +
+ "<h1>GlemmServices: GET on /index/info/lemma</h1>\n" +
+ "<h4>version " + version + " vom " + versionDate + " request = '" + LocalDateTime.now().toString().replace("T", " ") + "'</i></h4>\n" +
+ "<p><b>Query Syntax 1:</b> (C2 Options Setting)</p>" +
+ "<p><code>{lemma}</code> <b>or</b> <code>{lemma}?opts=(flex)(+comp)(+other)(+spec)</code></p>\n" +
+ "<p><b>Example:</b> <code>Öl</code> <b>or</b> <code>Öl?opts=flex+comp+other+spec</code></p>" +
+ "<p><b>Query Syntax 2:</b> (internal Options Setting)</p>" +
+ "<p><code>{lemma}?prop1=[012]&prop2=[012]&spec=[012]</code> (where each prop1, prop2, spec is optional).</p>\n" +
+ "<p><b>Example:</b> <code>Öl?prop1=0&prop2=1&spec=2</code></p>";
+ final String
+ returnTrailer = "</html>";
+ final String
+ backgroundcolor = "style=\"background-color: #E0E0E0\"";
+ StringBuilder
+ table = new StringBuilder("<table border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+ String
+ msg = null;
+ int
+ nErrors = 0,
+ acceptedFormat = 0;
+ LemmaInfo
+ lemmaInfo = new LemmaInfo(lemma);
+ Properties
+ props;
+
+ props = init_and_log_Request(func, "/index/info/lemma/{lemma}");
+
+ fout.printf("Debug: %s: lemma='%s' options='%s' prop1='%d' prop2='%d' spec='%d'.\n", func, lemma, options, prop1, prop2, spec);
+
+ /*
+ for(i=0; i<lemma.length(); i++)
+ {
+ fout.printf("Debug: %s: char at %d = '%c' = [%d].\n", func, i, lemma.charAt(i), lemma.codePointAt(i));
+ }
+ */
+
+ nErrors = lemmaInfo.parseAllOptions(options, prop1, prop2, spec);
+
+ if( nErrors != 0 )
+ {
+ ferr.printf("Error: %s: Errors in options Parameter!\n", func);
+ msg = String.format("<p><b>Error:</b> options Parameter contains unknown values!</p>\n");
+ return returnHeader + msg + returnTrailer;
+ }
+
+ acceptedFormat = getAcceptedFormat();
+
+ table.append("<thead " + backgroundcolor + ">\n");
+ if( lemmaInfo.optsSetting == LemmaInfo.OPTS_SETTING_C2 )
+ msg = String.format("<tr><th>Lemma = '%s' & options = <i>'%s'</i></th></tr>\n", lemmaInfo.lemmaOrig, lemmaInfo.optionstoString());
+ else
+ msg = String.format("<tr><th>Lemma = '%s' & prop1,prop2,spec set directly.</th></tr>\n", lemmaInfo.lemmaOrig);
+
+ table.append(msg);
+
+ msg = String.format("<tr><th><b>prop1 = <i>'%s'</i> & prop2 = <i>'%s'</i> & spec = <i>'%s'</i></th></tr>\n",
+ lemmaInfo.prop1 == 0 ? "-" : lemmaInfo.prop1 == 1 ? "+" : "undef",
+ lemmaInfo.prop2 == 0 ? "-" : lemmaInfo.prop2 == 1 ? "+" : "undef",
+ lemmaInfo.spec == 0 ? "-" : lemmaInfo.spec == 1 ? "+" : "undef");
+ table.append(msg);
+
+ // extract lemmata from GlemmDB:
+ GlemmDB.readDBLemmaData(lemmaInfo, props);
+
+ // scan all wordforms and format them in the response buffer;
+ // if an error occures, wordform will be null;
+ String
+ wordforms = lemmaInfo.wfs2HTML();
+
+ // close sql stuff:
+ int
+ nWfs = lemmaInfo.nWfs;
+
+ if( lemmaInfo != null )
+ lemmaInfo.close();
+
+ if( lemmaInfo.nWfs == 0 )
+ wordforms = "<tr><td>keine Wortformen gefunden!</td></tr>\n";
+ else if( lemmaInfo.nWfs < 0 || wordforms == null )
+ wordforms = "<tr><td>Fehler aufgetreten: keine Wortformen gefunden!</td</tr>\n";
+ else
+ table.append("<tr><td><b>Anz. gefundene Wortformen</b> = " + lemmaInfo.nWfs + "</td></tr>\n");
+
+ table.append("</thead>\n");
+
+ // log outcome:
+ if( nWfs < 0 )
+ fout.printf("Error: %s: returning 0 results [HTML].\n", func);
+ else
+ fout.printf("Debug. %s: returning %d results [HTML].\n", func, nWfs);
+
+ closeLogStreams();
+
+ return returnHeader + table.toString() + "<tbody>\n" + wordforms + "</tbody>\n" + returnTrailer;
+
+ } // viewLemmaInfoHTML
+
+ /* viewLemmaInfoJSON:
+ *
+ * GET /index/info/lemma/{lemma}?opts=o1+o2+o3...
+ * where: oi... = (flex)(+comp)(+other)(+spec) in any order.
+ * As opts is optional, flex is default.
+ *
+ * E.g. /index/info/lemma/strahlen?opts=flex+other
+ *
+ * Returns: application/json.
+ * Returns wordforms for a submitted lemma.
+ *
+ * note:
+ * - if Browser request: HTTPServletRequest.accept = 'text/html,...xml...'
+ * - if WformServices request: HttpServletRequest.accept = 'application/json'.
+ *
+ * 24.01.20/FB
+ *
+ * - submitted URL, i.e. the requested lemma, is interpreted by glassfish as
+ * being ISO-8859-1 encoded. When the request is UTF-8 encoded, Umlauts and
+ * 'ß' are mis-interpreded, leading to an erronerous request to the Derby
+ * database.
+ * - Requested lemma 'lemma' has to be transformed to UTF-8 first.
+ * - note: even when submitting the GET request by specifying 'charencode=UTF-8',
+ * glassfish does not convert it automatically as expected.
+ *
+ * 22.10.21/FB
+ *
+ * - if accept=application/json return UTF-8 (WformServices).
+ * - else if accept=text/html do not transform character encoding (Browser).
+ */
+
+ @GET
+ @Path("info/lemma/{lemma}")
+ @Consumes("application/json, text/html")
+ @Produces("application/json")
+
+ public Response doGET_viewLemmaInfoJSON(
+ @PathParam(value="lemma") final String lemma,
+ @QueryParam("opts") @DefaultValue("undef") final String options,
+ @QueryParam("prop1") @DefaultValue("3") final int prop1,
+ @QueryParam("prop2") @DefaultValue("3") final int prop2,
+ @QueryParam("spec") @DefaultValue("3") final int spec )
+ {
+ final String
+ func = "doGET_viewLemmaInfoJSON";
+ int
+ nErrors = 0,
+ acceptedFormat = 0;
+ LemmaInfo
+ lemmaInfo;
+ LemmaResponse
+ lemmaResp = null;
+ Properties
+ props;
+ final boolean
+ bEncodeLemma = false;
+
+ props = init_and_log_Request(func, "/index/info/lemma/{lemma}?opts");
+
+ /* test:
+ for(int i=0; i<lemma.length(); i++)
+ {
+ fout.printf("Debug: %s: char at %d = '%c' = [%d].\n", func, i, lemma.charAt(i), lemma.codePointAt(i));
+ }
+ */
+ if( bEncodeLemma )
+ {
+ // when accepting JSON, url has to be converted to UTF-8, i.e. the requested lemma:
+ try {
+ lemmaInfo = new LemmaInfo(new String(lemma.getBytes("ISO-8859-1"), "utf-8"));
+ }
+ catch (UnsupportedEncodingException e1) {
+ e1.printStackTrace();
+ LemmaResponse
+ lemresp = new LemmaResponse();
+ return Response.status(Response.Status.BAD_REQUEST).type(MediaType.APPLICATION_JSON).entity(lemresp).build();
+ }
+ fout.printf("Debug: %s: lemma='%s' [UTF-8] options='%s'.\n", func, lemmaInfo.lemmaOrig, options);
+ }
+ else
+ {
+ lemmaInfo = new LemmaInfo(lemma);
+ fout.printf("Debug: %s: lemma='%s' [unchanged] options='%s'.\n", func, lemmaInfo.lemmaOrig, options);
+ }
+
+ /* does not help:
+ String
+ requestCharEncoding = httpServletRequest.getCharacterEncoding();
+
+ fout.printf("Debug: %s: request character encoding ='%s'.\n", func, requestCharEncoding);
+ */
+
+ nErrors = lemmaInfo.parseAllOptions(options, prop1, prop2, spec);
+
+ if( nErrors != 0 )
+ {
+ ferr.printf("Error: %s: Errors in options Parameter: '%s'!\n", func, options);
+ LemmaResponse
+ lemRespErr = new LemmaResponse();
+ lemRespErr.head_errMess = String.format("Error: %s: options Parameter contains unknown values: '%s'!\n", func, options);
+
+ return Response.status(Response.Status.BAD_REQUEST).type(MediaType.APPLICATION_JSON).entity(lemRespErr).build();
+ }
+
+ acceptedFormat = getAcceptedFormat();
+
+ // extract lemmata from GlemmDB:
+ GlemmDB.readDBLemmaData(lemmaInfo, props);
+
+ // transform SQL Response to JSON Response:
+ lemmaResp = lemmaInfo.res2JSON();
+
+ fout.printf("Debug: %s: lemmaResp: nWfs=%d errMess='%s' list='%s'.\n", func, lemmaResp.head_nWfs,
+ lemmaResp.head_errMess != null ? lemmaResp.head_errMess : "-",
+ lemmaResp.listofWfs);
+
+ // close:
+ if( lemmaInfo != null )
+ lemmaInfo.close();
+
+ closeLogStreams();
+
+ /* return Lemma Response Object */
+ if( lemmaResp.head_errMess == null )
+ {
+ return Response.status(Response.Status.OK).type(MediaType.APPLICATION_JSON).entity(lemmaResp).build();
+ //return Response.status(Response.Status.OK).entity(lemmaResp).build();
+ }
+ else // on errors:
+ return Response.status(Response.Status.INTERNAL_SERVER_ERROR).type(MediaType.APPLICATION_JSON).entity(lemmaResp).build();
+
+ } // viewLemmaInfoJSON
+
+ /* viewLemmaInfoTEXT:
+ *
+ * either:
+ * a) GET /index/info/lemma/{lemma}?opts=(flex)(+comp)(+other)(+spec)
+ * this is the original C2 Glemm Option Setting and should be totally
+ * compatible to the way Glemm Options work in COSMAS II.
+ * b) GET /index/info/lemma/{lemma}?(prop1=n)(&prop2=n)(&spec=n)
+ * this is the extended mode in KorAP which lets every flag from
+ * the Glemm Lemma table be set individually;
+ * where n=0 : false, n=1 : true; n=2 : undef.
+ *
+ * This Service expects first "opts" to be set (precedence over the
+ * other options) -> C2 Option Setting, or
+ * one of "prop1", "prop2" and "spec", in which case -> extended flag
+ * setting is used.
+ *
+ * Default is ?opts=flex.
+ *
+ * E.g. /index/info/lemma/strahlen?opts=flex+other
+ * E.g. /index/info/lemma/strahlen?prop1=0&prop2=1&spec=2
+ *
+ * Notes:
+ * - the response format follows getAcceptedFormat():
+ *     text : 202 + wordforms, or 500 when no text could be built;
+ *     json : 202 + LemmaResponse when head_errMess is null, else 500;
+ *     html / unknown (0) : 500, not implemented here.
+ * - unparsable options are answered with 400.
+ * - the log streams are closed on every return path.
+ * - NOTE(review): success is reported as 202 (ACCEPTED), whereas the JSON
+ *   service uses 200 (OK) - kept as is, confirm what the clients expect.
+ *
+ * Returns: text/plain.
+ * Returns only wordforms for a submitted lemma.
+ *
+ * 31.01.20/FB
+ */
+
+ @GET
+ @Path("info/lemma/{lemma}")
+ @Produces("text/plain")
+
+ public Response doGET_viewLemmaInfoTEXT(
+ @PathParam(value="lemma") final String lemma,
+ @QueryParam("opts") @DefaultValue("undef") final String options,
+ @QueryParam("prop1") @DefaultValue("3") final int prop1,
+ @QueryParam("prop2") @DefaultValue("3") final int prop2,
+ @QueryParam("spec") @DefaultValue("3") final int spec )
+
+ {
+     final String func = "doGET_viewLemmaInfoTEXT";
+     final LemmaInfo lemmaInfo = new LemmaInfo(lemma);
+     String result = "";
+     LemmaResponse lemmaResp = null;
+
+     final Properties props = init_and_log_Request(func, "/index/info/lemma/{lemma}?opts");
+
+     fout.printf("Debug: %s: lemma='%s' options='%s' prop1=%d prop2=%d spec=%d.\n",
+                     func, lemma, options, prop1, prop2, spec);
+
+     final int acceptedFormat = getAcceptedFormat();
+
+     final int nErrors = lemmaInfo.parseAllOptions(options, prop1, prop2, spec);
+
+     if( nErrors != 0 )
+     {
+         ferr.printf("Error: %s: Errors in options Parameter: '%s'!\n", func, options);
+         final LemmaResponse lemRespErr = new LemmaResponse();
+         lemRespErr.head_errMess = String.format("Error: %s: options Parameter contains unknown values: '%s'!\n", func, options);
+
+         // do not leave the log files open on the error path:
+         closeLogStreams();
+
+         return Response.status(Response.Status.BAD_REQUEST).type(MediaType.APPLICATION_JSON).entity(lemRespErr).build();
+     }
+
+     // extract lemmata from GlemmDB:
+     GlemmDB.readDBLemmaData(lemmaInfo, props);
+
+     // transform SQL Response to the requested format (before the sql stuff is closed):
+     switch(acceptedFormat)
+     {
+         case ACC_FORMAT_TEXT:
+             result = lemmaInfo.res2TEXT();
+             break;
+         case ACC_FORMAT_JSON:
+             // transform SQL Response to JSON Response:
+             lemmaResp = lemmaInfo.res2JSON();
+             break;
+         case ACC_FORMAT_HTML:
+             // not implemented here, see viewLemmaInfoHTML().
+             result = null;
+             break;
+         default:
+             result = lemmaInfo.res2TEXT();
+     }
+
+     // close:
+     lemmaInfo.close();
+
+     closeLogStreams();
+
+     switch( acceptedFormat)
+     {
+         case ACC_FORMAT_TEXT:
+             /* return wordforms as text */
+             if( result != null )
+                 return Response.status(Response.Status.ACCEPTED).type(MediaType.TEXT_PLAIN).entity(result).build();
+             else// on errors:
+                 return Response.status(Response.Status.INTERNAL_SERVER_ERROR).type(MediaType.TEXT_PLAIN).entity("").build();
+
+         case ACC_FORMAT_JSON:
+             /* return Lemma Response Object; a set head_errMess signals an error. */
+             if( lemmaResp.head_errMess == null )
+                 return Response.status(Response.Status.ACCEPTED).type(MediaType.APPLICATION_JSON).entity(lemmaResp).build();
+             else// on errors:
+                 return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
+                         .type(MediaType.APPLICATION_JSON).entity(lemmaResp).build();
+
+         case ACC_FORMAT_HTML:
+             return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
+                     .type(MediaType.APPLICATION_XML_TYPE).entity("internal error: not implemented in this procedure!").build();
+         default:
+             return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
+                     .type(MediaType.APPLICATION_XML_TYPE).entity("internal error: requested Format is not known!").build();
+     }
+
+ } // viewLemmaInfoTEXT
+
+ /*
+ * Test function:
+ * Reads the general infos of the Lemma Database and only reports
+ * whether that worked ("Alles gut.") or not; the figures themselves
+ * are not part of the response.
+ * 09.10.20/FB
+ */
+ @GET @Path("test/info") @Produces("text/html")
+
+ public String viewDbContent()
+
+ {
+     final String func = "viewDbContent";
+     final Properties props = init_and_log_Request(func, "/index/test/info/");
+
+     final boolean bReadable = GlemmDB.readDbInfos(props) != null;
+
+     closeLogStreams();
+
+     final StringBuilder page = new StringBuilder();
+
+     page.append("<html><h1>GlemmServices: GET on /index/test/info</h1>" +
+                 "<p>Version vom " + versionDate + "</p>");
+     if( bReadable )
+         page.append("<p>Alles gut.</p>");
+     else
+         page.append("<p><b>Fehler:</b> Inhalt der Datebank kann nicht gelesen werden!</p>");
+     page.append("</html>");
+
+     return page.toString();
+ }
+
+ /*
+ * getAcceptedFormat
+ *
+ * - logs every header of the current request to fout until the 'accept'
+ *   header is found and maps its value to one of the ACC_FORMAT_* codes.
+ * - the header name is compared case-insensitively ("Accept" and
+ *   "accept" are the same header).
+ * - the value must match exactly "text/plain", "application/json" or
+ *   "text/html"; any other value is logged and treated as text/plain.
+ * Returns:
+ *   ACC_FORMAT_TEXT, ACC_FORMAT_JSON or ACC_FORMAT_HTML;
+ *   0 when the request has no 'accept' header or the headers are not
+ *   accessible.
+ * 17.02.20/FB
+ */
+
+ public int getAcceptedFormat()
+
+ {
+     final Enumeration<String> names = httpServletRequest.getHeaderNames();
+
+     // no access to the headers at all:
+     if( names == null )
+         return 0;
+
+     while( names.hasMoreElements() )
+     {
+         final String key = names.nextElement();
+         final String val = httpServletRequest.getHeader(key);
+
+         fout.printf("Header: '%s' = '%s'.\n", key, val);
+
+         if( !"accept".equalsIgnoreCase(key) )
+             continue;
+
+         // switch on a String must not receive null; a missing value ends up in 'default'.
+         switch( val == null ? "" : val )
+         {
+             case "text/plain":
+                 fout.printf("accepted format = 'text/plain': ok.\n");
+                 return ACC_FORMAT_TEXT;
+             case "application/json":
+                 fout.printf("accepted format = 'applic/JSON': ok.\n");
+                 return ACC_FORMAT_JSON;
+             case "text/html":
+                 fout.printf("accepted format = 'applic/HTML': ok.\n");
+                 return ACC_FORMAT_HTML;
+             default:
+                 fout.printf("accepted format = '%s' = unknown! Default='text/plain'.\n", val);
+                 return ACC_FORMAT_TEXT;
+         }
+     }
+
+     return 0; // something wrong.
+
+ } // getAcceptedFormat
+
+}
diff --git a/GlemmServices/src/de/korap/services/LemmaInfo.java b/GlemmServices/src/de/korap/services/LemmaInfo.java
new file mode 100644
index 0000000..e5bbf06
--- /dev/null
+++ b/GlemmServices/src/de/korap/services/LemmaInfo.java
@@ -0,0 +1,629 @@
+package de.korap.services;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import de.korap.services.LemmaResponse;
+
+public class LemmaInfo {
+
+ // GLEMM API Options:
+ final static String
+ GLEMM_OPTS_FLEX = "flex", // Flexionsformen
+ GLEMM_OPTS_COMP = "comp", // Komposita
+ GLEMM_OPTS_OTHER = "other", // sonstige Wortbildungsformen
+ GLEMM_OPTS_SPEC = "spec", // Spezialfälle.
+ GLEMM_OPTS_NFLEX = "-flex", // keine Flexionsformen
+ GLEMM_OPTS_NCOMP = "-comp", // keine Komposita
+ GLEMM_OPTS_NOTHER = "-other", // keine sonstige Wortbildungsformen
+ GLEMM_OPTS_NSPEC = "-spec"; // keine Spezialfälle.
+
+ // Options Settings requested in Query:
+ final static int
+ OPTS_SETTING_UNDEF = 0,
+ OPTS_SETTING_C2 = 1, // lemma/Schuh and lemma/Schuh?opts=comp...
+ OPTS_SETTING_INT = 2; // lemma/Schuh?prop1=... [internal mode].
+
+ public String
+ lemma, // requested lemma, after conversion = 'ß' -> 'ss', i.e. 'schliesslich'.
+ lemmaOrig; // original lemma version, as submitted (e.g. 'schließlich').
+ public String
+ lemmaOpts; // lemmatization options as written in request.
+ public int
+ prop1 = 2,
+ prop2 = 2, // lemmatisation rule: 0 = '-', 1 = '+', 2 = undef.
+ spec = 2;
+ // Options set by user: oFlex, oComp etc.
+ // oFlex = 0 : no flexions / 1 : flexions / 2 : undefined.
+ public int
+ oFlex = 2,
+ oComp = 2,
+ oOther = 2,
+ oSpec = 2;
+ public int
+ optsSetting = OPTS_SETTING_C2;
+ Connection
+ con = null;
+ Statement
+ stmt = null;
+ public ResultSet
+ res = null;
+ public int
+ nWfs = -1; // -1: error occured; 0 = lemma has no wordforms; > 0 = no. of
+ // wordforms extracted for that lemma.
+ // nWfs is filled after this.res has been read.
+ StringBuilder
+ wordforms = null;
+
+
+ /* LemmaInfo()
+  *
+  * Default constructor: no lemma is set yet, both decomposition
+  * rules prop1 and prop2 are undefined (2).
+  * 13.01.20/FB
+  */
+ LemmaInfo()
+
+ {
+     this.lemmaOrig = null;
+     this.lemma     = null;
+     this.prop1     = this.prop2 = 2;
+ }
+
+ /* LemmaInfo(lemma)
+  *
+  * Constructor: keeps the lemma as submitted in lemmaOrig and stores the
+  * version with 'ß' replaced by 'ss' in lemma; prop1, prop2 = 2 (undef).
+  * 13.01.20/FB
+  */
+ LemmaInfo(final String lemma)
+
+ {
+     this.prop1 = this.prop2 = 2;
+     this.lemmaOrig = lemma;
+     this.lemma = this.lemmaOrig.replace("ß", "ss");
+ }
+
+ /* LemmaInfo(lemma, prop1, prop2)
+  *
+  * Constructor, e.g. for "Haus&p1=1&p2=0".
+  * prop1 and prop2 are stored only if they are 0 or 1, else as 2 (undef).
+  * To be replaced.
+  * 13.01.20/FB
+  */
+
+ LemmaInfo(String lemma, int prop1, int prop2)
+
+ {
+     this.lemmaOrig = lemma;
+     this.lemma     = lemma.replace("ß", "ss");
+     this.prop1     = (prop1 != 0 && prop1 != 1) ? 2 : prop1;
+     this.prop2     = (prop2 != 0 && prop2 != 1) ? 2 : prop2;
+ }
+
+ /* parseAllOptions
+  *
+  * Input: options of C2 Option Setting or options of Extended Options Setting
+  *        through Query Params.
+  * Either C2 Setting      : opts, or
+  *        Extended Setting: prop1, prop2, spec
+  * will be set.
+  * If options = "undef" (or null), Extended Options are requested instead of
+  * C2 Options.
+  * If prop1 == prop2 == spec == 3: Extended Setting is not requested.
+  * Default, if both are not set: options=flex.
+  *
+  * Stores the chosen setting in this.optsSetting and delegates to
+  * parseOpts_C2Setting() resp. parseOpts_ExtendedSetting().
+  *
+  * Accepted constants for options: GLEMM_OPTS_x, e.g. "flex+comp".
+  * Returns: 0 = OK, else the no. of unknown options.
+  * Note:
+  * - flex is set either explicitly by submitting option "flex", or
+  *   implicitly by submitting any other option.
+  * - First of all, we implement the C2 Option Setting.
+  * 15.01.20/FB
+  * - implementation of extended flag options: 31.01.20/FB
+  */
+
+ int parseAllOptions(String options, int prop1, int prop2, int spec)
+
+ {
+     // a missing (null) options parameter is handled like "undef".
+     final boolean
+         c2Requested  = options != null && !options.equals("undef"),
+         intRequested = prop1 != 3 || prop2 != 3 || spec != 3;
+
+     if( !c2Requested && intRequested )
+     {
+         this.optsSetting = OPTS_SETTING_INT;
+         return parseOpts_ExtendedSetting(prop1, prop2, spec);
+     }
+
+     // C2 Setting; if no setting is requested at all, the default is "flex".
+     this.optsSetting = OPTS_SETTING_C2;
+     return parseOpts_C2Setting(c2Requested ? options : "flex");
+
+ } // parseAllOptions
+
+ /*
+  * parseOpts_C2Setting
+  *
+  * Parses the options of the C2 Option Setting and converts them to the
+  * internal flags prop1, prop2 and spec.
+  *
+  * expected: "(flex) (comp) (other) (spec)", separated by single blanks.
+  * Returns : 0 = OK, else the no. of unknown options (each of them is
+  *           reported on GlemmServices.ferr); in that case prop1 and prop2
+  *           are left at 0 and spec is not touched.
+  * Notes:
+  * - Jersey transforms "flex+comp+other..." automatically to
+  *   "flex comp other ...".
+  * - oFlex is always set to 1, empty options are ignored.
+  * - oComp, oOther and oSpec are only ever set to 1 here, they are not
+  *   reset before parsing.
+  * - flags: 0 = default (-Comp, -Other, -Spec);
+  *          2 = undef, i.e. the option has been requested.
+  * 31.01.20/FB
+  */
+
+ int parseOpts_C2Setting(final String options)
+
+ {
+     final String
+         func = "parseOpts_C2Setting";
+     final String[]
+         tokens = options.split("[ ]");
+     int
+         nUnknown = 0;
+
+     // default: +Flex -Comp -Other -Spec.
+     this.prop1 = 0;
+     this.prop2 = 0;
+     this.oFlex = 1;
+
+     for(final String token : tokens)
+     {
+         if( token.isEmpty() || token.equals(GLEMM_OPTS_FLEX) )
+         {
+             this.oFlex = 1;
+         }
+         else if( token.equals(GLEMM_OPTS_COMP) )
+         {
+             this.oComp = 1;
+         }
+         else if( token.equals(GLEMM_OPTS_OTHER) )
+         {
+             this.oOther = 1;
+         }
+         else if( token.equals(GLEMM_OPTS_SPEC) )
+         {
+             this.oSpec = 1;
+         }
+         else
+         {
+             GlemmServices.ferr.printf("Error: %s: unknown Glemm Option '%s'!\n", func, token);
+             nUnknown++;
+         }
+     }
+
+     if( nUnknown > 0 )
+         return nUnknown;
+
+     // conversion of User Options -> internal flags:
+     // a requested option leaves its flag undefined (2), else the flag is 0.
+     this.prop1 = this.oComp  == 1 ? 2 : 0;
+     this.prop2 = this.oOther == 1 ? 2 : 0;
+     this.spec  = this.oSpec  == 1 ? 2 : 0;
+
+     return 0; // OK
+ } // parseOpts_C2Setting
+
+ /*
+  * parseOpts_ExtendedSetting
+  *
+  * Sets the internal flags prop1, prop2 and spec directly from the
+  * submitted parameters of the Extended Options Setting.
+  *
+  * expected: prop1, prop2, spec may have the following values:
+  *           0 : false;
+  *           1 : true;
+  *           2 : undef, i.e. do not set that flag for SQL Request.
+  *           3 : flag not requested
+  * Returns : always 0 (OK).
+  * Notes:
+  * - 0 and 1 are stored as submitted; any other value, including 3, is
+  *   stored as 2 (undef).
+  * 31.01.20/FB
+  */
+
+ int parseOpts_ExtendedSetting(final int prop1, final int prop2, final int spec)
+
+ {
+     final int
+         undef = 2; // stored for every value other than 0 and 1.
+
+     // take over explicit 0/1 requests, map everything else to undef:
+     this.prop1 = (prop1 != 0 && prop1 != 1) ? undef : prop1;
+
+     this.prop2 = (prop2 != 0 && prop2 != 1) ? undef : prop2;
+
+     this.spec  = (spec  != 0 && spec  != 1) ? undef : spec;
+
+     return 0; // OK
+
+ } // parseOpts_ExtendedSetting
+
+ /* parseOptions
+ *
+ * @Deprecated
+ *
+ * Input: Path Param options, separated by blanks or commas.
+ * Parameter options is parsed, checked against the constants GLEMM_OPTS_x
+ * and stored into oFlex, oComp, oOther, oSpec; afterwards these are
+ * converted to the internal flags prop1, prop2, spec (C2 Option Setting).
+ * Accepted constants: GLEMM_OPTS_x, including the negated forms,
+ * e.g. "flex+comp".
+ * Returns: no. of unknown options (0 = OK); on errors the internal flags
+ * are not converted.
+ * Note:
+ * - First of all, we implement the C2 Option Setting.
+ * 15.01.20/FB
+ */
+
+ int parseOptions(String options)
+
+ {
+ final String
+ func = "parseOptions";
+ final boolean
+ modeC2 = true; // COSMAS II Option Setting.
+ String[]
+ opts = options.split("[ ,]");
+ int
+ nErrors = 0;
+
+ this.prop1 = this.prop2 = 0; // default: +Flex -Comp -Other -Spec.
+
+ for(int i=0; i<opts.length; i++)
+ {
+ //System.out.printf("Debug: %s: i=%d opts='%s'.\n", func, i, opts[i]);
+ switch(opts[i]) {
+
+ case GLEMM_OPTS_FLEX:
+ this.oFlex = 1;
+ break;
+ case GLEMM_OPTS_NFLEX:
+ this.oFlex = 0;
+ break;
+ case GLEMM_OPTS_COMP:
+ this.prop1 = 1;
+ this.oComp = 1;
+ break;
+ case GLEMM_OPTS_NCOMP:
+ this.prop1 = 1; // NOTE(review): same value as for "comp"; overwritten below unless an error occurs.
+ this.oComp = 0;
+ break;
+ case GLEMM_OPTS_OTHER:
+ this.prop2 = 1;
+ this.oOther = 1;
+ break;
+ case GLEMM_OPTS_NOTHER:
+ this.prop2 = 1; // NOTE(review): same value as for "other"; overwritten below unless an error occurs.
+ this.oOther = 0;
+ break;
+ case GLEMM_OPTS_SPEC:
+ // neither of prop1 nor prop2 is set by this option.
+ this.oSpec = 1;
+ break;
+ case GLEMM_OPTS_NSPEC:
+ // neither of prop1 nor prop2 is set by this option.
+ this.oSpec = 0;
+ break;
+ default:
+ if( opts[i].isEmpty() )
+ {
+ // empty option, e.g. between two separators: don't set anything here.
+ }
+ else
+ {
+ GlemmServices.ferr.printf("Error: %s: unknown GLEMM Option '%s'!\n", func, opts[i]);
+ nErrors++;
+ }
+ } // switch
+ }
+
+ if( nErrors > 0 )
+ return nErrors;
+
+ // conversion of User Options -> internal flags:
+ // COSMAS II Options Setting:
+ if( modeC2 )
+ {
+ this.prop1 = this.prop2 = this.spec = 0;
+
+ // prop1 = 2 = undef = Flex+Comp requested; 1 = only Comp requested.
+ if( this.oComp == 1 )
+ this.prop1 = this.oFlex == 1 ? 2 : 1;
+
+ if( this.oOther == 1 )
+ this.prop2 = 2; // undef: other and not other requested.
+
+ if( this.oSpec == 1 )
+ this.spec = 2; // undef: spec and not spec requested.
+ }
+
+ return nErrors; // OK: nErrors is 0 here.
+ } // parseOptions
+
+ /* optionstoString:
+  *
+  * Returns the options set by the user (oFlex, oComp, oOther, oSpec) as a
+  * string, e.g. "flex+comp"; options that are undefined (2) are skipped.
+  * - an option set to 0 is written with a leading "-";
+  * - any other option is appended with "+", unless it is the first one
+  *   written: no separator, in particular no blank, precedes the first
+  *   option.
+  * 15.01.20/FB
+  */
+
+ String optionstoString()
+
+ {
+     final int[]
+         values = { this.oFlex, this.oComp, this.oOther, this.oSpec };
+     final String[]
+         names  = { GLEMM_OPTS_FLEX, GLEMM_OPTS_COMP, GLEMM_OPTS_OTHER, GLEMM_OPTS_SPEC };
+     final StringBuilder
+         opts = new StringBuilder();
+
+     for(int i=0; i<values.length; i++)
+     {
+         if( values[i] == 2 )
+             continue; // undefined: not set by the user.
+
+         if( values[i] == 0 )
+             opts.append("-");
+         else if( opts.length() > 0 )
+             opts.append("+");
+         opts.append(names[i]);
+     }
+
+     return opts.toString();
+ } // optionstoString
+
+ /* * * * * * * * * * * * *
+  * close:
+  * closes the SQL stuff used for extracting the lemma's wordforms: res, stmt
+  * and con, each on its own, so a failing close() does not leak the others.
+  * 14.01.20/FB
+  * * * * * * * * * * * * */
+
+ void close()
+
+ {
+     final AutoCloseable[]
+         resources = { this.res, this.stmt, this.con }; // closing order.
+
+     GlemmServices.fout.printf("Debug: close: res=%s con=%s stmt=%s.\n",
+         this.res != null ? "allocated" : "null",
+         this.con != null ? "allocated" : "null",
+         this.stmt!= null ? "allocated" : "null");
+
+     this.res = null; this.stmt = null; this.con = null;
+
+     for(final AutoCloseable resource : resources)
+     {
+         try {
+             if( resource != null )
+                 resource.close();
+         }
+         catch (Exception e) { // AutoCloseable.close() declares Exception.
+             e.printStackTrace();
+         }
+     }
+ } // close
+
+ /* wfs2HTML
+  * writes wfs to a HTML formatted buffer, one table row per wordform, and
+  * returns it; the no. of rows is stored in nWfs.
+  * nWfs = -1, if no result set is available or if reading it fails.
+  * 14.01.20/FB
+  */
+
+ public String wfs2HTML()
+
+ {
+     this.wordforms = new StringBuilder();
+     this.nWfs = -1; // undefined until all rows have been read.
+
+     if( this.res == null )
+         return this.wordforms.toString(); // no query result to convert.
+
+     try {
+         int n = 0;
+         for( ; this.res.next(); n++)
+             this.wordforms.append("<tr><td>").append(this.res.getString("wf")).append("</td></tr>\n");
+         this.nWfs = n;
+     }
+     catch (SQLException e) {
+         e.printStackTrace(); // nWfs stays -1 due to error.
+     }
+
+     return this.wordforms.toString();
+
+ } // wfs2HTML
+
+ /* res2JSON():
+  *
+  * transforms result of Lemma Query to a JSON list.
+  * Returns: response with header and list of wordforms; head_nWfs stays -1 and
+  *          head_errMess is set, if res is missing or reading it fails.
+  * 27.01.20/FB
+  */
+
+ public LemmaResponse res2JSON()
+
+ {
+     final String
+         func = "res2JSON";
+     LemmaResponse
+         lemResp = null;
+     try {
+         lemResp = new LemmaResponse();
+         lemResp.listofWfs = new ArrayList<>();
+     }
+     catch ( Exception e ){
+         GlemmServices.ferr.printf("Error: %s: cannot allocate memory for %s!\n",
+             func, lemResp == null ? "lemResp" : "listofWfs");
+         e.printStackTrace();
+         return lemResp; // null, if the response itself could not be created.
+     }
+
+     // fill header:
+     lemResp.head_nWfs    = -1;
+     lemResp.head_query   = this.lemmaOrig;
+     lemResp.head_options = this.optionstoString();
+
+     if( this.res == null )
+     {   // res is null before a query result is stored and after close().
+         this.nWfs = -1;
+         lemResp.head_errMess = String.format("Error: %s: no result set available!\n", func);
+         return lemResp;
+     }
+
+     // fill lemma list:
+     try {
+         for(this.nWfs=0; this.res.next(); this.nWfs++)
+             lemResp.listofWfs.add(this.res.getString("wf"));
+     }
+     catch (SQLException e) {
+         e.printStackTrace();
+         lemResp.head_errMess = String.format("Error: %s: cannot convert all lemmata! returns after %d lemmata.\n",
+                                              func, this.nWfs);
+         return lemResp;
+     }
+
+     lemResp.head_nWfs = this.nWfs;
+     return lemResp;
+ } // res2JSON
+
+
+ /* res2TEXT():
+ *
+ * transforms result of Lemma Query to a list of wordforms in Text/plain format.
+ * 28.01.20/FB
+ */
+
+ public String res2TEXT()
+
+ {
+ final String
+ func = "res2TEST";
+ StringBuilder
+ result;
+
+ try {
+ result = new StringBuilder();
+ }
+ catch (Exception e) {
+ GlemmServices.ferr.printf("Error: %s: cannot allocated memory!\n", func);
+ e.printStackTrace();
+ return String.format("Error: %s: cannot allocate memory for wordform list!\n", func);
+ }
+
+ // fill StringBuffer with list of wfs:
+ try {
+ for(this.nWfs=0; this.res.next(); this.nWfs++)
+ {
+ result.append(this.res.getString("wf") + "\n");
+ }
+ }
+ catch (SQLException e) {
+ e.printStackTrace();
+ return String.format("Error: %s: cannot convert all lemmata! returns after %d lemmata.\n",
+ func, this.nWfs);
+ }
+
+ return result.toString();
+
+ } // res2TEXT
+
+ /*
+  * decompRulestoString:
+  *
+  * transforms the GLEMM decomposition rules prop1+2 to a human
+  * readable string; returns "" for any combination other than 0/1.
+  * See e.g. COSMAS II: src/util/list2sgml.c.
+  * 15.01.20/FB
+  * 21.10.21/FB C2-option 'Spec' is no Glemm option and thus not returned here.
+  */
+
+ static String decompRulestoString(int prop1, int prop2)
+
+ {
+     if( prop1 == 0 && prop2 == 0)
+         return "Flexionsformen";
+     else if( prop1 == 1 && prop2 == 0)
+         return "Komposita";
+     else if( prop1 == 0 && prop2 == 1)
+         return "sonst. Wortbildungsformen";
+     else if( prop1 == 1 && prop2 == 1)
+         return "Komposita und sonst. Wortbildungsformen";
+     else
+         return "";
+
+ } // decompRulestoString
+
+ /*
+  * isWfSpecialCase
+  *
+  * Returns true, if wordform should be returned for GLEMM_OPTS_SPEC,
+  * i.e. if it contains hyphens or special chars.
+  *
+  * Input  : wf = wordform to be checked; must not be null.
+  * Returns: true, if at least one character of wf is neither a letter nor
+  *          a digit according to Character.isLetterOrDigit();
+  *          false otherwise, also for the empty string.
+  * Examples:
+  * - "Haus", "A4"       : false.
+  * - "Haus-Tuer", "a b" : true.
+  * Notes:
+  * - as tested by the author on the chars of ISO8859-1, isLetterOrDigit
+  *   identifies letters and digits as such, interpunction and other chars
+  *   as non-letter.
+  * - wf is checked by Unicode code points, so that letters outside of the
+  *   Basic Multilingual Plane are not taken for special chars.
+  * 15.01.20/FB
+  */
+
+ static boolean isWfSpecialCase(String wf)
+
+ {
+     int
+         i = 0;
+
+     while( i < wf.length() )
+     {
+         // a code point may consist of two chars (surrogate pair).
+         final int
+             cp = wf.codePointAt(i);
+
+         if( !Character.isLetterOrDigit(cp) )
+         {
+             return true; // wf contains at least one hyphen or special char.
+         }
+
+         i += Character.charCount(cp);
+     }
+
+     return false; // letters and digits only.
+
+ } // isWfSpecialCase
+
+
+}
+