blob: fdb2af3c244491dae5d9a5e95ef4c0696592739a [file] [log] [blame]
package de.korap.services;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.LocalDateTime;
import java.util.Properties;
import de.korap.services.utils.Utils;
public class GlemmDB
{
static public String dbTable = "GlemmLemmata";
static String dbLemma = "lemma";
static String dbWf = "wf";
static String dbProp1 = "prop1";
static String dbProp2 = "prop2";
// Derby Database access (overwritten by GlemmServices.properties):
private static String fnameDB = "/home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB";
private static String serverHost = "//localhost:1527/";
private static String derbyDBURL = "jdbc:derby:" + serverHost + fnameDB;
// Derby Database access (final assignments):
final private String derbyDriverEmb = "org.apache.derby.jdbc.EmbeddedDriver";
final private String derbyDriverClient = "org.apache.derby.jdbc.ClientDriver";
final static boolean
usePreparedStmt = true; // 1=use PreparedStatement and not Statement.
// values to be inserted: lemma, wf, prop1, prop2, spec.
static String insertQuery = "INSERT into " + dbTable + " VALUES ('%s', '%s', '%s', '%s', '%s')";
static String insertPreparedQuery = "INSERT into " + dbTable + " VALUES (?, ?, ?, ?, ?)";
// Constants of table GlemmLemmata:
static final int maxLemmaSize = 64; // max. no. of chars, as declared in table GlemmLemmata.
static final int maxWfSize = 64; // max. no. of chars, as declared in table GlemmLemmata.
/* variables */
static int nExceptStmt = 0; // count no. of exceptions for preparedStmt.
static int nExceptInsert = 0; // count no. of exceptions for Inserts.
/*
* set_ConstantsFromProperties
*
* Arguments:
* props : Properties loaded by the GlemmServices.
* Notes:
* - this function loads only those Constants needed in this class.
* 15.10.21/FB
*/
private static void set_ConstantsFromProperties(Properties props)
{
fnameDB = Utils.getConstantFromProperties(props, "fnameDB", "/home/bodmer/KorAP/GlemmServices/Dbs/DerbyTestDB");
serverHost = Utils.getConstantFromProperties(props, "serverHost", "//localhost:1527/");
derbyDBURL = "jdbc:derby:" + serverHost + fnameDB;
} // set_ConstantsFromProperties
/* checkDerbyConnection:
*
* check if Derby Network Server is available.
* Returns: 0 (=OK) or -1 (on error).
* 05.11.21/FB
*/
public static int checkDerbyConnection()
{
final String
func = "checkDerbyConnection";
Connection
con = null;
// open Connection to Derby Database,
// set Autocommit = false to accelerate insertions:
try {
GlemmServices.fout.printf("Debug: %s: connecting to '%s'...\n", func, derbyDBURL);
con = DriverManager.getConnection(derbyDBURL);
GlemmServices.fout.printf("Debug: %s: connected.\n", func);
}
catch (SQLException e)
{
GlemmServices.ferr.printf("Error: %s: connecting failed!\n", func);
e.printStackTrace(GlemmServices.ferr);
return -1;
}
finally
{
try { // closing connection:
if( con != null )
con.close();
}
catch (SQLException e) {
e.printStackTrace(GlemmServices.ferr);
}
}
return 0; // OK
} // check_DerbyConnection
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* importGlemmLemata
* Parameters:
* fn : absolute file name of GLEMM output [ISO8859-1] expected, not UTF-8!
* props : Properties vom GlemmServices.properties to set constants needed
* in this class.
* dbpath : location + name of database where to store lemmata+wfs.
* Returns : 0=OK, -1: general error; > 0 : no. of lemmata not stored.
*
* 10.01.20/FB
* 15.10.21/FB dynamically loading constants from GlemmServices.properties.
* 04.11.21/FB dbpath.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
public static int importGlemmLemmata(String fn, Properties props, final String dbpath)
{
final String
func = "importGlemmLemmata";
BufferedReader
in = null;
int
nErrors;
// load GlemmServices properties and log them (when last param = true):
set_ConstantsFromProperties(props);
// when indexing data into the database, select the submitted dbpath,
// (not the one stored in the GlemmServices.properties):
derbyDBURL = "jdbc:derby:" + serverHost + dbpath;
// open reader on Glemm Lemmata:
try {
GlemmServices.fout.printf("Debug: %s: reading from '%s'.\n", func, fn);
in = new BufferedReader(new FileReader(fn));
nErrors = writeDBLemmata(in);
}
catch (IOException e) {
GlemmServices.ferr.printf("Error: %s: while reading from '%s'!\n", func, fn);
e.printStackTrace(GlemmServices.ferr);
return -1;
}
// close Reader:
try {
in.close();
}
catch (IOException e) {
nErrors = -1;
}
if( nErrors == 0 )
GlemmServices.fout.printf("Debug: %s: Import done successfully.\n", func);
else
GlemmServices.ferr.printf("Debug: %s: Import: %d Errors!\n", func, nErrors);
return nErrors; // OK
} // importGlemmLemmata
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* writeDBLemmata()
*
* stores list of lemmata + infos into Derby db.
* File format: "Lemma Decomposition Wf",
* e.g.: "Häuser +- Haus".
* Returns: 0 = OK or
* -1: SQL insert error, or
* -2: connection error to Derby Server, or
* > 0: no. of lemmata not stored.
* 09.01.20/FB
* 04.11.21/FB turn autocommit off to accelerate huge amount of insertions.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
private static int writeDBLemmata(BufferedReader in)
{
final String
func = "writeDBLemmata";
Connection
con = null;
PreparedStatement
stmtPrep = null;
Statement
stmt = null;
String
line;
String[]
lemmaInfo;
int
nErrors = 0,
nInserted = 0, // no. of lemma/wf pairs imported.
ret = 0; // OK.
boolean
containsSpec;
// open Connection to Derby Database,
// set Autocommit = false to accelerate insertions:
try {
GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
con = DriverManager.getConnection(derbyDBURL);
con.setAutoCommit(false);
GlemmServices.fout.printf("Debug: %s: connected (autoCommit=off).\n", func);
if( usePreparedStmt )
stmtPrep = con.prepareStatement(insertPreparedQuery);
else
stmt = con.createStatement();
while( (line = in.readLine()) != null )
{
//System.out.printf("Debug: import: '%s'.\n", line);
lemmaInfo = line.split("\\s");
if( lemmaInfo.length != 3 )
{
GlemmServices.ferr.printf("Error: %s: '%s' has %d components! Expected = 3! Cannot be stored!\n", func, line, lemmaInfo.length);
nErrors++;
continue;
}
// Get Info, if wordform contains special chars like '-', interpunction etc.:
// lemmaInfo[2] = wordform.
containsSpec = LemmaInfo.isWfSpecialCase(lemmaInfo[2]);
// store into DB:
if( usePreparedStmt )
ret = writeDBLemmaInfos_prepared(stmtPrep, lemmaInfo[0], lemmaInfo[2], lemmaInfo[1], containsSpec);
else
ret = writeDBLemmaInfos(stmt, lemmaInfo[0], lemmaInfo[2], lemmaInfo[1], containsSpec);
if( ret != 0 )
{
GlemmServices.ferr.printf("Error: %s: cannot store lemma '%s' into DB! (ret=%d)\n", func, line, ret);
nErrors++;
}
else
nInserted++;
} // while
}
catch (IOException e) {
GlemmServices.ferr.printf("Error: %s: while reading Glemm lemmata+infos from file! Exit!\n", func);
e.printStackTrace(GlemmServices.ferr);
return -2;
}
catch (SQLException e) {
if( con == null )
GlemmServices.ferr.printf("Error: %s: connection failed!\n", func);
else
GlemmServices.ferr.printf("Error: %s: cannot create stmt!\n", func);
e.printStackTrace(GlemmServices.ferr);
return -1;
}
finally
{
try {
// important: now commit, as autoCommit is deactivated,
// this also removes database locks:
con.commit();
GlemmServices.fout.printf("Debug: %s: commit successfull.\n", func);
}
catch (SQLException e) {
GlemmServices.ferr.printf("Error: %s: commit at the end of the wfs list failed!\n", func);
e.printStackTrace(GlemmServices.ferr);
}
try { // closing connection:
if( con != null )
con.close();
if( stmt != null )
stmt.close();
if( stmtPrep != null )
stmtPrep.close();
}
catch (SQLException e) {
e.printStackTrace(GlemmServices.ferr);
}
}
GlemmServices.fout.printf("Info : %s: no. of lemmata/wf pairs inserted = %d.\n", func, nInserted);
return nErrors; // 0=OK, >0 = no. of errors.
} // writeDBLemmata
/* * * * * * * * * * * * * * * * * * * * * * * * *
* writeDBLemmaInfos_prepared:
* write Lemma + Wf + Decomposition rules into Derby DB.
* Notes:
* - this is the version with a PreparedQuery.
* Parameters:
* spec : if true: lemma contains special chars like hyphen etc.
* returns 0 = OK.
* 17.01.20/FB
* 05.1.21/FB reject lemma/wf > 64 chars.
* * * * * * * * * * * * * * * * * * * * * * * * */
private static int writeDBLemmaInfos_prepared(
PreparedStatement stmt,
String lemma,
String wf,
String decomposition,
boolean spec)
{
final String
func = "writeDBLemmaInfos_prepared";
final boolean
bLog = false;
String
sqlInsert = null;
boolean
prop1, prop2;
int
n;
// check size of wf and lemma against field size in table GlemmLemmata:
if( lemma.length() >= GlemmDB.maxLemmaSize )
{
GlemmServices.ferr.printf("Error: %s: lemma rejected (> %d): '%s'!\n",
func, maxLemmaSize, lemma);
return 4;
}
if( wf.length() >= GlemmDB.maxWfSize )
{
GlemmServices.ferr.printf("Error: %s: wf rejected (> %d): '%s'!\n",
func, maxWfSize, wf);
return 5;
}
prop1 = decomposition.charAt(0) == '+' ? true : false;
prop2 = decomposition.charAt(1) == '+' ? true : false;
// for loging only:
sqlInsert = String.format(insertQuery, lemma, wf, prop1, prop2, spec);
if( bLog )
GlemmServices.fout.printf("Debug: %s: sqlInsert='%s'.\n", func, sqlInsert);
// now insert new values into PreparedStatement:
try {
stmt.setString( 1, lemma);
stmt.setString( 2, wf);
stmt.setBoolean(3, prop1);
stmt.setBoolean(4, prop2);
stmt.setBoolean(5, spec);
//System.out.printf("Debug: %s: prepared stmt = '%s'.\n", func, stmt.toString());
}
catch (SQLException e1) {
GlemmServices.ferr.printf( "Error: %s: while filling prepared statement with lemma='%s' wf='%s' prop1='%b' prop2='%b' spec='%b'!\n",
func, lemma, wf, prop1, prop2, spec);
if( ++nExceptStmt <= 1 )
e1.printStackTrace(GlemmServices.ferr);
return 1;
}
try {
// execute the filled prepared statement:
n = stmt.executeUpdate();
if( n != 1 )
{
GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b! (n=%d)\n",
func, lemma, wf, prop1, prop2, spec, n);
return 2;
}
}
catch (SQLException e) {
GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
func, lemma, wf, prop1, prop2, spec);
if( ++nExceptInsert <= 1 )
e.printStackTrace(GlemmServices.ferr);
return 3;
};
return 0; // OK
} // writeDBLemmaInfos_prepared
/* * * * * * * * * * * * * * * * * * * * * * * * *
* writeDBLemmaInfos:
* write Lemma + Wf + Decomposition into Derby DB.
* Parameters:
* containsSpec : if true: lemma contains special chars like hyphen etc.
* returns 0 = OK.
* 10.01.20/FB
* * * * * * * * * * * * * * * * * * * * * * * * */
private static int writeDBLemmaInfos(Statement stmt, String lemma, String wf, String decomposition, boolean spec)
{
final String
func = "writeDBLemmaInfos";
String
sqlInsert = null;
Boolean
prop1, prop2;
int
n;
prop1 = decomposition.charAt(0) == '+' ? true : false;
prop2 = decomposition.charAt(1) == '+' ? true : false;
sqlInsert = String.format(insertQuery, lemma, wf, prop1, prop2, spec);
GlemmServices.fout.printf("Debug: %s: sqlInsert='%s'.\n", func, sqlInsert);
try {
n = stmt.executeUpdate(sqlInsert);
if( n != 1 )
{
GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
func, lemma, wf, prop1, prop2, spec);
return 1;
}
}
catch (SQLException e) {
GlemmServices.ferr.printf("Error: %s: while inserting lemma='%s' wf='%s' prop1=%b prop2=%b spec=%b!\n",
func, lemma, wf, prop1, prop2, spec);
e.printStackTrace(GlemmServices.ferr);
return 1;
};
return 0; // OK
} // writeDBLemmaInfos
/* * * * * * * * * * * * * * * * * * * * * * * * * *
* readDBLemmaData
*
* Reads wordforms for one lemma from the GlemmDB.
* Input: lemma + GLEMM_OPTS_xxx (lemmatization rules).
*
* 14.01.20/FB
* * * * * * * * * * * * * * * * * * * * * * * * * */
static int readDBLemmaData(LemmaInfo lemmaInfo, Properties props)
{
final String
func = "readDBLemmaData";
// load GlemmServices properties and log them (when last param = true):
set_ConstantsFromProperties(props);
try {
GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
lemmaInfo.con = DriverManager.getConnection(derbyDBURL);
GlemmServices.fout.printf("Debug: %s: connected.\n", func);
lemmaInfo.stmt = lemmaInfo.con.createStatement();
int r = readDBLemma(lemmaInfo.stmt, lemmaInfo);
if( r != 0 )
{
GlemmServices.ferr.printf("Error: %s: cannot get data for lemma='%s' options='%s'!\n",
func, lemmaInfo.lemma, lemmaInfo.optionstoString());
return 1;
}
}
catch (SQLException e) {
if( lemmaInfo.con == null )
GlemmServices.ferr.printf("Error: %s: connection failed!\n", func);
else
GlemmServices.ferr.printf("Error: %s: cannot create stmt!\n", func);
e.printStackTrace(GlemmServices.ferr);
return 1;
}
return 0; // OK
} // readDBLemmaData
/* * * * * * * * * * * * * * * * * * * * * * * * * *
* readDBLemma
*
* Reads wordforms for one lemma by accessing the GlemmDB.
* Input : lemma + prop1+2 (lemmatization rules).
* Output:
* - contains a ResultSet for fetching the wordforms.
* This ResultSet will have be be closed with LemmaInfo.close().
* The no. of found wordforms is not known till we
* fetch all of them from ResultSet!
* - due to SELECT DISTINCT only uniq wf are returned.
*
* 14.01.20/FB
* * * * * * * * * * * * * * * * * * * * * * * * * */
static int readDBLemma(Statement stmt, LemmaInfo lemmaInfo)
{
final String
func = "readDBLemma";
String
sql;
sql = String.format("SELECT DISTINCT wf from %s WHERE lemma='%s' %s %s %s",
dbTable,
lemmaInfo.lemma,
lemmaInfo.prop1 == 0 ? "AND prop1='false'" :
lemmaInfo.prop1 == 1 ? "AND prop1='true'" : "",
lemmaInfo.prop2 == 0 ? "AND prop2='false'" :
lemmaInfo.prop2 == 1 ? "AND prop2='true'" : "",
lemmaInfo.spec == 1 ? "AND spec='true'" :
lemmaInfo.spec == 0 ? "AND spec='false'" : "");
//System.out.printf("Debug: %s: prop1=%d prop2=%d spec=%d.\n", func, lemmaInfo.prop1, lemmaInfo.prop2, lemmaInfo.spec);
GlemmServices.fout.printf("Debug: %s: sql = '%s'.\n", func, sql);
try {
lemmaInfo.res = stmt.executeQuery(sql);
}
catch (SQLException e) {
GlemmServices.ferr.printf("Error: %s: when executing sql='%s'!\n", func, sql);
e.printStackTrace(GlemmServices.ferr);
return 1;
}
finally {
// all sql stuff are closed outside this function, so don't close them here!
}
return 0; // OK
} // readDBLemma
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* readDbInfos
* Returns General Infos about the data stored in the Glemm Database.
* 10.01.20/FB
* 15.10.21/FB read GlemmServices.properties.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
public static GlemmDBInfo readDbInfos(Properties props)
{
final String
func = "readDbInfos";
Connection
con = null;
Statement
stmt = null;
GlemmDBInfo
dbInfo = null;
// load GlemmServices properties and log them (when last param = true):
set_ConstantsFromProperties(props);
// open Connection to Derby Database:
try {
GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
con = DriverManager.getConnection(derbyDBURL);
GlemmServices.fout.printf("Debug: %s: connected.\n", func);
stmt = con.createStatement();
dbInfo = readDbLemmataInfos(stmt);
if( dbInfo == null )
{
GlemmServices.ferr.printf("Error: %s: cannot read General Infos from Glemm Database!\n", func);
}
}
catch (SQLException e) {
if( con == null )
GlemmServices.ferr.printf("Error: %s: connection failed!\n", func);
else
GlemmServices.ferr.printf("Error: %s: cannot create stmt!\n", func);
e.printStackTrace(GlemmServices.ferr);
return null;
}
finally {
// closing connection:
try {
if( con != null ) con.close();
if( stmt != null ) stmt.close();
}
catch (SQLException e) {
e.printStackTrace(GlemmServices.ferr);
}
}
return dbInfo; // may be null.
} // readDbInfos
/* * * * * * * * * * * * * * * * * *
* readDbLemmataInfos:
* reads and returns general infos about the content
* of the Glemm Database.
* Returns 0=OK or error code > 0.
* 10.01.20/FB
*/
private static GlemmDBInfo readDbLemmataInfos(Statement stmt)
{
final String
func = "readDbLemmaInfos";
String[]
sql = { "SELECT COUNT(distinct lemma) from " + dbTable , // counts no. of distinct lemmata.
"SELECT COUNT(*) from " + dbTable, // counts total no. of stored wordforms.
"SELECT COUNT(*) from " + dbTable + " WHERE prop1=false and prop2=false",
"SELECT COUNT(*) from " + dbTable + " WHERE prop1=true and prop2=false",
"SELECT COUNT(*) from " + dbTable + " WHERE prop1=false and prop2=true",
"SELECT COUNT(*) from " + dbTable + " WHERE prop1=true and prop2=true"};
ResultSet
res = null;
int[]
nCounts = new int[sql.length];
int
i = 0,
nDistLemmata = -1, // no. of distinct lemmata in DB.
nWfs = -1; // no. of wordforms in DB.
GlemmDBInfo
dbInfo = null;
GlemmServices.fout.printf("Debug: %s: extracting agregated infos about Database contents...\n", func);
dbInfo = new GlemmDBInfo();
// extract DB infos and return them:
try {
for(i=0; i<sql.length; i++)
{
res = stmt.executeQuery(sql[i]);
if( res.next() )
dbInfo.counts[i] = res.getInt(1); // count is in column 1.
else
{
GlemmServices.ferr.printf("Error: %s: cannot extract count from '%s'!\n", func, sql[i]);
dbInfo.counts[i] = -1;
}
}
}
catch (SQLException sqlExcept)
{
GlemmServices.ferr.printf("Error: %s: sql='%s' failed!\n", func, sql[i]);
sqlExcept.printStackTrace(GlemmServices.ferr);
return null;
}
finally {
try {
res.close();
}
catch (SQLException e) {
e.printStackTrace(GlemmServices.ferr);
}
}
return dbInfo; // OK
} // readDbLemmataInfos
/******************** Test functions ****************************************/
/* * * * * * * * * * *
* readDbInfos_test
* This is debug code written to test if we can access the Derby
* Database from an external Java Programm.
* 08.01.20/FB
* * * * * * * * * * */
public static int readDbInfos_test()
{
final String
func = "readDbInfos";
Connection
con = null;
Statement
stmt = null;
String
insertStmt = "";
// Connect to Derby database:
GlemmServices.fout.printf("Debug: %s: connecting '%s'...\n", func, derbyDBURL);
// open Connection to Derby Database:
try {
con = DriverManager.getConnection(derbyDBURL);
GlemmServices.fout.printf("Debug: %s: connected.\n", func);
// extract DB infos and return them:
try
{
stmt = con.createStatement();
String
sql = "SELECT * FROM " + dbTable;
ResultSet
result = stmt.executeQuery(sql);
GlemmServices.fout.printf("Debug: %s: table '%s':\n", func, dbTable);
while (result.next()) {
GlemmServices.fout.printf("'%s' : '%s' : '%s' : '%s' .\n", result.getString(1),
result.getString(2),
result.getString(3),
result.getString(4));
insertStmt = insertStmt + "<p>" + result.getString(1) + " : " + result.getString(2) + " : " +
result.getString(3) + " : " + result.getString(4) + "</p>";
}
}
catch (SQLException sqlExcept)
{
GlemmServices.ferr.printf("Error: %s: insertStmt='%s' failed!.\n", func, stmt);
sqlExcept.printStackTrace(GlemmServices.ferr);
con.close();
return 1;
}
}
catch (SQLException e)
{
e.printStackTrace();
GlemmServices.ferr.printf("Error: %s: connecting '%s failed!\n", func, derbyDBURL);
return 1;
}
// closing connection:
try {
con.close();
}
catch (SQLException e) {
e.printStackTrace(GlemmServices.ferr);
}
return 0; // OK
} // readDbInfos_test
/* * * * * * * * * * *
* importGlemmLemata_test
* Test Version.
* 12.12.19/FB
* * * * * * * * * * */
public static int importGlemmLemmata_test(String fn)
{
final String
func = "importGlemmLemmata_test";
InputStream
in;
BufferedReader
in2 = null;
String
line;
String[]
lemmaInfo;
int
r;
/*
* Read Lemmata + Info from file:
*/
GlemmServices.fout.printf("Debug: %s: reading from '%s'.\n", func, fn);
try {
//in = new BufferedInputStream( new FileInputStream(fn) );
in2 = new BufferedReader( new FileReader(fn));
while( (line = in2.readLine()) != null )
{
GlemmServices.fout.printf("Debug: import: '%s'.\n", line);
lemmaInfo = line.split("\\s");
if( lemmaInfo.length == 3 )
GlemmServices.fout.printf("Debug: Import: lem='%s' Decomp='%s' Wf='%s'.\n", lemmaInfo[0], lemmaInfo[1], lemmaInfo[2]);
else
GlemmServices.ferr.printf("Error: importGlemmLamata: '%s' has %d components!\n", line, lemmaInfo.length);
}
}
catch (FileNotFoundException e)
{
e.printStackTrace(GlemmServices.ferr);
GlemmServices.ferr.printf("Error: %s: cannot open '%s'!\n", func, fn);
return 1;
}
catch (IOException e) {
e.printStackTrace(GlemmServices.ferr);
return 1;
}
try {
in2.close();
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace(GlemmServices.ferr);
}
/* Check connection to Derby Database:
*/
r = readDbInfos_test();
if( r != 0 )
return r;
return 0; // OK
} // importGlemmLemmata_test
}