Added parsing of Hit elements.
diff --git a/src/main/java/de/mannheim/ids/sruws/SRU_WS.java b/src/main/java/de/mannheim/ids/sruws/SRU_WS.java
index a58e14c..88e45bf 100644
--- a/src/main/java/de/mannheim/ids/sruws/SRU_WS.java
+++ b/src/main/java/de/mannheim/ids/sruws/SRU_WS.java
@@ -9,7 +9,6 @@
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -70,6 +69,7 @@
private static final String HITS_NS = "http://clarin.eu/fcs/dataview/hits";
private static final String HITS_PREFIX = "hits";
private static final String FCS_NS = "http://clarin.eu/fcs/1.0";
+ private static final String FCS_RESOURCE = "http://clarin.eu/fcs/resource";
private static final String FCS_PREFIX = "fcs";
//private static final String FCS_KWIC_NS = "http://clarin.eu/fcs/1.0/kwic";
//private static final String FCS_KWIC_PREFIX = "kwic";
@@ -100,13 +100,14 @@
@POST
@Produces(MediaType.TEXT_XML)
@Consumes("text/query-cql")
- public StreamingOutput doPost(@QueryParam("query") String query,
- @QueryParam("endpoint") String endpoint,
- @QueryParam("maximumRecords") int maximumRecords) throws IOException, ServletException{
-
- return process(endpoint, query, maximumRecords);
+ public StreamingOutput doPost(String query,
+ @QueryParam("endpoint") String endpoint,
+ @QueryParam("maximumRecords") int maximumRecords)
+ throws IOException, ServletException {
+
+ return process(endpoint, query, maximumRecords);
}
-
+
private StreamingOutput process(String endpoint, String query, int max)
throws IOException, ServletException {
@@ -131,11 +132,11 @@
if (max==0) max = DEFAULT_MAX_RECORDS;
- endpointUri = config.getInitParameter(endpointBase+endpoint);
- logger.info("Endpoint URL {} ",endpointUri);
+ endpointUri = config.getInitParameter(endpointBase + endpoint);
+ // logger.info("Endpoint URL {} ",endpointUri);
generateCorpusList();
- logger.info(query.toString());
+ // logger.info(query.toString());
HttpGet getReq = createSearchRetrieveRequest(query, max);
CloseableHttpClient httpClient = HttpClients.createDefault();
@@ -146,6 +147,7 @@
String text;
try {
text = readKwicInput(bis);
+ // text = readKwicInput(bis);
//logger.info(text);
} catch (XMLStreamException e) {
throw new ServletException("XML streaming error");
@@ -260,14 +262,27 @@
e = reader.next();
if (e == XMLEvent.START_ELEMENT &&
+ reader.getLocalName().equals("Result")
+ && reader.getPrefix().equals(HITS_PREFIX)
+ && reader.getNamespaceURI().equals(HITS_NS)) {
+
+ sb = new StringBuilder();
+ reader.next();
+ sb.append(reader.getText());
+ sb.append(" "); // left context
+ }
+ else if (e == XMLEvent.START_ELEMENT
+ &&
reader.getLocalName().equals("Resource") &&
reader.getPrefix().equals(FCS_PREFIX) &&
- reader.getNamespaceURI().equals(FCS_NS)){
+ (reader.getNamespaceURI().equals(FCS_NS)||
+ reader.getNamespaceURI().equals(FCS_RESOURCE)) ){
for (int i=0; i<reader.getAttributeCount(); i++){
if (reader.getAttributeLocalName(i).equals("pid")){
String pid = reader.getAttributeValue(i);
- logger.info("Corpus: " + URLDecoder.decode(pid,"UTF-8"));
+ // logger.info("Corpus: " +
+ // URLDecoder.decode(pid,"UTF-8"));
c = corpusList.get(URLDecoder.decode(pid,"UTF-8"));
if (c == null) c = defaultCorpus;
}
@@ -288,10 +303,14 @@
e = reader.next();
}
sb.append(" ");
- }
+ }
else if (e == XMLEvent.START_ELEMENT &&
- reader.getLocalName().equals("kw")){
+ (reader.getLocalName().equals("kw") ||
+ (reader.getLocalName().equals("Hit")
+ && reader.getPrefix().equals(HITS_PREFIX)
+ && reader.getNamespaceURI().equals(HITS_NS)))
+ ){
e = reader.next();
if (e == XMLEvent.CHARACTERS){
String keyword = reader.getText();
@@ -302,61 +321,20 @@
}
else if (e == XMLEvent.END_ELEMENT &&
- reader.getLocalName().equals("kwic")){
+ (reader.getLocalName().equals("kwic") ||
+ reader.getLocalName().equals("Hit"))
+ ) {
kwic = normalizeString(sb.toString());
Sentence s = createSentence(kwic);
c.addSentence(s);
- //logger.info(kwic);
+ // logger.info(kwic);
kwicCollector.append(s.getSentenceString());
}
}
return kwicCollector.toString();
}
- private String readInput(InputStream is, List<String[]> tokenizedSentences)
- throws XMLStreamException{
-
- String kwic;
- StringBuilder sb;
- StringBuilder kwicCollector = new StringBuilder();
-
- XMLInputFactory f = XMLInputFactory.newInstance();
- XMLStreamReader reader = f.createXMLStreamReader(is);
-
- while (reader.hasNext()) {
- if (reader.next() == XMLEvent.START_ELEMENT &&
- reader.getLocalName().equals("Result") &&
- reader.getPrefix().equals(HITS_PREFIX) &&
- reader.getNamespaceURI().equals(HITS_NS)){
-
- sb = new StringBuilder();
-
- reader.next();
- sb.append(reader.getText());
- sb.append(" "); // left context
-
- reader.next();
- if (reader.getLocalName().equals("Hit") &&
- reader.getPrefix().equals(HITS_PREFIX) &&
- reader.getNamespaceURI().equals(HITS_NS)){
-
- reader.next();
- sb.append(reader.getText());
- sb.append(" ");
- }
- reader.next(); // end element Hit
- reader.next();
- sb.append(reader.getText()); // right context
-
- kwic = normalizeString(sb.toString());
- tokenizedSentences.add(kwic.split("\\s+"));
- kwicCollector.append(kwic);
- }
- }
- return kwicCollector.toString();
- }
-
private Sentence createSentence(String kwic){
String[] tokens = kwic.split("\\s+");
int open = ArrayUtils.indexOf(tokens,"#");
@@ -367,7 +345,7 @@
int[] keyIndexes = new int[close-open];
int j=0;
for (int i=open; i<close; i++){
- //logger.info(i+" "+tokens[i]);
+ // logger.info(i + " " + tokens[i]);
keyIndexes[j] = i;
j++;
}
@@ -378,7 +356,6 @@
sb.append(" ");
}
String sentenceString = sb.toString();
-
return new Sentence(sentenceString, tokens, keyIndexes);
}
@@ -415,6 +392,7 @@
builder.setParameter("startRecord", "1");
builder.setParameter("maximumRecords", String.valueOf(maximumRecords));
URI requestURI = builder.build();
+ logger.info(requestURI.toString());
return new HttpGet(requestURI);
}
catch (URISyntaxException e) {
diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml
index d2235e6..e98c4eb 100644
--- a/src/main/webapp/WEB-INF/web.xml
+++ b/src/main/webapp/WEB-INF/web.xml
@@ -7,7 +7,6 @@
<servlet>
<servlet-name>SRU_WS</servlet-name>
- <!-- <servlet-class>de.mannheim.ids.sruws.SRU_WS</servlet-class> -->
<servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
<init-param>
<param-name>com.sun.jersey.config.property.packages</param-name>
@@ -41,13 +40,13 @@
</init-param>
- <!-- broken sentences -->
+ <!-- Other endpoints delivering broken sentences -->
<!-- same case like goethe, pid is sub-resource -->
- <!-- <init-param>
+ <init-param>
<param-name>de.mannheim.ids.sruws.endpoint.muenchen</param-name>
<param-value>https://clarin.phonetik.uni-muenchen.de/BASSRU/</param-value>
- </init-param> -->
+ </init-param>
<!-- weird pid -->
<!-- <init-param>
@@ -67,6 +66,17 @@
<param-value>http://fedora.clarin-d.uni-saarland.de/sru2/</param-value>
</init-param> -->
+ <!-- <init-param>
+ <param-name>de.mannheim.ids.sruws.endpoint.lindat</param-name>
+ <param-value>http://lindat.mff.cuni.cz/services/fcs</param-value>
+ </init-param> -->
+
+ <!-- Dutch -->
+ <!-- <init-param>
+ <param-name>de.mannheim.ids.sruws.endpoint.dans</param-name>
+ <param-value>http://srucql.dans.knaw.nl</param-value>
+ </init-param> -->
+
<load-on-startup>1</load-on-startup>
</servlet>
<servlet-mapping>
diff --git a/src/test/java/de/mannheim/ids/sruws/test/TestClient.java b/src/test/java/de/mannheim/ids/sruws/test/TestClient.java
index 45de13d..f5a995e 100644
--- a/src/test/java/de/mannheim/ids/sruws/test/TestClient.java
+++ b/src/test/java/de/mannheim/ids/sruws/test/TestClient.java
@@ -6,15 +6,21 @@
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
+import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
+import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
@@ -22,19 +28,25 @@
public static void main(String arg[]) throws IOException{
String url =
- //"http://localhost:8080/SRU-WS/";
- "http://clarin.ids-mannheim.de/sruws/";
+ "http://localhost:8080/SRU-WS/" +
+ // "http://clarin.ids-mannheim.de/fcsws" +
+ "?maximumRecords=10&endpoint=korap";
- HttpClient client = HttpClients.createDefault();
+ HttpClient client = HttpClientBuilder.create().build();
HttpPost post = new HttpPost(url);
- post.setHeader(HttpHeaders.ACCEPT,"text/query-cql");
+ post.setHeader(HttpHeaders.CONTENT_TYPE, "text/query-cql");
- List<NameValuePair> urlParameters = new ArrayList<NameValuePair>();
- urlParameters.add(new BasicNameValuePair("query", "Baden-Württemberg"));
- urlParameters.add(new BasicNameValuePair("maximumRecords", "5"));
- urlParameters.add(new BasicNameValuePair("endpoint", "korap"));
- post.setEntity(new UrlEncodedFormEntity(urlParameters));
-
+ // List<NameValuePair> urlParameters = new ArrayList<NameValuePair>();
+ // urlParameters.add(new BasicNameValuePair("query",
+ // "Baden-Württemberg"));
+ // urlParameters.add(new BasicNameValuePair("maximumRecords", "5"));
+ // urlParameters.add(new BasicNameValuePair("endpoint", "korap"));
+ // post.setEntity(new UrlEncodedFormEntity(urlParameters));
+
+ String query = "Baden-Württemberg"; // deliberately non-ASCII
+ HttpEntity entity = new StringEntity(query, Charset.forName("UTF-8")); // fixed charset: the platform default may garble the umlaut
+ post.setEntity(entity);
+
HttpResponse response = client.execute(post);
System.out.println("Sending 'POST' request to URL : " + url);
System.out.println("Post parameters : " + post.getEntity());
@@ -45,15 +57,15 @@
response.getStatusLine().getReasonPhrase());
}
- FileWriter fw = new FileWriter(new File("test.xml"));
+ // FileWriter fw = new FileWriter(new File("test.xml"));
BufferedReader reader = new BufferedReader(new InputStreamReader(
response.getEntity().getContent()));
String line;
while ((line = reader.readLine()) != null){
- //System.out.println(line);
- fw.append(line+"\n");
+ System.out.println(line);
+ // fw.append(line+"\n");
}
- fw.close();
+ // fw.close();
}
}