| commit | a4f6cf9ea44b1a6fce6e59e6530e1ecf8658ba9f | [log] [tgz] |
|---|---|---|
| author | Marc Kupietz <kupietz@ids-mannheim.de> | Wed Dec 07 17:28:29 2022 +0100 |
| committer | Marc Kupietz <kupietz@ids-mannheim.de> | Wed Dec 07 17:30:42 2022 +0100 |
| tree | 8e59bdcd3dfae181c200c685d98ecc17243144ba | |
| parent | 464b1c88414ef715b5f9f50c0e3b6a6c32db5ce2 [diff] [blame] |
Add exclude punctuation option

Change-Id: Ie90a59f77a92b8007af92411bcbaf00a8c910722
```diff
diff --git a/src/main/java/org/ids_mannheim/Utils.java b/src/main/java/org/ids_mannheim/Utils.java
index e8413b5..8f4e801 100644
--- a/src/main/java/org/ids_mannheim/Utils.java
+++ b/src/main/java/org/ids_mannheim/Utils.java
@@ -55,5 +55,8 @@
                 .replaceAll("^(\\d+)\t>\t--\t[^\t]+", "$1\t>\t>\t\\$(");
     }
+    public static boolean isPunctuation(String token, String lemma, String pos) {
+        return pos.startsWith("$") || token.equals("\"") || token.equals("'") || token.equals("<") || token.equals(">");
+    }
 }
```