Separated regex grammar rules for reuse
Change-Id: Ife6c9c3998b0c0a1a1017a8f735000c2eca66790
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/6023
Reviewed-by: Nils Diewald <nils@diewald-online.de>
diff --git a/Changes b/Changes
index 7523e71..967aee3 100644
--- a/Changes
+++ b/Changes
@@ -2,8 +2,9 @@
- [feature] Improve regex treatment of negative
character classes (diewald)
- [bugfix] Improve quantifiers in Regex (diewald)
+ - [feature] Separated regex grammar for reuse (diewald)
-0.38.1 2022-01-07
+0.38.1 2022-10-05
- [security] Updated log4j (diewald)
- Fixed Annis OR group (resolved #96)
- [security] Updated jackson (diewald)
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
index 8871e98..e54caa7 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
@@ -2,7 +2,7 @@
@header {package de.ids_mannheim.korap.query.parse.poliqarpplus;}
-
+import Regex;
options
{
@@ -55,7 +55,6 @@
/** Simple strings and Simple queries */
WS : [ \t] -> channel(HIDDEN);
-fragment FOCC : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
fragment NO_RE : ~[ \t/];
fragment ALPHABET : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' | '#' );
@@ -92,20 +91,6 @@
SQUOTE : '\'';
HASH : '#';
-/* Regular expressions and Regex queries */
-fragment RE_symbol : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
- | '(' | ')' | '|' | '\\' | '"' | ':' | '\'' | '^' );
-fragment RE_esc : '\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
- | '(' | ')' | '|' | '\\' | '"' | ':' | '\'' | '^' );
-fragment RE_char : (RE_symbol | RE_esc );
-fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;
-
-fragment RE_chgroup : '[' '^'? RE_char+ ']';
-fragment RE_quant : ('.' | RE_char | RE_chgroup | ( '(' RE_expr ')')) ('?' | '*' | '+' | FOCC) QMARK?;
-fragment RE_group : '(' RE_expr ')';
-fragment RE_expr : ('.' | RE_char | RE_alter | RE_chgroup | RE_quant | RE_group | '^' )+;
-fragment RE_dquote : ('"'|'„'|'“') (RE_expr | '\'' | ':' )* ('"'|'“'|'”');
-
REGEX : RE_dquote;
ESC_SQUOTE : BACKSLASH SQUOTE;
diff --git a/src/main/antlr/poliqarpplus/Regex.g4 b/src/main/antlr/poliqarpplus/Regex.g4
new file mode 100644
index 0000000..f83b395
--- /dev/null
+++ b/src/main/antlr/poliqarpplus/Regex.g4
@@ -0,0 +1,18 @@
+lexer grammar Regex;
+// Reusable fragment rules for double-quoted regular expressions; every rule here is a fragment, to be referenced from an importing lexer.
+fragment RE_ws : [ \t]; // a single space or tab
+fragment RE_focc : '{' RE_ws* ( [0-9]* RE_ws* ',' RE_ws* [0-9]+ | [0-9]+ RE_ws* ','? ) RE_ws* '}'; // braced repetition count: {n}, {n,}, {,m} or {n,m}, with optional spaces/tabs inside
+
+/* Regular expressions and Regex queries */
+fragment RE_symbol : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '\\' | '"' | ':' | '\'' | '^' ); // any single character outside the listed set
+fragment RE_esc : '\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '\\' | '"' | ':' | '\'' | '^' ); // a backslash followed by one of the listed characters
+fragment RE_char : (RE_symbol | RE_esc ); // one plain or backslash-escaped character
+fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+; // one or more repetitions of: operand, '|', RE_expr
+// RE_expr, RE_alter, RE_quant and RE_group below refer to each other recursively.
+fragment RE_chgroup : '[' '^'? RE_char+ ']'; // bracketed character group with an optional leading '^'
+fragment RE_quant : ('.' | RE_char | RE_chgroup | ( '(' RE_expr ')')) ('?' | '*' | '+' | RE_focc) '?'?; // operand followed by ?, *, + or RE_focc, then an optional extra '?'
+fragment RE_group : '(' RE_expr ')'; // parenthesised sub-expression
+fragment RE_expr : ('.' | RE_char | RE_alter | RE_chgroup | RE_quant | RE_group | '^' )+; // one or more regex pieces in sequence
+fragment RE_dquote : ('"'|'„'|'“') (RE_expr | '\'' | ':' )* ('"'|'“'|'”'); // regex enclosed in straight or typographic double quotes; bare ' and : are also accepted inside
\ No newline at end of file