Update Readme
Change-Id: I9b2f6ac5f5b8db1384e2c4ff21e4b8ff6aff88b0
diff --git a/Readme.pod b/Readme.pod
index b214dce..2b1ae07 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -33,7 +33,8 @@
=item
-B<mandatory>: text-header with integrated textsigle, text-body
+B<mandatory>: text-header with integrated textsigle
+(or convertible identifier), text-body
=item
@@ -105,6 +106,16 @@
Print version information.
+=item B<--tokenizer-korap|-tk>
+
+Use the standard KorAP/DeReKo tokenizer.
+
+=item B<--tokenizer-internal|-ti>
+
+Tokenize the data using two embedded tokenizers
+that take an I<aggressive> and a I<conservative>
+approach.
+
=item B<--tokenizer-call|-tc>
Call an external tokenizer process, that will tokenize
@@ -126,16 +137,6 @@
$ --no-tokens --sentence-positions -' - \
$ > corpus.korapxml.zip
-=item B<--tokenizer-korap|-tk>
-
-Use the standard KorAP/DeReKo tokenizer.
-
-=item B<--tokenizer-internal|-ti>
-
-Tokenize the data using two embedded tokenizers,
-that will take an I<Aggressive> and a I<conservative>
-approach.
-
=item B<--skip-inline-tokens>
Boolean flag indicating that inline tokens should not
@@ -154,7 +155,7 @@
=item B<--xmlid-to-textsigle> <from-regex>@<to-c/to-d/to-t>
-Expects a regular replacement expression (separated by a B<@> between the
+Expects a regular replacement expression (separated by B<@> between the
search and the replacement) to convert text id attributes to text sigles
with three parts (separated by B</>).
@@ -164,8 +165,8 @@
--xmlid-to-textsigle 'ICC.German\.([^.]+\.[^.]+)\.(.+)@ICCGER/$1/$2' \
-tk - < t/data/icc_german_sample.p5.xml
-Converts text id `ICC.German.DeReKo.WPD17.G11.00238' to
-sigle `ICCGER/DeReKo.WPD17/G11.00238'.
+Converts text id C<ICC.German.DeReKo.WPD17.G11.00238> to
+sigle C<ICCGER/DeReKo.WPD17/G11.00238>.
=item B<--inline-tokens> <foundry>#[<file>]
@@ -244,7 +245,7 @@
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2021, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2021-2023, L<IDS Mannheim|https://www.ids-mannheim.de/>
Author: Peter Harders