| Akron | e4c2e41 | 2016-01-28 15:10:50 +0100 | [diff] [blame] | 1 | package KorAP::XML::Tokenizer::Span; |
| Nils Diewald | 2db9ad0 | 2013-10-29 19:26:43 +0000 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; |
| 4 | use Mojo::DOM; |
| Akron | b62d92a | 2020-03-01 16:32:00 +0100 | [diff] [blame] | 5 | use Clone; |
| Nils Diewald | 2db9ad0 | 2013-10-29 19:26:43 +0000 | [diff] [blame] | 6 | |
# Slot indices into the blessed array that backs every span object.
# Each accessor below reads or writes exactly one of these slots.
use constant {
  O_START   => 0,  # written by set_o_start (presumably the start offset)
  O_END     => 1,  # written by set_o_end (presumably the end offset)
  P_START   => 2,  # written by set_p_start (presumably the start position)
  P_END     => 3,  # written by set_p_end (presumably the end position)
  ID        => 4,  # span identifier
  CONTENT   => 5,  # raw content string that dom() parses
  DOM       => 6,  # Mojo::DOM tree cached by dom()
  HASH      => 7,  # value stored by set_hash
  MILESTONE => 8,  # true once set_milestone was called
  PTI       => 9,  # value stored by set_pti
};
| 19 | |
# Construct an empty span: a blessed, initially empty array reference.
# All fields stay undefined until the matching setter is called.
sub new {
  my ($class) = @_;
  return bless [], $class;
};
| 23 | |
# Return the constant type name of this class, the string 'span'.
sub type {
  return 'span';
};
| 27 | |
# Store the given value in the O_START slot and return it.
sub set_o_start {
  my ($self, $o_start) = @_;
  return $self->[O_START] = $o_start;
};
| 31 | |
# Return the value of the O_START slot (undef if never set).
sub get_o_start {
  my ($self) = @_;
  return $self->[O_START];
};
| 35 | |
# Store the given value in the O_END slot and return it.
sub set_o_end {
  my ($self, $o_end) = @_;
  return $self->[O_END] = $o_end;
};
| 39 | |
# Return the value of the O_END slot (undef if never set).
sub get_o_end {
  my ($self) = @_;
  return $self->[O_END];
};
| 43 | |
# Store the given value in the P_START slot and return it.
sub set_p_start {
  my ($self, $p_start) = @_;
  return $self->[P_START] = $p_start;
};
| 47 | |
# Return the value of the P_START slot (undef if never set).
sub get_p_start {
  my ($self) = @_;
  return $self->[P_START];
};
| 51 | |
# Store the given value in the P_END slot and return it.
sub set_p_end {
  my ($self, $p_end) = @_;
  return $self->[P_END] = $p_end;
};
| 55 | |
# Return the value of the P_END slot (undef if never set).
sub get_p_end {
  my ($self) = @_;
  return $self->[P_END];
};
| 59 | |
# Store the given identifier in the ID slot and return it.
sub set_id {
  my ($self, $id) = @_;
  return $self->[ID] = $id;
};
| 63 | |
# Return the identifier held in the ID slot (undef if never set).
sub get_id {
  my ($self) = @_;
  return $self->[ID];
};
| 67 | |
# Store the given content in the CONTENT slot and return it.
#
# Only the CONTENT slot is written: a tree that dom() has already
# cached in the DOM slot is left untouched by this call.
sub set_content {
  my ($self, $content) = @_;
  return $self->[CONTENT] = $content;
};
| 71 | |
# Return the content held in the CONTENT slot (undef if never set).
sub get_content {
  my ($self) = @_;
  return $self->[CONTENT];
};
| 75 | |
# Return the span content parsed as a Mojo::DOM tree.
#
# The tree is built from the CONTENT slot on first access and cached in
# the DOM slot, so subsequent calls return the very same object.
sub dom {
  my ($self) = @_;

  # Reuse the tree built by an earlier call
  return $self->[DOM] if $self->[DOM];

  # XML semantics have to be enabled *before* parsing.
  # Mojo::DOM->new($string) parses immediately and, without an XML
  # declaration in the string, applies HTML semantics (lowercased
  # names, void and raw-text elements). Flipping the xml flag afterwards
  # does not rebuild the tree.
  return $self->[DOM] = Mojo::DOM->new->xml(1)->parse($self->[CONTENT]);
};
| 86 | |
# Store the given value in the HASH slot and return it.
sub set_hash {
  my ($self, $hash) = @_;
  return $self->[HASH] = $hash;
};
| 90 | |
# Return the value held in the HASH slot (undef if never set).
sub get_hash {
  my ($self) = @_;
  return $self->[HASH];
};
| 94 | |
# Flag the span as a milestone and return 1.
#
# Any argument passed by the caller is ignored; the MILESTONE slot is
# always set to 1 and there is no way to reset it through this method.
sub set_milestone {
  my ($self) = @_;
  return $self->[MILESTONE] = 1;
};
| 98 | |
# Return 1 if the MILESTONE slot holds a true value, 0 otherwise
# (including when it was never set).
sub get_milestone {
  my ($self) = @_;
  return 1 if $self->[MILESTONE];
  return 0;
};
| 102 | |
# Store the given value in the PTI slot and return it.
sub set_pti {
  my ($self, $pti) = @_;
  return $self->[PTI] = $pti;
};
| 106 | |
# Return the value held in the PTI slot (undef if never set).
sub get_pti {
  my ($self) = @_;
  return $self->[PTI];
};
| Akron | 1622dd9 | 2015-12-09 22:34:26 +0100 | [diff] [blame] | 110 | |
# Serialize the span for debugging as
#   [(O_START:O_END|P_START:P_END)ID-CONTENT]
# Fields that were never set are rendered as empty strings.
sub to_string {
  my ($self) = @_;

  # Unset slots are undef; silence the warnings their stringification
  # would otherwise raise
  no warnings;

  return sprintf(
    '[(%s:%s|%s:%s)%s-%s]',
    @{$self}[O_START, O_END, P_START, P_END, ID, CONTENT]
  );
};
| 120 | |
# Return a copy of the span produced by Clone::clone, which is handed
# the whole object, so every slot (including DOM and HASH) is copied.
sub clone {
  my ($self) = @_;

  # TODO:
  #   Optionally clone without DOM and treat hash specially
  return Clone::clone($self);
};
| 127 | |
| 128 | |
| Nils Diewald | 2db9ad0 | 2013-10-29 19:26:43 +0000 | [diff] [blame] | 129 | 1; |