# Playbook: write the list of KorAP zip archives that are both
#   (a) selected by the corpus database query below, and
#   (b) present under /vol/corpora/DeReKo/incoming/KorAP/zip/ on worker-03
#       with a modification time within the last 200 days
# to dereko-ingestion.txt on the Ansible controller (one path per line).
- name: List DeReKo-KorAP Releases
  hosts: worker-03
  run_once: true
  tasks:
    # The year is interpolated into the table name (fileMeta<year>I) of the
    # query below. ansible_date_time comes from the gathered facts of the
    # target host, so fact gathering must stay enabled for this play.
    - name: Get current year
      ansible.builtin.set_fact:
        current_year: "{{ ansible_date_time.year }}"

    # Produces one row per selected file; column `cname` is the absolute zip
    # path obtained by replacing '.i5.xml' with '.zip' in the file name.
    - name: Receive new relevant list from database
      community.mysql.mysql_query:
        login_host: klinux10
        login_db: corpora
        login_user: viewer
        query: >
          SELECT concat('/vol/corpora/DeReKo/incoming/KorAP/zip/', replace(name,'.i5.xml','.zip')) as cname
          FROM fileMeta{{ current_year }}I, basename
          WHERE (fileMeta{{ current_year }}I.name LIKE 'pp-%' or fileMeta{{ current_year }}I.rsr = 1 or
          fileMeta{{ current_year }}I.main = 1 or
          (fileMeta{{ current_year }}I.name like '%2_.i5.xml' and (basename.rsr=1 or basename.inkorap=1))) and
          fileMeta{{ current_year }}I.base=basename.id
          ORDER by name
      register: sqlresult

    # A negative `age` selects entries modified at most that long ago.
    # The regex only accepts names without any dot before the `.zip` suffix.
    - name: Find all changed from the last 200 days
      ansible.builtin.find:
        paths:
          - "/vol/corpora/DeReKo/incoming/KorAP/zip/"
        age: "-200d"
        age_stamp: "mtime"
        use_regex: true
        follow: true
        file_type: "any"
        patterns:
          - "^[^\\.]*\\.zip$"
      register: newfiles

    # The mapped `cname` column is materialised with `list` before it is
    # handed to `intersect`: `map` yields a one-shot iterator, and membership
    # tests against it would consume it, silently dropping matches whenever
    # the database ordering differs from the sorted order of the found paths.
    - name: Compare both lists and show intersection
      ansible.legacy.copy:
        content: "{{ newfiles.files | map(attribute='path') | sort() | intersect( sqlresult.query_result[0] | map(attribute='cname') | list ) | join('\n') }}"
        dest: dereko-ingestion.txt
      delegate_to: localhost