# Playbook: write the list of DeReKo-KorAP release archives that are both
#   (a) selected by the corpora database query below, and
#   (b) present under the KorAP zip directory with an mtime inside the last
#       200 days,
# to the file "dereko-ingestion.txt" via a task delegated to localhost.
- name: List DeReKo-KorAP Releases
  hosts: worker-03
  run_once: true
  tasks:
    # ansible_date_time comes from gathered facts; fact gathering is not
    # disabled in this play, so the default setup run provides it.
    - name: Get current year
      ansible.builtin.set_fact:
        current_year: "{{ ansible_date_time.year }}"

    # Builds one absolute .zip path per selected row by swapping the
    # '.i5.xml' suffix of the name column for '.zip'.
    # The table name is year-dependent: fileMeta<current_year>I.
    # A row is selected when its name starts with 'pp-', or its own rsr
    # flag is 1, or its name matches '%2_.i5.xml' (in LIKE, '_' matches
    # exactly one character) while the joined basename row has rsr=1 or
    # inkorap=1.
    # NOTE(review): the meaning of the rsr / inkorap flags is not visible
    # here - confirm against the database schema.
    - name: Receive new relevant list from database
      community.mysql.mysql_query:
        login_host: klinux10
        login_db: corpora
        login_user: viewer
        query: >
          SELECT concat('/vol/corpora/DeReKo/incoming/KorAP/zip/', replace(name,'.i5.xml','.zip')) as cname
          FROM fileMeta{{ current_year }}I, basename
          WHERE (fileMeta{{ current_year }}I.name LIKE 'pp-%' or fileMeta{{ current_year }}I.rsr = 1 or
          (fileMeta{{ current_year }}I.name like '%2_.i5.xml' and (basename.rsr=1 or basename.inkorap=1))) and
          fileMeta{{ current_year }}I.base=basename.id
          ORDER by name
      register: sqlresult

    # A negative age selects entries younger than the given span, measured
    # on mtime. The regex accepts names that contain no dot before the
    # final ".zip". Symlinks are followed and any file type is accepted.
    - name: Find all changed from the last 200 days
      ansible.builtin.find:
        paths:
          - "/vol/corpora/DeReKo/incoming/KorAP/zip/"
        age: "-200d"
        age_stamp: "mtime"
        use_regex: true
        follow: true
        file_type: "any"
        patterns:
          - "^[^\\.]*\\.zip$"
      register: newfiles

    # query_result[0] holds the rows of the first (and only) query.
    # The mapped cname values are turned into a list explicitly so that
    # intersect never has to test membership against a one-shot generator.
    # The result is written one path per line.
    # NOTE(review): dest is a relative path; where it resolves on the
    # delegated host depends on how the play is invoked - confirm.
    - name: Compare both lists and show intersection
      ansible.legacy.copy:
        content: "{{ newfiles.files | map(attribute='path') | sort() | intersect( sqlresult.query_result[0] | map(attribute='cname') | list ) | join('\n') }}"
        dest: dereko-ingestion.txt
      delegate_to: localhost