List all files to ingest for KorAP
Change-Id: If931858772db836df44c61238e260692d5f5b2a8
diff --git a/list-dereko-korap-release.yml b/list-dereko-korap-release.yml
new file mode 100644
index 0000000..2f90a5a
--- /dev/null
+++ b/list-dereko-korap-release.yml
@@ -0,0 +1,48 @@
+---
+# Writes dereko-ingestion.txt on the control node: the zip archives that are
+# both selected by the corpora database query and were modified within the
+# last 200 days in the KorAP incoming directory.
+- name: List DeReKo-KorAP Releases
+  hosts: 10.0.10.55
+  remote_user: korap
+  tasks:
+    - name: Receive new relevant list from database
+      community.mysql.mysql_query:
+        login_host: klinux10
+        login_db: corpora
+        login_user: viewer
+        # Each row's cname is the zip path derived from the file's name.
+        query: >
+          SELECT concat('/vol/corpora/DeReKo/incoming/KorAP/zip/', replace(name,'.i5.xml','.zip')) as cname
+          FROM fileMeta2024I, basename
+          WHERE (fileMeta2024I.name LIKE 'pp-%' or fileMeta2024I.rsr = 1 or
+          (fileMeta2024I.name like '%2_.i5.xml' and (basename.rsr=1 or basename.inkorap=1))) and
+          fileMeta2024I.base=basename.id
+          ORDER by name
+      register: sqlresult
+
+    - name: Find all changed from the last 200 days
+      ansible.builtin.find:
+        paths:
+          - "/vol/corpora/DeReKo/incoming/KorAP/zip/"
+        # A negative age selects entries newer than the given span.
+        age: "-200d"
+        age_stamp: "mtime"
+        use_regex: true
+        follow: true
+        file_type: "any"
+        patterns:
+          - "^[^\\.]*\\.zip$"
+      register: newfiles
+
+    - name: Compare both lists and show intersection
+      ansible.builtin.copy:
+        # map() can return a lazy one-shot iterator; `list` materialises both
+        # operands so intersect compares two real lists. Sorting happens last
+        # so the written file is ordered regardless of how intersect iterates.
+        content: >-
+          {{ newfiles.files | map(attribute='path') | list
+          | intersect(sqlresult.query_result[0] | map(attribute='cname') | list)
+          | sort | join('\n') }}
+        dest: dereko-ingestion.txt
+      delegate_to: localhost