#!/usr/bin/env bash
#
# Checks the generated I5 corpus files under target/ with xmlstarlet and grep,
# reporting results through the assertion helpers sourced from assert.sh.
#
# Requirements: xmlstarlet and grep on PATH; assert.sh located next to this
# script. The target/ paths are relative to the current working directory.

TESTDIR=$(dirname -- "$0")
ASSERTSH="${TESTDIR}/assert.sh"
# set -e
# NOTE(review): errexit is left disabled as in the original; presumably so that
# a failing check (e.g. grep -c exiting 1 on zero matches) does not abort the
# remaining assertions - confirm before enabling.
# shellcheck source=/dev/null
. "${ASSERTSH}"

# Not read anywhere in this script; presumably counters used by the helpers in
# assert.sh - verify against that file.
# shellcheck disable=SC2034
ERRORS=0
# shellcheck disable=SC2034
PASSED=0

# Expected number of idsText elements in the dnb18 file.
TEXTS=6
I5_FILE=target/dnb18.i5.xml
DNB13_FILE=target/dnb13.i5.xml

# Print the value of XPath expression $1 evaluated against XML file $2.
xpath_value() {
  xmlstarlet sel --net -t -v "$1" "$2"
}

if [ ! -f "$I5_FILE" ]; then
  log_failure "File $I5_FILE does not exist"
  exit 1
fi

# --- dnb18: text and author counts ------------------------------------------

observed=$(xpath_value "count(//idsText)" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS idsText elements"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/sourceDesc/biblStruct/monogr/h.author[normalize-space(.)])" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS non-empty h.author elements"

# --- dnb13: corpus title, authors, translator, <br> handling ----------------

observed=$(xpath_value "/idsCorpus/idsHeader/fileDesc/titleStmt/c.title" "$DNB13_FILE")
assert_eq "$observed" "Deutsche Nationalbibliothek: Belletristik 2013" "c.title contains year"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/sourceDesc/biblStruct/monogr/h.author[contains(., '[')])" "$DNB13_FILE")
assert_eq "$observed" "0" "authors do not contain []"

observed=$(xpath_value "/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/sourceDesc/biblStruct/monogr/editor[@role='translator'][1]" "$DNB13_FILE")
assert_eq "$observed" "Zwack, Heinz" "translator is correctly identified"

# Counts the lines of the file that start with "Copyright".
observed=$(grep -Ec -- '^Copyright' "$DNB13_FILE")
assert_eq "$observed" "2" "spaces at <br> elements are inserted correctly"

# --- dnb18: text types -------------------------------------------------------

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textType[contains(., 'Arztroman')])" "$I5_FILE")
assert_gt "$observed" "0" "at least one textType contains 'Arztroman'"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textType[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textType elements"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textTypeRef[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textTypeRef elements"

# --- dnb18: idno elements ----------------------------------------------------

# Number of idsText elements; the idno totals below are compared against it.
min_expected=$(xpath_value "count(/idsCorpus/idsDoc/idsText)" "$I5_FILE")

# NOTE(review): this compares corpus-wide totals only (total idno count versus
# number of texts); it does not check each text individually.
observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/publicationStmt/idno)" "$I5_FILE")
assert_gt "$observed" "$min_expected" "every text has more than one idno element"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/publicationStmt/idno[@type='URN'])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "every text has one idno element of type URN"

observed=$(xpath_value "count(/idsCorpus/idsDoc/idsText/idsHeader/fileDesc/publicationStmt/idno[@type='URN' and @rend='URN'])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "every idno element of type URN also has attribute rend='URN'"

exit_with_test_summary