#!/usr/bin/env bash
#
# Checks the generated I5 corpus files under target/ with XPath queries
# (via xmlstarlet) and a grep line count, reporting each result through the
# assertion helpers sourced from assert.sh in this script's directory.
#
# Usage: run from the directory that contains target/ (paths are relative).
# Requires: xmlstarlet, target/dnb18.i5.xml, target/dnb13.i5.xml.

TESTDIR=$(dirname "$0")
ASSERTSH=${TESTDIR}/assert.sh
# NOTE(review): errexit is left disabled; with it enabled, a `grep -c` that
# counts zero matches (exit status 1) would abort the script before the
# assertion could report the failure -- presumably why it is commented out.
# set -e
. "${ASSERTSH}"

# Counters are reset after sourcing assert.sh.
# NOTE(review): presumably read by the assert helpers / the final summary.
ERRORS=0
PASSED=0

# Expected number of idsText elements in the 2018 sample corpus.
TEXTS=6
I5_FILE=target/dnb18.i5.xml
I5_FILE_2013=target/dnb13.i5.xml

# Shared XPath prefixes, to keep the individual queries readable.
TEXT_XPATH=/idsCorpus/idsDoc/idsText
TEXT_HEADER=${TEXT_XPATH}/idsHeader
MONOGR=${TEXT_HEADER}/fileDesc/sourceDesc/biblStruct/monogr
TEXT_DESC=${TEXT_HEADER}/profileDesc/textDesc
PUB_STMT=${TEXT_HEADER}/fileDesc/publicationStmt

#######################################
# Evaluate an XPath expression against an XML file.
# Arguments: $1 - XPath expression
#            $2 - path of the XML file
# Outputs:   the value of the expression on stdout
# Returns:   exit status of xmlstarlet
#######################################
xpath_value() {
  xmlstarlet sel --net -t -v "$1" "$2"
}

# Fail early with a clear message instead of a cascade of failed assertions.
if ! command -v xmlstarlet >/dev/null; then
  log_failure "xmlstarlet is required but was not found in PATH"
  exit 1
fi

for required_file in "$I5_FILE" "$I5_FILE_2013"; do
  if [[ ! -f "$required_file" ]]; then
    log_failure "File $required_file does not exist"
    exit 1
  fi
done

# --- Text and author counts (2018 corpus) -----------------------------------

observed=$(xpath_value "count(//idsText)" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS idsText elements"

observed=$(xpath_value "count(${MONOGR}/h.author[normalize-space(.)])" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS non-empty h.author elements"

# --- Header metadata (2013 corpus) -------------------------------------------

observed=$(xpath_value "/idsCorpus/idsHeader/fileDesc/titleStmt/c.title" "$I5_FILE_2013")
assert_eq "$observed" "Deutsche Nationalbibliothek: Belletristik 2013" "c.title contains year"

observed=$(xpath_value "count(${MONOGR}/h.author[contains(., '[')])" "$I5_FILE_2013")
assert_eq "$observed" "0" "authors do not contain []"

observed=$(xpath_value "${MONOGR}/editor[@role='translator'][1]" "$I5_FILE_2013")
assert_eq "$observed" "Zwack, Heinz" "translator is correctly identified"

# Counts lines that start with "Copyright"; grep exits 1 when the count is 0,
# which is harmless here because errexit is off.
observed=$(grep -Ec '^Copyright' "$I5_FILE_2013")
assert_eq "$observed" "2" "spaces at <br> elements are inserted correctly"

# --- Text type classification (2018 corpus) ----------------------------------

observed=$(xpath_value "count(${TEXT_DESC}/textType[contains(., 'Arztroman')])" "$I5_FILE")
assert_gt "$observed" "0" "at least one textType contains 'Arztroman'"

observed=$(xpath_value "count(${TEXT_DESC}/textType[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textType elements"

observed=$(xpath_value "count(${TEXT_DESC}/textTypeRef[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textTypeRef elements"

# --- Identifiers (2018 corpus) -----------------------------------------------

# Number of texts; the idno counts below are compared against this total.
min_expected=$(xpath_value "count(${TEXT_XPATH})" "$I5_FILE")

# Compares corpus-wide totals: the idno elements must outnumber the texts.
observed=$(xpath_value "count(${PUB_STMT}/idno)" "$I5_FILE")
assert_gt "$observed" "$min_expected" "every text has more than one idno element"

observed=$(xpath_value "count(${PUB_STMT}/idno[@type='URN'])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "every text has one idno element of type URN"

observed=$(xpath_value "count(${PUB_STMT}/idno[@type='URL' and starts-with(@rend, 'URN;urn:nbn:de:')])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "for every idno element of type URN, there is also an URL element with @rend starting with 'URN;urn:nbn:de:'"

exit_with_test_summary