#!/usr/bin/env bash
#
# Regression checks for the generated I5 corpus files.
#
# Evaluates XPath expressions (via xmlstarlet) plus a grep and an xmllint
# check against the I5 XML files below target/ and compares the results with
# the expected values.
#
# assert_eq, assert_gt, log_failure and exit_with_test_summary are not defined
# in this script; they are expected to come from the assert.sh that is sourced
# from the directory containing this script.
#
# All data paths (target/..., xslt/...) are relative to the current working
# directory.

TESTDIR=$(dirname -- "$0")
ASSERTSH="${TESTDIR}/assert.sh"

# 'set -e' is intentionally left disabled.
# NOTE(review): presumably a failing assertion returns non-zero and must not
# abort the remaining checks -- confirm against assert.sh.
# set -e
. "${ASSERTSH}"

# Counters initialised after sourcing assert.sh.
# NOTE(review): presumably maintained by the assert helpers and reported by
# exit_with_test_summary -- confirm against assert.sh.
ERRORS=0
PASSED=0

# Expected number of idsText elements in $I5_FILE.
TEXTS=7

I5_FILE=target/dnb18.i5.xml
I5_FILE_13=target/dnb13.i5.xml
I5_FILE_23=target/dnb23.i5.xml

# Fail early with one clear message if any queried input file is missing.
for required_file in "$I5_FILE" "$I5_FILE_13" "$I5_FILE_23"; do
  if [ ! -f "$required_file" ]; then
    log_failure "File $required_file does not exist"
    exit 1
  fi
done

# Shared XPath prefixes for elements below each text header.
TEXT_HEADER="/idsCorpus/idsDoc/idsText/idsHeader"
BIBL_STRUCT="${TEXT_HEADER}/fileDesc/sourceDesc/biblStruct"
PUB_STMT="${TEXT_HEADER}/fileDesc/publicationStmt"
TEXT_DESC="${TEXT_HEADER}/profileDesc/textDesc"

# Print the value of XPath expression $1 evaluated against XML file $2.
xpath_value() {
  xmlstarlet sel --net -t -v "$1" "$2"
}

# --- Text and author counts -------------------------------------------------

observed=$(xpath_value "count(//idsText)" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS idsText elements"

observed=$(xpath_value "count(${BIBL_STRUCT}/monogr/h.author[normalize-space(.)])" "$I5_FILE")
assert_eq "$observed" "$TEXTS" "$I5_FILE contains $TEXTS non-empty h.author elements"

# --- Corpus title, authors, translator --------------------------------------

observed=$(xpath_value "/idsCorpus/idsHeader/fileDesc/titleStmt/c.title" "$I5_FILE_13")
assert_eq "$observed" "Deutsche Nationalbibliothek: Belletristik 2013 (DeLiKo@DNB)" "c.title contains year and DeLiKo@DNB"

observed=$(xpath_value "count(${BIBL_STRUCT}/monogr/h.author[contains(., '[')])" "$I5_FILE_13")
assert_eq "$observed" "0" "authors do not contain []"

observed=$(xpath_value "${BIBL_STRUCT}/monogr/editor[@role='translator'][1]" "$I5_FILE_13")
assert_eq "$observed" "Zwack, Heinz" "translator is correctly identified"

# Counts lines that start with "Copyright" in the serialized file.
observed=$(grep -Ec '^Copyright' "$I5_FILE_13")
assert_eq "$observed" "2" "spaces at <br> elements are inserted correctly"

# --- Text types -------------------------------------------------------------

observed=$(xpath_value "count(${TEXT_DESC}/textType[contains(., 'Arztroman')])" "$I5_FILE")
assert_gt "$observed" "0" "at least one textType contains 'Arztroman'"

observed=$(xpath_value "count(${TEXT_DESC}/textType[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textType elements"

observed=$(xpath_value "count(${TEXT_DESC}/textTypeRef[normalize-space(.)=''])" "$I5_FILE")
assert_eq "$observed" "0" "no empty textTypeRef elements"

# --- idno elements ----------------------------------------------------------

# The number of texts is the baseline for the idno checks below.
min_expected=$(xpath_value "count(/idsCorpus/idsDoc/idsText)" "$I5_FILE")

observed=$(xpath_value "count(${PUB_STMT}/idno)" "$I5_FILE")
assert_gt "$observed" "$min_expected" "every text has more than one idno element"

observed=$(xpath_value "count(${PUB_STMT}/idno[@type='URN'])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "every text has one idno element of type URN"

observed=$(xpath_value "count(${PUB_STMT}/idno[@type='URL' and starts-with(@rend, 'URN;urn:nbn:de:')])" "$I5_FILE")
assert_eq "$observed" "$min_expected" "for every idno element of type URN, there is also an URL element with @rend starting with 'URN;urn:nbn:de:'"

# --- Awards -----------------------------------------------------------------

observed=$(xpath_value "count(${BIBL_STRUCT}/note[@type='award'][@subtype='Buchpreis deutscher_buchpreis'])" "$I5_FILE_13")
assert_eq "$observed" "1" "award is correctly identified"

observed=$(xpath_value "count(${BIBL_STRUCT}/note[@type='award'])" "$I5_FILE")
assert_eq "$observed" "0" "no falsely identified awards"

# --- Static metadata --------------------------------------------------------

observed=$(xpath_value "count(${PUB_STMT}/idno[@type='IDN' and .='8999999999'])" "$I5_FILE")
assert_eq "$observed" "1" "epub 8... id and without API metadata is transformed"

observed=$(xpath_value "count(${TEXT_HEADER}/fileDesc[publicationStmt/idno[@type='IDN' and .='8999999999']]/sourceDesc//h.title[.='Herzblut'])" "$I5_FILE")
assert_eq "$observed" "1" "static metadata for epub with 8... id is correctly retrieved"

# xmllint exits non-zero when the document cannot be parsed; --noout
# suppresses the serialized output so only the exit status matters.
xmllint --noout xslt/static_metadata.xml
assert_eq "$?" "0" "static_metadata.xml is well-formed"

observed=$(xpath_value "count(${TEXT_HEADER}/fileDesc[publicationStmt/idno[@type='IDN' and .='8000000009']]/sourceDesc//h.title[.='Ein Hund kam in die Küche'])" "$I5_FILE_23")
assert_eq "$observed" "1" "static metadata is also used as fallback by ISBN and via symbolic link"

observed=$(xpath_value "count(${BIBL_STRUCT}/note[@type='award'][@subtype='Buchpreis deutscher_buchpreis'])" "$I5_FILE_23")
assert_eq "$observed" "1" "award is also correctly identified for static metadata epubs"

exit_with_test_summary