bansp | 5e2d1c0 | 2022-03-10 04:51:40 +0100 | [diff] [blame] | 1 | <?xml version="1.0" encoding="UTF-8"?> |
| 2 | <idsHeader type="corpus" |
| 3 | pattern="text" |
| 4 | status="new" |
| 5 | version="1.1" |
| 6 | TEIform="teiHeader"> |
| 7 | <fileDesc> |
| 8 | <titleStmt> |
| 9 | <korpusSigle>NKJP</korpusSigle> |
bansp | 9103aab | 2022-03-19 05:10:21 +0100 | [diff] [blame^] | 10 | <c.title xml:lang="pl">Narodowy Korpus Języka Polskiego -- podkorpus zawierający 1 milion słów</c.title> |
| 11 | <c.title xml:lang="en">National Corpus of Polish -- the 1 million word subcorpus</c.title> |
bansp | 5e2d1c0 | 2022-03-10 04:51:40 +0100 | [diff] [blame] | 12 | </titleStmt> |
bansp | 9103aab | 2022-03-19 05:10:21 +0100 | [diff] [blame^] | 13 | <publicationStmt> |
| 14 | <availability status="free"> |
| 15 | <p>This 1 million word subcorpus of the National Corpus of Polish is available publicly for free.</p> |
| 16 | </availability> |
| 17 | </publicationStmt> |
bansp | 5e2d1c0 | 2022-03-10 04:51:40 +0100 | [diff] [blame] | 18 | </fileDesc> |
bansp | 9103aab | 2022-03-19 05:10:21 +0100 | [diff] [blame^] | 19 | <encodingDesc> |
| 20 | <classDecl> |
| 21 | <taxonomy xml:id="taxonomy-NKJP-type"> |
| 22 | <category xml:id="typ_lit"> |
| 23 | <desc xml:lang="pl">literatura piękna</desc> |
| 24 | <desc xml:lang="en">fiction</desc> |
| 25 | </category> |
| 26 | <category xml:id="typ_fakt"> |
| 27 | <desc xml:lang="pl">literatura faktu</desc> |
| 28 | <desc xml:lang="en">non-fiction novel</desc> |
| 29 | </category> |
| 30 | <category xml:id="typ_publ"> |
| 31 | <desc xml:lang="pl">publicystyka i wiadomości prasowe</desc> |
| 32 | <desc xml:lang="en">journalism</desc> |
| 33 | </category> |
| 34 | <category xml:id="typ_nd"> |
| 35 | <desc xml:lang="pl">naukowo-dydaktyczny</desc> |
| 36 | <desc xml:lang="en">academic writing</desc> |
| 37 | </category> |
| 38 | <category xml:id="typ_inf-por"> |
| 39 | <desc xml:lang="pl">informacyjno-poradnikowy</desc> |
| 40 | <desc xml:lang="en">informative and instructive writing</desc> |
| 41 | </category> |
| 42 | <category xml:id="typ_nklas"> |
| 43 | <desc xml:lang="pl">książka niebeletrystyczna niesklasyfikowana</desc> |
| 44 | <desc xml:lang="en">unclassified non-fiction book</desc> |
| 45 | </category> |
| 46 | <category xml:id="typ_inne_pisane"> |
| 47 | <desc xml:lang="pl">inne teksty pisane</desc> |
| 48 | <desc xml:lang="en">miscellaneous (written)</desc> |
| 49 | <category xml:id="typ_urzed"> |
| 50 | <desc xml:lang="pl">urzędowo-kancelaryjny</desc> |
| 51 | <desc xml:lang="en">legal and official</desc> |
| 52 | </category> |
| 53 | </category> |
| 54 | <category xml:id="typ_internet"> |
| 55 | <desc xml:lang="pl">Internet</desc> |
| 56 | <desc xml:lang="en">Internet</desc> |
| 57 | </category> |
| 58 | <category xml:id="typ_konwers"> |
| 59 | <desc xml:lang="pl">konwersacyjne</desc> |
| 60 | <desc xml:lang="en">conversational</desc> |
| 61 | </category> |
| 62 | <category xml:id="typ_media"> |
| 63 | <desc xml:lang="pl">mówione medialne</desc> |
| 64 | <desc xml:lang="en">spoken from the media</desc> |
| 65 | </category> |
| 66 | <category xml:id="typ_qmow"> |
| 67 | <desc xml:lang="pl">quasi-mówione</desc> |
| 68 | <desc xml:lang="en">quasi-spoken</desc> |
| 69 | </category> |
| 70 | </taxonomy> |
| 71 | </classDecl> |
| 72 | <classDecl> |
| 73 | <taxonomy xml:id="taxonomy-NKJP-channel"> |
| 74 | <category xml:id="kanal_prasa"> |
| 75 | <desc xml:lang="pl">prasa</desc> |
| 76 | <desc xml:lang="en">press</desc> |
| 77 | <category xml:id="kanal_prasa_dziennik"> |
| 78 | <desc xml:lang="pl">dziennik</desc> |
| 79 | <desc xml:lang="en">daily</desc> |
| 80 | </category> |
| 81 | <category xml:id="kanal_prasa_tygodnik"> |
| 82 | <desc xml:lang="pl">tygodnik</desc> |
| 83 | <desc xml:lang="en">weekly</desc> |
| 84 | </category> |
| 85 | <category xml:id="kanal_prasa_miesiecznik"> |
| 86 | <desc xml:lang="pl">miesięcznik</desc> |
| 87 | <desc xml:lang="en">monthly</desc> |
| 88 | </category> |
| 89 | <category xml:id="kanal_prasa_inne"> |
| 90 | <desc xml:lang="pl">inne prasowe</desc> |
| 91 | <desc xml:lang="en">other press</desc> |
| 92 | </category> |
| 93 | </category> |
| 94 | <category xml:id="kanal_ksiazka"> |
| 95 | <desc xml:lang="pl">książka</desc> |
| 96 | <desc xml:lang="en">book</desc> |
| 97 | </category> |
| 98 | <category xml:id="kanal_internet"> |
| 99 | <desc xml:lang="pl">Internet</desc> |
| 100 | <desc xml:lang="en">Internet</desc> |
| 101 | </category> |
| 102 | <category xml:id="kanal_mowiony"> |
| 103 | <desc xml:lang="pl">mówiony</desc> |
| 104 | <desc xml:lang="en">spoken</desc> |
| 105 | </category> |
| 106 | <category xml:id="kanal_ulotka"> |
| 107 | <desc xml:lang="pl">ulotki, ogłoszenia, reklamy</desc> |
| 108 | <desc xml:lang="en">leaflets, announcements, ads</desc> |
| 109 | </category> |
| 110 | </taxonomy> |
| 111 | </classDecl> |
| 112 | <classDecl> |
| 113 | <taxonomy xml:id="ukd"> |
| 114 | <bibl> |
| 115 | <title xml:lang="pl">Uniwersalna Klasyfikacja Dziesiętna</title> |
| 116 | <title xml:lang="en">Universal Decimal Classification</title> |
| 117 | <edition>UDC-P058</edition> |
| 118 | </bibl> |
| 119 | </taxonomy> |
| 120 | </classDecl> |
| 121 | <classDecl> |
| 122 | <taxonomy xml:id="bn"> |
| 123 | <bibl> |
| 124 | <title xml:lang="pl">Klasyfikacja Biblioteki Narodowej</title> |
| 125 | <title xml:lang="en">Polish National Library Classification</title> |
| 126 | <edition>Słownik języka haseł przedmiotowych Biblioteki Narodowej. Wyd. 5 popr. i rozsz., stan na dzień 31 grudnia 2004 roku.</edition> |
| 127 | </bibl> |
| 128 | </taxonomy> |
| 129 | </classDecl> |
| 130 | </encodingDesc> |
bansp | 5e2d1c0 | 2022-03-10 04:51:40 +0100 | [diff] [blame] | 131 | </idsHeader> |