add an excerpt from NKJP 1M v 1.2: infrastructure files in the root and the KOT subcorpus

Change-Id: I56a68249bb41821f6e5a6b33303182ed75ee73cf
diff --git a/nkjp2korap_sample1/header.dtd b/nkjp2korap_sample1/header.dtd
new file mode 100644
index 0000000..b660d33
--- /dev/null
+++ b/nkjp2korap_sample1/header.dtd
@@ -0,0 +1,133 @@
+<!-- Supplementary DTD for the local header (header.xml).
+     It defines a subset of the TEI schema for local headers.
+     Usage:
+     <!DOCTYPE teiHeader SYSTEM "header.dtd"> in header.xml
+
+     ver. 0.2, Piotr Bański, 04-05-2009
+     ver. 0.3, Adam Przepiórkowski, 13-05-2009
+     ver. 0.4, Adam Przepiórkowski, 21-05-2009
+     ver. 0.5, Piotr Bański and Adam Przepiórkowski, 23-05-2009
+     ver. 0.5.1, Adam Przepiórkowski, 23-05-2009
+          AP: Added @xml:lang to <topic>.
+     ver. 0.5.2, Adam Przepiórkowski, 25-09-2009
+          AP: Changed the possible values of nkjp:subcorpus.
+          AP: Possibly made various other modifications between
+              23-05-2009 and 25-09-2009. 
+     ver. 0.5.3, Adam Przepiórkowski, 14-02-2010
+          AP: Added @from and @to to <date>
+-->
+
+<!ELEMENT teiHeader (fileDesc, encodingDesc?,  profileDesc, revisionDesc)> <!-- NOTE(review): encodingDesc is optional here but has no ELEMENT declaration in this DTD, so a header that uses it would not validate; confirm whether that is intended -->
+<!ATTLIST teiHeader xmlns CDATA #FIXED "http://www.tei-c.org/ns/1.0"> <!-- default namespace, fixed to the TEI URI -->
+<!ATTLIST teiHeader xmlns:nkjp CDATA #FIXED "http://www.nkjp.pl/ns/1.0"> <!-- fixes the nkjp: prefix used by nkjp:subcorpus, nkjp:file and nkjp:topic -->
+<!ATTLIST teiHeader xml:lang CDATA #FIXED "en">
+<!ATTLIST teiHeader xml:id ID #REQUIRED>
+<!ATTLIST teiHeader type CDATA #FIXED "text">
+<!ELEMENT fileDesc (titleStmt, extent*, publicationStmt, sourceDesc)> <!-- extent is the only optional (and repeatable) child -->
+<!ELEMENT publicationStmt (availability,  idno*)>
+<!ATTLIST publicationStmt nkjp:subcorpus (balanced|unbalanced|restricted|one_million) #REQUIRED>
+<!ELEMENT availability (p+)>
+<!ATTLIST availability status (free | restricted) #REQUIRED>
+<!ELEMENT extent (num+)>
+<!ATTLIST extent nkjp:file (text.xml) #REQUIRED> <!-- "text.xml" is the only permitted value -->
+<!ELEMENT num (#PCDATA)>
+<!ATTLIST num type (word | segment | sentence | character) #REQUIRED>
+<!ATTLIST num value CDATA #IMPLIED>
+<!ELEMENT profileDesc (langUsage?, textClass, nkjp:topic?, particDesc?, settingDesc?)> <!-- textClass is the only mandatory child -->
+<!ELEMENT settingDesc (setting+)>
+<!ELEMENT setting (name*, date)>
+<!ELEMENT name (#PCDATA | ref)*>
+<!ATTLIST name type (person|place|voivodship) #IMPLIED> <!-- AP: person, by default (a convention only: #IMPLIED makes the DTD supply no default value) -->
+<!ATTLIST name xml:lang (pl | en) #IMPLIED>
+<!ATTLIST name ref CDATA #IMPLIED>
+<!ELEMENT nkjp:topic (#PCDATA)>
+<!ATTLIST nkjp:topic xml:lang (pl | en) #IMPLIED>
+<!ELEMENT particDesc (person+)>
+<!ELEMENT person (persName, sex?, education?, age?, residence?)> <!-- persName is the only mandatory child; order is fixed -->
+<!ATTLIST person role (author|editor|speaker) #REQUIRED>
+<!ATTLIST person xml:id ID #IMPLIED>
+<!ELEMENT persName (#PCDATA)>
+<!ELEMENT sex (#PCDATA)>
+<!ATTLIST sex value (0|1|2|9) #REQUIRED> <!-- NOTE(review): presumably ISO/IEC 5218 codes, as TEI recommends for sex/@value; confirm -->
+<!ELEMENT education (#PCDATA)>
+<!ATTLIST education xml:lang (pl | en) #IMPLIED>
+<!ELEMENT age (#PCDATA)>
+<!ELEMENT residence (#PCDATA)>
+<!ELEMENT langUsage (language+)>
+<!ELEMENT language (#PCDATA)>
+<!ATTLIST language ident (pl-x-formal | pl-x-informal) #REQUIRED> <!-- required by TEI -->
+<!ELEMENT revisionDesc (change+)> <!-- at least one change entry is required -->
+<!ATTLIST revisionDesc xml:id ID #IMPLIED>
+
+<!-- PB: content of fileDesc (titleStmt and its children, plus idno) -->
+<!ELEMENT titleStmt (title+, author*, respStmt*)>
+<!ELEMENT title (#PCDATA)>
+<!ATTLIST title xml:lang (pl | en) #IMPLIED>
+<!ATTLIST title level (a | j | m) #IMPLIED>
+<!ATTLIST title type (file) #IMPLIED> <!-- "file" is the only permitted value -->
+<!ELEMENT respStmt (name, resp)>
+<!ATTLIST respStmt xml:lang (pl | en) #IMPLIED>
+<!ELEMENT resp (#PCDATA | ref)*>
+<!ATTLIST resp xml:lang (pl | en) #IMPLIED>
+
+<!ELEMENT idno (#PCDATA)>
+<!ATTLIST idno type (ISSN|ISBN|issue|para|nkjp) #REQUIRED>
+
+<!ELEMENT sourceDesc (bibl, listBibl*, recordingStmt?)>
+<!-- PB: bibl has to be this "loose" because the TEI schema allows #PCDATA -->
+<!ELEMENT bibl (ptr?, title*, author*, respStmt*, date*, publisher?, pubPlace?, idno*, ref?, note*)>
+<!ATTLIST bibl xml:id ID #REQUIRED>
+<!ELEMENT author (#PCDATA)>
+<!ATTLIST author ref CDATA #IMPLIED> <!-- optional reference; no default value (the earlier "default: audio" remark belonged to recording/@type) -->
+<!ELEMENT listBibl (bibl+)>
+<!ELEMENT recordingStmt (recording+)>
+<!ELEMENT recording (broadcast?)>
+<!ELEMENT broadcast (bibl)>
+<!ATTLIST recording type (audio|video) #IMPLIED> <!-- default: audio (a convention only: #IMPLIED makes the DTD supply no default value) -->
+<!ATTLIST recording dur CDATA #IMPLIED>
+<!ELEMENT publisher (#PCDATA)>
+<!ELEMENT pubPlace (#PCDATA)>
+<!ELEMENT date (#PCDATA)>
+<!ATTLIST date type (created|first_published|published|acquired|recorded) #REQUIRED>
+<!-- PB: the when attribute MUST have the form "yyyy(-mm(-dd))"; unfortunately, this cannot be enforced within a DTD -->
+<!ATTLIST date when CDATA #IMPLIED>
+<!ATTLIST date from CDATA #IMPLIED>
+<!ATTLIST date to CDATA #IMPLIED>
+<!ATTLIST date xml:lang (pl | en) #IMPLIED>
+<!ELEMENT note (#PCDATA | ref)*>
+<!ATTLIST note type (text_origin | original_header) #REQUIRED>
+<!ATTLIST note xml:lang (pl | en) #IMPLIED>
+<!ELEMENT ref (#PCDATA)>
+<!ATTLIST ref type CDATA #IMPLIED>
+<!ATTLIST ref target CDATA #IMPLIED>
+
+<!-- PB: content of profileDesc; unfortunately, IDREF or NMTOKEN cannot be used here, it has to be CDATA -->
+<!ELEMENT textClass (classCode | keywords | catRef)+>
+<!ELEMENT classCode (#PCDATA)>
+<!ATTLIST classCode scheme CDATA #REQUIRED>
+<!ELEMENT keywords (list)>
+<!ATTLIST keywords scheme CDATA #REQUIRED>
+<!ELEMENT catRef EMPTY>
+<!ATTLIST catRef scheme CDATA #REQUIRED>
+<!ATTLIST catRef target CDATA #REQUIRED>
+
+
+<!ELEMENT change (#PCDATA | ptr | list | gi | att | val | tag)*>
+<!ATTLIST change who CDATA #REQUIRED>
+<!-- PB: @when is subject to the same restriction as in <date>: "yyyy(-mm(-dd))" -->
+<!ATTLIST change when CDATA #IMPLIED>
+<!ELEMENT ptr EMPTY>
+<!ATTLIST ptr target CDATA #IMPLIED>
+<!ELEMENT gi (#PCDATA)>
+<!ELEMENT att (#PCDATA)>
+<!ELEMENT val (#PCDATA)>
+<!ELEMENT tag (#PCDATA)>
+
+
+<!ELEMENT list (head?, item+)>
+<!ATTLIST list xml:lang (pl | en) #IMPLIED>
+<!ELEMENT head (#PCDATA)>
+<!ELEMENT item (#PCDATA | ref | list)*> <!-- item may contain list, so lists can nest -->
+
+<!ELEMENT p (#PCDATA | ref)*> <!-- paragraph; the content of availability -->
+<!ATTLIST p xml:lang (pl | en) #IMPLIED>