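Get domain from folder names

Build each text's <domain> from the folder names in its input file path
(<grandparent folder>:<parent folder>, or just the parent folder for
'Persuasive') instead of the value previously read via domains.get("type").

Resulting domains in the generated corpus:
grep '<domain>' output/corpus.p5.xml | sort | uniq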
<domain>Creative:Novels_ShortStories</domain>
<domain>Informational:Reportage</domain>
<domain>Instructional:AdministrativeRegulatoryProse</domain>
<domain>Instructional:Skills_Hobbies</domain>
<domain>Learned:Humanities</domain>
<domain>Learned:NaturalSciences</domain>
<domain>Learned:SocialSciences</domain>
<domain>Learned:Technology</domain>
<domain>Persuasive</domain>
<domain>Popular:Humanities</domain>
<domain>Popular:NaturalSciences</domain>
<domain>Popular:SocialSciences</domain>
<domain>Popular:Technology</domain>
Change-Id: I8b5fbba4caa655d4d10ac733e7733dc743da0ec9
diff --git a/eng2tei.py b/eng2tei.py
index 2cb73b4..1e02b8d 100644
--- a/eng2tei.py
+++ b/eng2tei.py
@@ -91,7 +91,11 @@
idno.set("type", "URI")
idno.text = uris.text
domain = ET.SubElement(titleStmt, "domain")
- domain.text = domains.get("type")
+ splitFName = file.split("/")
+ if (splitFName[-2] == 'Persuasive'):
+ domain.text = splitFName[-2]
+ else:
+ domain.text = splitFName[-3] + ':' + splitFName[-2]
text = ET.SubElement(tei, "text")
text.append(texts[i])