Get domain from folder names

Resulting <domain> values after this change:
$ grep '<domain>' output/corpus.p5.xml | sort | uniq
<domain>Blog</domain>
<domain>Creative:Novels_ShortStories</domain>
<domain>Instructional:AdministrativeRegulatoryProse</domain>
<domain>Instructional:Skills_hobbies</domain>
<domain>Learned:Humanities</domain>
<domain>Learned:NaturalSciences</domain>
<domain>Learned:SocialSciences</domain>
<domain>Learned:Technology</domain>
<domain>Persuasive</domain>
<domain>Popular:Humanities</domain>
<domain>Popular:NaturalSciences</domain>
Change-Id: Ic6c4e8cbd80997b20b436d6577e0fb224500cb0f
diff --git a/nor2tei.py b/nor2tei.py
index e02564b..6bbb53b 100644
--- a/nor2tei.py
+++ b/nor2tei.py
@@ -92,6 +92,11 @@
idno.text = uris.text
domain = ET.SubElement(titleStmt, "domain")
domain.text = domains.get("type")
+ splitFName = file.split("/")
+ if (splitFName[-2] in ['Persuasive', 'Blog']):
+ domain.text = splitFName[-2]
+ else:
+ domain.text = splitFName[-3] + ':' + splitFName[-2]
text = ET.SubElement(tei, "text")
text.append(texts[i])