Add files via upload
diff --git a/bunc2tei.py b/bunc2tei.py
new file mode 100644
index 0000000..6ea85c6
--- /dev/null
+++ b/bunc2tei.py
@@ -0,0 +1,52 @@
+import os
+import xml.etree.ElementTree as ET
+
+path = '/home/spassova/BGCorpusExamples'
+files = os.listdir(path)
+
+tree = ET.parse(path + '/' + files[0])
+root = tree.getroot()
+
+titles = root.findall('.//*[@type='title']')
+domains = root.findall('.//*[@type='domain']')
+pageURLs = root.findall('.//*[@type='pageURL']')
+ids = root.findall('.//*[@type='id']')
+mainImageURLs = root.findall('.//*[@type='mainImageURL']')
+mainImageTexts = root.findall('.//*[@type='mainImageTexts']')
+mainImageSources = root.findall('.//*[@type='mainImageSources']')
+authors = root.findall('.//*[@type='authors']')
+authorURLs = root.findall('.//*[@type='authorURLs']')
+categories = root.findall('.//*[@type='category']')
+subCategories = root.findall('.//*[@type='subCategory']')
+tags = root.findall('.//*[@type='tags']')
+datesPublished = root.findall('.//*[@type='datePublished']')
+timesPublished = root.findall('.//*[@type='timePubished']')
+datesModified = root.findall('.//*[@type='dateModified']')
+timesModified = root.findall('.//*[@type='timeModified']')
+
+number_of_texts = len(root.findall('{http://www.tei-c.org/ns/1.0}text'))
+
+for elem in root.findall('*'):
+    root.remove(elem)
+
+root.tag = 'teiCorpus'
+
+for i in range(number_of_texts):
+    tei = ET.SubElement(root, 'TEI')
+    teiHeader = ET.SubElement(tei, 'teiHeader')
+    fileDesc = ET.SubElement(teiHeader, 'fileDesc')
+    titleStmt = ET.SubElement(fileDesc, 'titleStmt')
+    textSigle = ET.SubElement(titleStmt, 'textSigle')
+    textSigle.text = 'BNC/TST.' + f'{i:05}'
+    sourceDesc = ET.SubElement(fileDesc, 'sourceDesc')
+    
+
+
+    
+
+
+
+
+ET.indent(tree, '  ')
+ET.register_namespace('', 'http://www.tei-c.org/ns/1.0')
+tree.write('04_output.xml', encoding='utf-8', xml_declaration=True, method='xml', short_empty_elements=True)
\ No newline at end of file