Reorganise output according to source; add comment line with VRT positional attributes
diff --git a/vrt2xml.awk b/vrt2xml.awk
index 8f37955..d8d6d88 100644
--- a/vrt2xml.awk
+++ b/vrt2xml.awk
@@ -1,9 +1,21 @@
+# vrt2xml.awk
+# to be called for each yearly vrt file of KLK e.g. klk_fi_v2_2021.vrt
+
 BEGIN       {
+    # OUTDIR and YEAR are expected as parameters; they are used here in BEGIN, so pass them with -v (e.g. -v OUTDIR=XML -v YEAR=2021)
+    
     OFS="\t";
 
-    OUTDIR="XML";
-    system("mkdir -p \"" OUTDIR "\"");
-     
+    system("mkdir -p " OUTDIR);
+
+    getline;
+    if($0 !~ /^\s*<!\-\- #vrt positional-attributes/) {
+	print "ERROR: Something went wrong when looking for positional attributes in first line"
+    }
+    else {
+	PATTS = $0;
+    }
+    
     CONTAIN["Aamulehti"]		 = 1; 
     CONTAIN["Etelä-Suomen Sanomat"]	 = 1; 
     CONTAIN["Hämeen Sanomat"]		 = 1; 
@@ -26,70 +38,76 @@
     CONTAIN["Turun Sanomat"]		 = 1;
     
 
+    # start all outfiles according to selected titles in CONTAIN:
     for(c in CONTAIN){
-	OUTFILE = OUTDIR "/" simplify(c) ".xml"; 
+
+	system("mkdir -p " OUTDIR "/" simplify(c));
+	
+	OUTFILE = OUTDIR "/" simplify(c) "/" simplify(c) YEAR ".xml";
+	
 	printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") >  OUTFILE;
+	print PATTS                                            >> OUTFILE;
 	printf("<texts>\n")                                    >> OUTFILE;
-	print OUTFILE                                          > "/dev/stderr";
+	print "Starting outfile " OUTFILE                      > "/dev/stderr";
     }
 }
 
 
-
-/^\s*<\!\-\-/      { print;  } # positional attributes
-
 /^\s*<text/ {
     
 # USE 01 binding_id="2246025"
-    BID = gensub(/^.+binding_id=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 02 date="2021-01-15"
-    DATE = gensub(/^.+date=\"([^\"]+?)\".+$/, "\\1", "1", $0);
-# 03 datefrom="20210115"
-# 04 dateto="20210115"
-# 05 elec_date="_"
-# 06 file=""
+#     03 datefrom="20210115"
+#     04 dateto="20210115"
+#     05 elec_date="_"
+#     06 file=""
 # USE 07 filename_metadata="finclarin_siirto_k2021/0039-5552/2021/alto/2246025_0039-5552_2021-01-15_SK0221_mets.xml"
-    METAFILENAME = gensub(/^.+filename_metadata=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 08 filename_orig="finclarin_siirto_k2021/0039-5552/2021/alto/2246025_0039-5552_2021-01-15_SK0221_page-2021011502210030301.xml
-    ORIGFILENAME = gensub(/^.+filename_orig=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 09 id="t-bcd0f3fa-bbd3dac4"
-    ID = gensub(/^.+id=\"([^\"]+?)\".+$/, "\\1", "1", $0);
-# 10 img_url=""
+#     10 img_url=""
 # USE 11 issue_date="15.01.2021"
-    ISSUEDATE = gensub(/^.+issue_date=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 12 issue_no="SK0221"
-    ISSUENO = gensub(/^.+issue_no=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 13 issue_title="Suomen Kuvalehti"
-    ISSUETITLE = gensub(/^.+issue_title=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 14 label="Suomen Kuvalehti no. SK0221 15.01.2021"
-    LABEL = gensub(/^.+label=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 16 language="fi"
-    LANGUAGE = gensub(/^.+language=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 17 page_id="p1"
-    PAGEID = gensub(/^.+page_id=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 18 page_no="None"
-    PAGENO = gensub(/^.+page_no=\"([^\"]+?)\".+$/, "\\1", "1", $0);
-# 19 part_name="_"
-# 20 publ_id="0039-5552"
-# 21 publ_part=""
+#     19 part_name="_"
+#     20 publ_id="0039-5552"
+#     21 publ_part=""
 # USE 22 publ_title="Suomen Kuvalehti" 
-    PUBLTITLE = gensub(/^.+publ_title=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 23 publ_type="aikakausi"
-    PUBLTYPE = gensub(/^.+publ_type=\"([^\"]+?)\".+$/, "\\1", "1", $0);
 # USE 24 sentcount="70"
-    SENTCOUNT = gensub(/^.+sentcount=\"([^\"]+?)\".+$/, "\\1", "1", $0);
-# 25 sum_lang="|xxx:44|fin:23|eng:3|"
-# 26 timefrom="000000"
-# 27 timeto="235959"
+#     25 sum_lang="|xxx:44|fin:23|eng:3|"
+#     26 timefrom="000000"
+#     27 timeto="235959"
 # USE 28 tokencount="304"
-    TOKENCOUNT = gensub(/^.+tokencount=\"([^\"]+?)\".+$/, "\\1", "1", $0);
-# 29 version_added="KLK-fi-2021">
+#     29 version_added="KLK-fi-2021">
     
     
-#	print BID, DATE, METAFILENAME, ORIGFILENAME, ID, ISSUEDATE, ISSUENO, ISSUETITLE, LABEL, LANGUAGE, PAGEID, PAGENO, PUBLTITLE, PUBLTYPE, SENTCOUNT, TOKENCOUNT
+    BID          = gensub(/^.+binding_id=\"([^\"]+?)\".+$/,        "\\1", "1", $0);
+    DATE         = gensub(/^.+date=\"([^\"]+?)\".+$/,              "\\1", "1", $0);
+    METAFILENAME = gensub(/^.+filename_metadata=\"([^\"]+?)\".+$/, "\\1", "1", $0);
+    ORIGFILENAME = gensub(/^.+filename_orig=\"([^\"]+?)\".+$/,     "\\1", "1", $0);
+    ID           = gensub(/^.+id=\"([^\"]+?)\".+$/,                "\\1", "1", $0);
+    ISSUEDATE    = gensub(/^.+issue_date=\"([^\"]+?)\".+$/,        "\\1", "1", $0);
+    ISSUENO      = gensub(/^.+issue_no=\"([^\"]+?)\".+$/,          "\\1", "1", $0);
+    ISSUETITLE   = gensub(/^.+issue_title=\"([^\"]+?)\".+$/,       "\\1", "1", $0);
+    LABEL        = gensub(/^.+label=\"([^\"]+?)\".+$/,             "\\1", "1", $0);
+    LANGUAGE     = gensub(/^.+language=\"([^\"]+?)\".+$/,          "\\1", "1", $0);
+    PAGEID       = gensub(/^.+page_id=\"([^\"]+?)\".+$/,           "\\1", "1", $0);
+    PAGENO       = gensub(/^.+page_no=\"([^\"]+?)\".+$/,           "\\1", "1", $0);
+    PUBLTITLE    = gensub(/^.+publ_title=\"([^\"]+?)\".+$/,        "\\1", "1", $0);
+    PUBLTYPE     = gensub(/^.+publ_type=\"([^\"]+?)\".+$/,         "\\1", "1", $0);
+    SENTCOUNT    = gensub(/^.+sentcount=\"([^\"]+?)\".+$/,         "\\1", "1", $0);
+    TOKENCOUNT   = gensub(/^.+tokencount=\"([^\"]+?)\".+$/,        "\\1", "1", $0);
+
     
-    OUTFILE = OUTDIR "/" simplify(PUBLTITLE) ".xml" ;
+#   print BID, DATE, METAFILENAME, ORIGFILENAME, ID, ISSUEDATE, ISSUENO, ISSUETITLE, LABEL, LANGUAGE, PAGEID, PAGENO, PUBLTITLE, PUBLTYPE, SENTCOUNT, TOKENCOUNT
     
+    OUTFILE = OUTDIR "/" simplify(PUBLTITLE) "/" simplify(PUBLTITLE) YEAR ".xml" ;
+
+    # append this <text> line to its OUTFILE only if PUBLTITLE is one of the titles in CONTAIN:
     if(CONTAIN[PUBLTITLE]){print $0 > OUTFILE; }
 }
 
@@ -134,7 +152,7 @@
     
     for(c in CONTAIN){
 	if(CONTAIN[c] == 1){
-	    OUTFILE = OUTDIR "/" simplify(c) ".xml";   
+	    OUTFILE = OUTDIR "/" simplify(c) "/" simplify(c) YEAR ".xml";   
 	    printf("</texts>\n") >> OUTFILE;
 	}
     }