blob: 1c384203a5ec8f7d590a9178073232e921c3df39 [file] [log] [blame]
Harald Lungenaccccb22024-08-23 09:19:04 +03001 /<text/ {
2 pubTitle = gensub(/^.+publ_title=\"([^\"]+?)\".+$/, "\\1", "1", $0);
3 pubType = gensub(/^.+publ_type=\"([^\"]+?)\".+$/, "\\1", "1", $0);
4 language = gensub(/^.+language=\"([^\"]+?)\".+$/, "\\1", "1", $0);
5 tokenc = gensub(/^.+tokencount=\"([^\"]+?)\".+$/, "\\1", "1", $0);
6
7 pubTitles[pubTitle]++;
8 pubTypes[pubTitle] = pubType;
9 languages[pubTitle]= language;
10 tokens[pubTitle] = tokens[pubTitle] + tokenc;
11 }
12
13 END {
14 for (pubTitle in pubTitles){
15 print pubTitle, "\t", pubTitles[pubTitle], "\t", tokens[pubTitle], "\t", pubTypes[pubTitle], "\t", languages[pubTitle];
16 }
17 }