blob: 1c384203a5ec8f7d590a9178073232e921c3df39 [file] [log] [blame]
# Look up attribute NAME (written as NAME="value") in LINE.
#   line  - the input line to search
#   name  - attribute name; must not contain regex metacharacters
#   dest  - array that receives dest[name] = value when the attribute exists
# Returns 1 when the attribute was found, 0 otherwise (dest is left alone).
# The name has to start at an attribute-name boundary, so a request for
# "publ_title" does not match e.g. orig_publ_title="...".
# Uses the three-argument form of match(), a gawk extension (the script
# already depended on gawk through gensub()).
function text_attr(line, name, dest,    m) {
    if (!match(line, "(^|[^[:alnum:]_:.-])" name "=\"([^\"]*)\"", m))
        return 0;
    dest[name] = m[2];
    return 1;
}

# For every line containing "<text", accumulate per-publication statistics
# keyed by the publ_title attribute:
#   pubTitles[title]  number of matching lines seen for that title
#   tokens[title]     sum of the tokencount attribute values
#   pubTypes[title]   most recently seen publ_type value
#   languages[title]  most recently seen language value
# Lines without a publ_title attribute are skipped; previously gensub()
# returned the whole unmodified line in that case, and that line was then
# used as the title key (or as the type/language value).
/<text/ {
    delete attrs;
    if (text_attr($0, "publ_title", attrs)) {
        text_attr($0, "publ_type", attrs);
        text_attr($0, "language", attrs);
        text_attr($0, "tokencount", attrs);

        pubTitle = attrs["publ_title"];
        pubType = attrs["publ_type"];
        language = attrs["language"];
        tokenc = attrs["tokencount"];

        pubTitles[pubTitle]++;

        # Do not overwrite an already known type/language with an empty
        # value just because this particular line lacks the attribute.
        if (("publ_type" in attrs) || !(pubTitle in pubTypes))
            pubTypes[pubTitle] = pubType;
        if (("language" in attrs) || !(pubTitle in languages))
            languages[pubTitle] = language;

        # "+ 0" keeps the sum numeric when tokencount is absent or empty.
        tokens[pubTitle] = tokens[pubTitle] + (tokenc + 0);
    }
}
# Print the collected statistics as tab-separated rows, one per publication
# title, with the columns:
#   title <TAB> line count <TAB> token sum <TAB> publication type <TAB> language
END {
    # for-in traversal order is unspecified; ask gawk to walk the titles in
    # ascending string order so that the report is reproducible, and restore
    # whatever ordering was configured before once the loop is done.
    hadSortedIn = ("sorted_in" in PROCINFO);
    prevSortedIn = hadSortedIn ? PROCINFO["sorted_in"] : "";
    PROCINFO["sorted_in"] = "@ind_str_asc";

    for (pubTitle in pubTitles) {
        # Build the row by concatenation: "print a, \"\t\", b" would insert
        # OFS (a space by default) on both sides of every tab and pad each
        # field with stray spaces.
        print pubTitle "\t" pubTitles[pubTitle] "\t" tokens[pubTitle] "\t" pubTypes[pubTitle] "\t" languages[pubTitle];
    }

    if (hadSortedIn)
        PROCINFO["sorted_in"] = prevSortedIn;
    else
        delete PROCINFO["sorted_in"];
}