# Summarise <text ...> header lines per publication title (requires gawk).
#
# For every input line containing "<text", the attributes publ_title,
# publ_type, language and tokencount are read from that same line.  At end of
# input one tab-separated row is printed per distinct title:
#
#   title, number of <text> lines, summed tokencount, publ_type, language
#
# publ_type and language are the ones from the last <text> line seen for the
# title.  Lines with no (or an empty) publ_title are reported on stderr and
# left out of the summary.

# Return the value of attribute `name` (written name="value") in `line`,
# or "" when the attribute is not present.  `m` is a local scratch array.
function attrValue(line, name,    m) {
    # Require start-of-line or whitespace before the name so that e.g.
    # "language" does not match inside a longer attribute name.  The negated
    # class stops at the closing quote; awk regexps have no lazy quantifiers.
    if (match(line, "(^|[ \t])" name "=\"([^\"]*)\"", m))
        return m[2]
    return ""
}

/<text/ {
    pubTitle = attrValue($0, "publ_title")
    if (pubTitle == "") {
        # Without a title there is no key to aggregate under; previously the
        # whole input line ended up being used as the key.
        printf "warning: %s line %d: <text> without publ_title, skipped\n", \
            (FILENAME == "" ? "stdin" : FILENAME), FNR > "/dev/stderr"
    } else {
        pubTitles[pubTitle]++
        pubTypes[pubTitle]  = attrValue($0, "publ_type")
        languages[pubTitle] = attrValue($0, "language")
        # "+ 0" forces numeric context; a missing tokencount adds 0.
        tokens[pubTitle]   += attrValue($0, "tokencount") + 0
    }
}

END {
    # Emit clean TSV: with the default OFS each "\t" argument to print would
    # be surrounded by spaces.
    OFS = "\t"
    # Deterministic row order (gawk >= 4.0; harmlessly ignored by older gawk).
    PROCINFO["sorted_in"] = "@ind_str_asc"
    for (pubTitle in pubTitles) {
        print pubTitle, pubTitles[pubTitle], tokens[pubTitle], pubTypes[pubTitle], languages[pubTitle]
    }
}