# Return the value of attribute `name` (written as name="value") in `line`,
# or "" when `line` has no such attribute.
#
# The guard matters: gensub() returns its target unchanged when the regex
# does not match, which would otherwise hand the whole input line back to
# the caller as if it were the attribute value.
#
# The regex is anchored to the whole line with a greedy leading ".+", so at
# least one character must precede the attribute name and one must follow the
# closing quote. gawk regexes have no lazy quantifiers, so the value is
# captured with a plain "[^\"]*" (an empty value is accepted).
#
# `re` is a local variable (extra parameter, awk convention).
function attrValue(line, name,    re) {
    re = "^.+" name "=\"([^\"]*)\".+$"
    if (line !~ re)
        return ""
    return gensub(re, "\\1", 1, line)
}

# For every record containing "<text", read the publication attributes and
# accumulate per-title statistics in global arrays keyed by publ_title:
#   pubTitles[title]  number of matching records seen
#   pubTypes[title]   publ_type of the most recent matching record
#   languages[title]  language of the most recent matching record
#   tokens[title]     running sum of tokencount
# Records that lack publ_title are grouped under the empty-string key.
/<text/ {
    pubTitle = attrValue($0, "publ_title")
    pubType  = attrValue($0, "publ_type")
    language = attrValue($0, "language")
    # "+ 0" forces a number; a missing or non-numeric tokencount counts as 0.
    tokenc   = attrValue($0, "tokencount") + 0

    pubTitles[pubTitle]++
    pubTypes[pubTitle] = pubType
    languages[pubTitle] = language
    tokens[pubTitle] += tokenc
}
| 12 | |
# After all input has been read, print one summary row per publication title:
# title, record count, summed token count, publication type, language.
# The tab strings are separate print arguments, so the output field
# separator (OFS) is written on both sides of every tab.
# Row order follows awk's for-in traversal, which is unspecified.
END {
    for (title in pubTitles) {
        print title, "\t",
              pubTitles[title], "\t",
              tokens[title], "\t",
              pubTypes[title], "\t",
              languages[title]
    }
}