blob: b8f520f17952763222c7a253c8d9d38b68d8f489 [file] [log] [blame]
Marc Kupietz793f85d2020-09-08 14:40:24 +02001package de.ids_mannheim.korap.tokenizer;
2
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;

import static org.junit.Assert.*;
16
17@RunWith(Parameterized.class)
Marc Kupietz793f85d2020-09-08 14:40:24 +020018public class IPCOffsetTests {
Marc Kupietz74141b32020-10-01 23:23:18 +020019
20 static final String testFiletemplate = "/other_test_data/test.%s.%s.%02d.%s.txt";
Marc Kupietz793f85d2020-09-08 14:40:24 +020021 @Parameterized.Parameters
22 public static Collection<Object[]> data() {
23 Collection<Object[]> testData = new ArrayList<>();
Marc Kupietzf5a7e042020-10-12 10:43:24 +020024 for (String language : new String[]{"de", "en", "fr"}) {
Marc Kupietz74141b32020-10-01 23:23:18 +020025 for (String encoding : new String[]{"ascii", "latin1", "utf8"}) {
26 for (int i = 1; true; i++) {
27 URL inputUrl = IPCOffsetTests.class.getResource(String.format(testFiletemplate, language, encoding, i, "input"));
28 URL positionsUrl = IPCOffsetTests.class.getResource(String.format(testFiletemplate, language, encoding, i, "positions"));
29 URL tokensUrl = IPCOffsetTests.class.getResource(String.format(testFiletemplate, language, encoding, i, "tokens"));
30 if (inputUrl == null)
31 break;
32 testData.add(new String[]{inputUrl.getFile(), positionsUrl.getFile(), tokensUrl.getFile(), language, encoding});
33 }
Marc Kupietz8e197f32020-10-08 09:20:37 +020034 }
Marc Kupietz793f85d2020-09-08 14:40:24 +020035 }
36 return testData;
37 }
38
39 private final String input;
Marc Kupietz74141b32020-10-01 23:23:18 +020040 private final String positions;
41 private final String tokens;
Marc Kupietz8e197f32020-10-08 09:20:37 +020042 private final String encoding;
Marc Kupietz74141b32020-10-01 23:23:18 +020043 private final String language;
Marc Kupietz793f85d2020-09-08 14:40:24 +020044
45 static String readFile(String path)
46 throws IOException {
47 byte[] encoded = Files.readAllBytes(Paths.get(path));
48 return new String(encoded, StandardCharsets.UTF_8);
49 }
50
Marc Kupietz74141b32020-10-01 23:23:18 +020051 public IPCOffsetTests(String input, String positions, String tokens, String language, String encoding) {
Marc Kupietz793f85d2020-09-08 14:40:24 +020052 this.input = input;
Marc Kupietz74141b32020-10-01 23:23:18 +020053 this.positions = positions;
54 this.tokens = tokens;
55 this.language = language;
Marc Kupietz8e197f32020-10-08 09:20:37 +020056 this.encoding = encoding;
Marc Kupietz793f85d2020-09-08 14:40:24 +020057 }
58
59 @Test
60 public void testMainWithOffsetsAndSentencesOnDifferentInputFiles() throws IOException {
Marc Kupietz74141b32020-10-01 23:23:18 +020061 File tempFile = File.createTempFile("position_output", ".txt");
62 String[] args = {"--language", language, "--encoding", encoding, "--no-tokens", "--positions", "--sentence-boundaries", "--force", "-o", tempFile.getAbsolutePath(), input};
Marc Kupietz751868b2020-09-25 17:59:38 +020063 Main.main(args);
Marc Kupietzc419d5b2020-09-17 15:21:26 +020064 String actualResult = readFile(tempFile.getAbsolutePath());
Marc Kupietz74141b32020-10-01 23:23:18 +020065 String goldData = readFile(positions);
Marc Kupietz6d28ed12021-07-15 21:09:47 +020066 assertEquals("Testing "+tempFile+ " against " + new File(positions).getName(), goldData, actualResult);
Marc Kupietz74141b32020-10-01 23:23:18 +020067 }
68
69 @Test
70 public void testMainWithTokenOutputOnDifferentInputFiles() throws IOException {
71 File tempFile = File.createTempFile("token_output", ".txt");
72 String[] args = {"--language", language, "--encoding", encoding, "--tokens", "--force", "-o", tempFile.getAbsolutePath(), input};
73 Main.main(args);
74 String actualResult = readFile(tempFile.getAbsolutePath());
75 String goldData = readFile(tokens);
Marc Kupietz6d28ed12021-07-15 21:09:47 +020076 assertEquals("Testing " + tempFile + " against " + new File(tokens).getName(), goldData, actualResult);
Marc Kupietz793f85d2020-09-08 14:40:24 +020077 }
78}
79