| Marc Kupietz | 793f85d | 2020-09-08 14:40:24 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.tokenizer; |
| 2 | |
| 3 | import org.apache.maven.surefire.shade.org.apache.commons.io.output.ByteArrayOutputStream; |
| 4 | import org.junit.Test; |
| 5 | import org.junit.runner.RunWith; |
| 6 | import org.junit.runners.Parameterized; |
| 7 | |
| 8 | import java.io.*; |
| 9 | import java.net.URL; |
| 10 | import java.nio.charset.StandardCharsets; |
| 11 | import java.nio.file.Files; |
| 12 | import java.nio.file.Paths; |
| 13 | import java.util.ArrayList; |
| 14 | import java.util.Collection; |
| 15 | |
| 16 | import static org.junit.Assert.*; |
| 17 | |
| 18 | @RunWith(Parameterized.class) |
| 19 | @net.jcip.annotations.NotThreadSafe |
| 20 | public class IPCOffsetTests { |
| 21 | @Parameterized.Parameters |
| 22 | public static Collection<Object[]> data() { |
| 23 | Collection<Object[]> testData = new ArrayList<>(); |
| 24 | for (int i = 1; true; i++) { |
| 25 | URL inputUrl = IPCOffsetTests.class.getResource(String.format("/other_test_data/test%02d_input.txt", i)); |
| 26 | URL goldUrl = IPCOffsetTests.class.getResource(String.format("/other_test_data/test%02d_gold.txt", i)); |
| 27 | if (inputUrl == null) |
| 28 | break; |
| 29 | testData.add(new String[]{inputUrl.getFile(), goldUrl.getFile()}); |
| 30 | } |
| 31 | return testData; |
| 32 | } |
| 33 | |
| 34 | private final String input; |
| 35 | private final String gold; |
| 36 | |
| 37 | static String readFile(String path) |
| 38 | throws IOException { |
| 39 | byte[] encoded = Files.readAllBytes(Paths.get(path)); |
| 40 | return new String(encoded, StandardCharsets.UTF_8); |
| 41 | } |
| 42 | |
| 43 | public IPCOffsetTests(String input, String gold) { |
| 44 | this.input = input; |
| 45 | this.gold = gold; |
| 46 | } |
| 47 | |
| 48 | @Test |
| 49 | public void testMainWithOffsetsAndSentencesOnDifferentInputFiles() throws IOException { |
| 50 | final ByteArrayOutputStream myOut = new ByteArrayOutputStream(); |
| 51 | System.setOut(new PrintStream(myOut)); |
| 52 | String[] args = {"-s", input}; |
| 53 | KorAPTokenizerImpl.main(args); |
| 54 | String goldData = readFile(gold); |
| 55 | assertEquals(goldData, myOut.toString(StandardCharsets.UTF_8)); |
| 56 | } |
| 57 | } |
| 58 | |