# spaCy training configuration: a German ("de") pipeline consisting of a
# transformer component followed by a tagger that listens to it, trained on
# the Tiger.NewOrth corpus files named in [paths].
#
# Only full-line comments are used in this file; values are parsed by the
# config loader, so trailing comments on a value line are avoided.

[paths]
# Binary .spacy corpora. The [corpora.train] and [corpora.dev] readers below
# pick these up through variable interpolation.
# NOTE(review): both paths are absolute, machine-specific locations; they can
# be overridden on the command line (--paths.train / --paths.dev) - confirm
# how this file is meant to be shared.
train = "/home/daza/ids-projects/DeReKo/spacy_train/spacy_bin_corpora/Tiger.NewOrth.train.spacy"
# NOTE(review): the dev corpus points at the file named "...test.spacy".
# Confirm that evaluating on this split during training is intended.
dev = "/home/daza/ids-projects/DeReKo/spacy_train/spacy_bin_corpora/Tiger.NewOrth.test.spacy"

[system]
# Route GPU memory allocation through PyTorch (see the spaCy training docs
# for the accepted values of this setting).
gpu_allocator = "pytorch"

[nlp]
lang = "de"
# Components run in this order: the transformer first, then the tagger.
pipeline = ["transformer", "tagger"]
tokenizer = {"@tokenizers": "spacy.Tokenizer.v1"}

[components]

[components.transformer]
factory = "transformer"

[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v1"
# Pretrained model identifier handed to the transformer architecture.
name = "bert-base-german-cased"
tokenizer_config = {"use_fast": true}

[components.transformer.model.get_spans]
# Strided windows over each document. stride < window, so consecutive spans
# overlap by window - stride = 32 units.
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v1"
# Output width is left unset here.
# NOTE(review): presumably inferred from the label set at initialization -
# confirm against the spacy.Tagger.v1 documentation.
nO = null

[components.tagger.model.tok2vec]
# The tagger has no embedding layer of its own; it uses a listener on the
# transformer component. grad_factor = 1.0 leaves the tagger's gradients
# unscaled when they are passed back.
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.tagger.model.tok2vec.pooling]
# Pooling layer used by the listener: mean reduction.
@layers = "reduce_mean.v1"

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
# Upper bound on training document length for this reader.
max_length = 500

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
# 0 = no length limit (per the spacy.Corpus.v1 reader documentation).
max_length = 0

[training]
# Accumulate gradients over 3 batches before each optimizer update.
accumulate_gradient = 3
# Dot-paths to the reader sections defined under [corpora].
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"

[training.optimizer]
@optimizers = "Adam.v1"

[training.optimizer.learn_rate]
# Learning-rate schedule: linear warm-up over the first 250 steps, planned
# over 20000 total steps, with 5e-5 as the configured rate.
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 5e-5

[training.batcher]
# Batches are sized by padded length; with discard_oversize = true, items
# that exceed the size budget on their own are dropped rather than batched.
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256

[initialize]
# No static word vectors are loaded.
vectors = null