blob: e35d4e3f3420d8bda4c55a742c03b5fa1b3eb444 [file] [log] [blame]
[paths]
train = "/home/daza/ids-projects/DeReKo/spacy_train/spacy_bin_corpora/Tiger.NewOrth.train.spacy"
dev = "/home/daza/ids-projects/DeReKo/spacy_train/spacy_bin_corpora/Tiger.NewOrth.test.spacy"
[system]
gpu_allocator = "pytorch"
[nlp]
lang = "de"
pipeline = ["transformer", "tagger"]
tokenizer = {"@tokenizers": "spacy.Tokenizer.v1"}
[components]
[components.transformer]
factory = "transformer"
[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v1"
name = "bert-base-german-cased"
tokenizer_config = {"use_fast": true}
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96
[components.tagger]
factory = "tagger"
[components.tagger.model]
@architectures = "spacy.Tagger.v1"
nO = null
[components.tagger.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.tagger.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
[corpora]
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 500
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
[training]
accumulate_gradient = 3
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
[training.optimizer]
@optimizers = "Adam.v1"
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 5e-5
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
[initialize]
vectors = null