blob: d2523e8a5bf1171685cda05e3774b9554c2f0ad7 [file] [log] [blame]
#!/bin/bash
# Script to preload spaCy models to a local directory
# Usage: ./preload-models.sh [MODEL_NAME] [TARGET_DIR]
#
# Arguments:
#   MODEL_NAME  spaCy model package to fetch (default: de_core_news_lg)
#   TARGET_DIR  host directory that receives the model (default: ./models)
#
# The model is installed with pip inside a throw-away python:3.12-slim-bookworm
# container and then copied to TARGET_DIR/MODEL_NAME through a bind mount.
#
# Exit status:
#   0  model is available in TARGET_DIR (freshly downloaded or already there)
#   1  docker is missing, or the download/copy failed

set -euo pipefail

MODEL_NAME="${1:-de_core_news_lg}"
TARGET_DIR="${2:-./models}"

echo "Preloading spaCy model: $MODEL_NAME"
echo "Target directory: $TARGET_DIR"

if ! command -v docker >/dev/null 2>&1; then
  echo "✗ Error: docker is required but was not found in PATH" >&2
  exit 1
fi

# Create target directory if it doesn't exist
mkdir -p -- "$TARGET_DIR"

# Check if model already exists
if [ -d "$TARGET_DIR/$MODEL_NAME" ]; then
  echo "Model $MODEL_NAME already exists in $TARGET_DIR"
  echo "Remove it first if you want to re-download: rm -rf $TARGET_DIR/$MODEL_NAME"
  exit 0
fi

# Resolve once and keep it quoted so paths containing spaces survive.
target_abs="$(realpath -- "$TARGET_DIR")"

echo "Downloading model using temporary Docker container..."

# Script executed inside the container. The heredoc delimiter is quoted, so
# nothing in it is expanded by the host shell; MODEL_NAME reaches the container
# via `docker run -e` instead of being spliced into shell or Python source.
# `read -d ''` returns non-zero when it hits end of input, hence `|| true`.
IFS= read -r -d '' container_script <<'EOF' || true
set -e
echo 'Installing spaCy...'
pip install -q spacy

echo "Downloading model $MODEL_NAME..."
echo 'This may take several minutes depending on your connection speed.'
# Run the download directly (no pipeline) so a failure aborts under `set -e`.
python -m spacy download "$MODEL_NAME" --no-cache-dir 2>&1

echo 'Moving model to /models...'
python - <<'PY'
import os
import shutil
import site

model_name = os.environ['MODEL_NAME']

# Get the installed model path
site_packages = site.getsitepackages()[0]
model_path = os.path.join(site_packages, model_name)

# spaCy packages contain a subdirectory with the versioned model
# Find the actual model directory (e.g., de_core_news_lg-3.8.0)
model_subdir = None
for item in sorted(os.listdir(model_path)):
    item_path = os.path.join(model_path, item)
    if os.path.isdir(item_path) and model_name in item:
        model_subdir = item_path
        break

destination = os.path.join('/models', model_name)
# Copy into a staging directory and rename afterwards, so an interrupted copy
# never leaves a directory that looks like a complete model.
staging = os.path.join('/models', '.' + model_name + '.partial')
shutil.rmtree(staging, ignore_errors=True)

if model_subdir:
    # Copy the actual model directory
    shutil.copytree(model_subdir, staging)
    os.replace(staging, destination)
    print(f'Model copied successfully from {model_subdir}!')
else:
    # Fallback: copy the whole package
    shutil.copytree(model_path, staging)
    os.replace(staging, destination)
    print('Model copied successfully!')
PY

# Set permissions so all users can read/write/execute. Done here rather than
# on the host because the files are created by the container's user
# (presumably root with a standard rootful Docker daemon), which an
# unprivileged host user may not be allowed to chmod.
echo 'Setting permissions...'
chmod -R a+rwX "/models/$MODEL_NAME"
EOF

# Use a temporary container to download the model. Wrapped in `if !` so a
# failure prints the error below instead of exiting silently under `set -e`.
if ! docker run --rm \
  -e MODEL_NAME="$MODEL_NAME" \
  -v "$target_abs":/models \
  python:3.12-slim-bookworm \
  bash -c "$container_script"; then
  echo "✗ Error: Model download failed" >&2
  exit 1
fi

if [ -d "$TARGET_DIR/$MODEL_NAME" ]; then
  echo ""
  echo "✓ Model $MODEL_NAME successfully preloaded to $TARGET_DIR/$MODEL_NAME"
  echo ""
  echo "You can now run the container with:"
  echo "  docker run --rm -i -v $target_abs:/local/models korap/conllu-spacy"
else
  echo "✗ Error: Model download failed" >&2
  exit 1
fi