#!/bin/bash
# Preload a spaCy model into a local directory by downloading it inside a
# temporary Docker container and copying it out through a bind mount.
#
# Usage: ./preload-models.sh [MODEL_NAME] [TARGET_DIR]
#   MODEL_NAME  spaCy model package to fetch   (default: de_core_news_lg)
#   TARGET_DIR  host directory for the model   (default: ./models)
#
# Exit status: 0 if the model is (or already was) present in TARGET_DIR,
# non-zero if the download or copy failed.
#
# Requires: docker, realpath.

set -euo pipefail

MODEL_NAME="${1:-de_core_news_lg}"
TARGET_DIR="${2:-./models}"

echo "Preloading spaCy model: $MODEL_NAME"
echo "Target directory: $TARGET_DIR"

# Create target directory if it doesn't exist
mkdir -p "$TARGET_DIR"

# Nothing to do if the model is already there
if [[ -d "$TARGET_DIR/$MODEL_NAME" ]]; then
  echo "Model $MODEL_NAME already exists in $TARGET_DIR"
  echo "Remove it first if you want to re-download: rm -rf $TARGET_DIR/$MODEL_NAME"
  exit 0
fi

# Resolve the absolute path once (docker -v needs an absolute host path);
# quoted so directories containing spaces survive.
TARGET_ABS="$(realpath "$TARGET_DIR")"

echo "Downloading model using temporary Docker container..."

# Script executed inside the container. The heredoc delimiter is quoted, so
# nothing is expanded on the host: the model name reaches the container as the
# MODEL_NAME environment variable instead of being spliced into shell/Python
# source, which keeps quotes or metacharacters in the argument harmless.
# `read -d ''` always returns non-zero at end of input, hence the `|| true`.
IFS= read -r -d '' CONTAINER_SCRIPT <<'CONTAINER_EOF' || true
set -euo pipefail

echo 'Installing spaCy...'
pip install -q spacy

echo "Downloading model $MODEL_NAME..."
echo 'This may take several minutes depending on your connection speed.'
# pipefail (set above) makes a failed download abort the script even though
# its output is piped through the echo loop.
python -m spacy download "$MODEL_NAME" --no-cache-dir 2>&1 | while IFS= read -r line; do
  echo "$line"
  # Prefix whatever follows a "Downloading" line with a progress label
  if [[ "$line" == *"Downloading"* ]]; then
    echo -n "Progress: "
  fi
done

echo 'Moving model to /models...'
python - <<'PYTHON_EOF'
import os
import shutil
import site

model_name = os.environ['MODEL_NAME']
destination = os.path.join('/models', model_name)

# Get the installed model path
site_packages = site.getsitepackages()[0]
model_path = os.path.join(site_packages, model_name)

# spaCy packages contain a subdirectory with the versioned model
# (e.g., de_core_news_lg-3.8.0); find it.
model_subdir = None
for item in sorted(os.listdir(model_path)):
    item_path = os.path.join(model_path, item)
    if os.path.isdir(item_path) and model_name in item:
        model_subdir = item_path
        break

if model_subdir:
    # Copy the actual model directory
    shutil.copytree(model_subdir, destination)
    print(f'Model copied successfully from {model_subdir}!')
else:
    # Fallback: copy the whole package
    shutil.copytree(model_path, destination)
    print('Model copied successfully!')
PYTHON_EOF

# Set permissions so all users can read/write/execute. Done inside the
# container because the copied files belong to the container user (typically
# root), so a chmod from the host can fail for unprivileged users.
echo 'Setting permissions...'
chmod -R a+rwX "/models/$MODEL_NAME"
CONTAINER_EOF

# Test the container run explicitly: under `set -e` a bare failing command
# would abort the script before any error message could be printed.
if ! docker run --rm \
  -e MODEL_NAME="$MODEL_NAME" \
  -v "$TARGET_ABS":/models \
  python:3.12-slim-bookworm \
  bash -c "$CONTAINER_SCRIPT"; then
  echo "✗ Error: Model download failed" >&2
  exit 1
fi

# The container reported success; make sure the model really landed on the host.
if [[ ! -d "$TARGET_DIR/$MODEL_NAME" ]]; then
  echo "✗ Error: Model download failed" >&2
  exit 1
fi

echo ""
echo "✓ Model $MODEL_NAME successfully preloaded to $TARGET_DIR/$MODEL_NAME"
echo ""
echo "You can now run the container with:"
echo " docker run --rm -i -v $TARGET_ABS:/local/models korap/conllu-spacy"