#!/bin/bash
#
# Parses the command-line flags, makes sure the requested spaCy model is
# available, and then runs /app/systems/parse_spacy_pipe.py with that model.

# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Default values; the -m, -d and -g options override them.
model="de_core_news_lg"
use_dependencies="True"
use_germalemma="True"
9
# Print the command-line help and terminate the script.
#
# Globals:   model (read; shown as the default value of -m)
# Arguments: $1 - exit status to terminate with (optional, default: 1)
# Outputs:   the help text, on stdout when the status is 0 and on stderr
#            otherwise, so that help printed because of a usage error does
#            not end up in the script's regular output
# Returns:   does not return; exits with the requested status
usage() {
  local status="${1:-1}"
  local fd=1

  if [[ "$status" != 0 ]]; then
    fd=2
  fi

  {
    echo "Usage: $0 [-h] [-m MODEL] [-L] [-d] [-g]"
    echo " -h Display this help message"
    echo " -m MODEL Specify spaCy model (default: $model)"
    echo " -L List available/installed models"
    echo " -d Disable dependency parsing (faster processing)"
    echo " -g Disable GermaLemma (use spaCy lemmatizer only)"
  } >&"$fd"

  exit "$status"
}
19
# Parse command line options
#
# Globals:   model, use_dependencies, use_germalemma (written),
#            OPTIND, OPTARG (written by getopts)
# Arguments: the script's argument list ("$@")
# Outputs:   error messages on stderr; the output of "spacy info" for -L
# Returns:   0 when all arguments were accepted; exits the script for -h, -L,
#            an unknown option, a missing option argument or any positional
#            argument
parse_options() {
  local opt
  OPTIND=1

  # The leading ':' puts getopts into silent mode. Only then does it report a
  # missing argument as ':' and leave the offending option letter in OPTARG;
  # without it both error branches below would never see the letter.
  while getopts ":hm:Ldg" opt; do
    case "$opt" in
      h)
        # Help that was asked for is not an error.
        usage 0
        ;;
      m)
        model="$OPTARG"
        ;;
      L)
        python -m spacy info 2>/dev/null || echo "No models installed"
        exit 0
        ;;
      d)
        use_dependencies="False"
        ;;
      g)
        use_germalemma="False"
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        usage
        ;;
    esac
  done

  # Anything left after the options is a positional argument, which this
  # script does not accept.
  if (( OPTIND <= $# )); then
    usage
  fi
}

parse_options "$@"
53
# Directory for models kept outside the venv, and the location of the
# selected model inside that directory.
MODEL_DIR="/local/models"
MODEL_PATH="${MODEL_DIR}/${model}"

# Ensure MODEL_DIR exists. A failure here does not stop the script.
mkdir -p "$MODEL_DIR"
59
# Function to check if model is installed and usable
#
# Arguments: $1 - model name handed to spacy.load()
# Outputs:   nothing on stderr (the probe's stderr is discarded)
# Returns:   the exit status of the Python probe; 0 means spacy.load()
#            completed without raising
is_model_installed() {
  local model_name="$1"

  # The name is passed as a program argument and read from sys.argv rather
  # than being pasted into the Python source, so quotes or other special
  # characters in it cannot change the code that gets executed.
  python -c 'import sys, spacy; spacy.load(sys.argv[1])' "$model_name" 2>/dev/null
}
67
# Function to check if preloaded model exists and is valid
#
# Arguments: $1 - directory that is expected to hold the model
# Returns:   0 when the directory contains a regular file named config.cfg,
#            1 otherwise (also for an empty argument, which would otherwise
#            be checked as /config.cfg)
has_preloaded_model() {
  local model_path="$1"

  [[ -n "$model_path" && -f "$model_path/config.cfg" ]]
}
77
# Run the download helper script for one model.
#
# Arguments: $1 - model name
# Outputs:   the helper's stdout and stderr, copied to this script's stderr
#            and to /tmp/spacy_download.log
# Returns:   the exit status of the download helper itself, independent of
#            whether writing the log copy succeeded
_download_model() {
  local model_name="$1"

  python /app/download_with_progress.py "$model_name" 2>&1 \
    | tee /tmp/spacy_download.log >&2
  return "${PIPESTATUS[0]}"
}

# Move a freshly installed model out of site-packages and link it back.
#
# Arguments: $1 - model name, $2 - destination path
# Outputs:   progress and warning messages on stderr
# Returns:   always 0; a model that cannot be moved simply stays where the
#            download put it
_persist_model() {
  local model_name="$1"
  local model_path="$2"
  local site_packages installed_model

  site_packages=$(python -c "import site; print(site.getsitepackages()[0])") \
    || return 0
  [[ -n "$site_packages" ]] || return 0

  installed_model="$site_packages/$model_name"
  [[ -d "$installed_model" ]] || return 0

  # mv would place the model *inside* an existing destination directory
  # instead of replacing it, so an existing destination is left untouched.
  if [[ -e "$model_path" ]]; then
    echo "$model_path already exists, leaving downloaded model in venv" >&2
    return 0
  fi

  echo "Moving model to $model_path for persistence..." >&2
  if mv "$installed_model" "$model_path" 2>/dev/null; then
    # Create symlink back. Only done after a successful move: otherwise the
    # original directory is still there and the link would land inside it.
    ln -sf "$model_path" "$installed_model" 2>/dev/null || true
    echo "Model saved to $model_path" >&2
  else
    echo "Could not move model to $model_path, leaving it in venv" >&2
  fi
  return 0
}

# Function to install model
#
# Globals:   MODEL_DIR (read)
# Arguments: $1 - model name
# Outputs:   status messages on stderr
# Returns:   0 when the model is available (found under MODEL_DIR, already
#            loadable, or downloaded now), 1 when the download failed
install_model() {
  local model_name="$1"
  local model_path="$MODEL_DIR/$model_name"
  local persist="yes"

  # Check if model exists in MODEL_DIR - if so, we'll use absolute path
  if has_preloaded_model "$model_path"; then
    echo "Found preloaded model in $model_path" >&2
    echo "Will use absolute path to avoid download" >&2
    return 0
  fi

  # Check if already installed in venv
  if is_model_installed "$model_name"; then
    echo "Model $model_name already installed in venv" >&2
    return 0
  fi

  # Without write access to MODEL_DIR the download stays in the venv.
  if [[ ! -w "$MODEL_DIR" ]]; then
    persist="no"
    echo "Cannot write to $MODEL_DIR, installing to venv (ephemeral)" >&2
  fi

  if ! _download_model "$model_name"; then
    echo "Failed to download model $model_name" >&2
    return 1
  fi

  if [[ "$persist" == "yes" ]]; then
    _persist_model "$model_name" "$model_path"
  fi
  return 0
}
126
# Install or verify model
if ! install_model "$model"; then
  echo "ERROR: Could not install model $model, aborting." >&2
  exit 1
fi

# Determine which model path to use
# If preloaded model exists, use absolute path; otherwise use model name
if has_preloaded_model "$MODEL_PATH"; then
  MODEL_TO_USE="$MODEL_PATH"
  echo "Using preloaded model at: $MODEL_TO_USE" >&2
else
  MODEL_TO_USE="$model"
  echo "Using installed model: $MODEL_TO_USE" >&2
fi

# Set environment variables for the Python script
export SPACY_USE_DEPENDENCIES="$use_dependencies"
export SPACY_USE_GERMALEMMA="$use_germalemma"

# Log configuration (on stderr, like every other status message here)
{
  echo "Configuration:"
  echo " Model: $MODEL_TO_USE"
  echo " Use dependencies: $use_dependencies"
  echo " Use GermaLemma: $use_germalemma"
} >&2

# Run the spaCy tagging pipeline; as the last command, its exit status
# becomes the exit status of this script.
python /app/systems/parse_spacy_pipe.py \
  --spacy_model "$MODEL_TO_USE" \
  --corpus_name "stdin" \
  --gld_token_type "CoNLLUP_Token" \
  --comment_str "#"