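originTraceR: default to English prompt and add error messages

Also changes the default model of augment_metadata_with_original() from "deepseek-chat" to "gemini-2.5-pro", and changes the script's active call from model = "deepseek" to model = "deepseek-chat".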
diff --git a/scripts/originTraceR.R b/scripts/originTraceR.R
index f064fa5..06ca031 100644
--- a/scripts/originTraceR.R
+++ b/scripts/originTraceR.R
@@ -46,27 +46,52 @@
translator = NA_character_,
publisher = NA_character_,
pubDate = NA_character_,
- ISBN = NA_character_) {
- fields <- c(
- sprintf("Titel: %s", dplyr::coalesce(title, "")),
- sprintf("Untertitel: %s", dplyr::coalesce(subTitle, "")),
- sprintf("Autor/in: %s", dplyr::coalesce(author, "")),
- sprintf("Übersetzer/in: %s", dplyr::coalesce(translator, "")),
- sprintf("Verlag: %s", dplyr::coalesce(publisher, "")),
- sprintf("Erscheinungsjahr (de): %s", dplyr::coalesce(pubDate, "")),
- sprintf("ISBN: %s", dplyr::coalesce(ISBN, ""))
- )
- context <- paste(fields, collapse = "\n")
- paste0(
- "Du bist ein bibliographischer Assistent. Bestimme für das folgende deutschsprachige Buch:",
- "\n1) Ob es eine Übersetzung ist.",
- "\n2) Falls ja: den Originaltitel und das Jahr der Erstveröffentlichung.",
- "\n\nGib ausschließlich ein einzelnes JSON-Objekt zurück – ohne Begleittext – mit exakt diesen Schlüsseln:",
- "\n{\n \"is_translation\": <boolean>,\n \"original_title\": <string|null>,\n \"original_publication_year\": <integer|null>,\n \"confidence\": <number zwischen 0 und 1>\n}",
- "\nNutze null für Unbekanntes. Antworte nur mit JSON.",
- "\n\nMetadaten:\n",
- context
- )
+ ISBN = NA_character_,
+ prompt_language = c("en", "de")) {
+ prompt_language <- match.arg(prompt_language)
+ if (prompt_language == "de") {
+ fields <- c(
+ sprintf("Titel: %s", dplyr::coalesce(title, "")),
+ sprintf("Untertitel: %s", dplyr::coalesce(subTitle, "")),
+ sprintf("Autor/in: %s", dplyr::coalesce(author, "")),
+ sprintf("Übersetzer/in: %s", dplyr::coalesce(translator, "")),
+ sprintf("Verlag: %s", dplyr::coalesce(publisher, "")),
+ sprintf("Erscheinungsjahr (de): %s", dplyr::coalesce(pubDate, "")),
+ sprintf("ISBN: %s", dplyr::coalesce(ISBN, ""))
+ )
+ context <- paste(fields, collapse = "\n")
+ paste0(
+ "Du bist ein bibliographischer Assistent. Bestimme für das folgende deutschsprachige Buch:",
+ "\n1) Ob es eine Übersetzung ist.",
+ "\n2) Falls ja: den Originaltitel und das Jahr der Erstveröffentlichung.",
+ "\n\nGib ausschließlich ein einzelnes JSON-Objekt zurück – ohne Begleittext – mit exakt diesen Schlüsseln:",
+ "\n{\n \"is_translation\": <boolean>,\n \"original_title\": <string|null>,\n \"original_publication_year\": <integer|null>,\n \"confidence\": <number zwischen 0 und 1>\n}",
+ "\nNutze null für Unbekanntes. Antworte nur mit JSON.",
+ "\n\nMetadaten:\n",
+ context
+ )
+ } else {
+ fields <- c(
+ sprintf("Title: %s", dplyr::coalesce(title, "")),
+ sprintf("Subtitle: %s", dplyr::coalesce(subTitle, "")),
+ sprintf("Author: %s", dplyr::coalesce(author, "")),
+ sprintf("Translator: %s", dplyr::coalesce(translator, "")),
+ sprintf("Publisher: %s", dplyr::coalesce(publisher, "")),
+ sprintf("Publication year (German edition): %s", dplyr::coalesce(pubDate, "")),
+ sprintf("ISBN: %s", dplyr::coalesce(ISBN, ""))
+ )
+ context <- paste(fields, collapse = "\n")
+ paste0(
+ "You are a bibliographic assistant. For the following German-language book, determine:",
+ "\n1) Whether it is a translation.",
+ "\n2) If yes: the original title and the year of first publication.",
+ "\n\nReturn a single JSON object only — no prose — with exactly these keys:",
+ "\n{\n \"is_translation\": <boolean>,\n \"original_title\": <string|null>,\n \"original_publication_year\": <integer|null>,\n \"confidence\": <number between 0 and 1>\n}",
+ "\nUse null for unknown. Output JSON only.",
+ "\n\nMetadata:\n",
+ context
+ )
+ }
}
# Resolve tidyllm provider from model prefix
@@ -127,7 +152,8 @@
translator = translator,
publisher = publisher,
pubDate = pubDate,
- ISBN = ISBN
+ ISBN = ISBN,
+ prompt_language = "en"
)
# Resolve provider if not supplied
@@ -135,9 +161,11 @@
provider <- resolve_provider_for_model(model)
}
- result <- try(
+ result <- NULL
+ chat_err <- NULL
+ tryCatch(
{
- tidyllm::llm_message(prompt) |>
+ result <- tidyllm::llm_message(prompt) |>
tidyllm::chat(
.provider = provider,
.model = model,
@@ -146,10 +174,16 @@
.max_tries = max_tries
)
},
- silent = TRUE
+ error = function(e) {
+ chat_err <<- conditionMessage(e)
+ }
)
- if (inherits(result, "try-error")) {
+ if (!is.null(chat_err)) {
+ message(sprintf(
+ "LLM request failed (model=%s, title=\"%s\", author=\"%s\"): %s",
+ model, as.character(title), as.character(author), chat_err
+ ))
return(tibble::tibble(
original_title = NA_character_,
original_publication_year = as.integer(NA),
@@ -198,7 +232,7 @@
augment_metadata_with_original <- function(
df,
provider = NULL,
- model = "deepseek-chat",
+ model = "gemini-2.5-pro",
include_confidence = TRUE,
temperature = 0.1,
max_tries = 3,
@@ -277,7 +311,13 @@
pubDate = input_cols$pubDate[i],
ISBN = input_cols$ISBN[i]
),
- error = function(e) empty_res
+ error = function(e) {
+ message(sprintf(
+ "Row %d failed (model=%s, title=\"%s\", author=\"%s\"): %s",
+ i, model, as.character(input_cols$title[i]), as.character(input_cols$author[i]), conditionMessage(e)
+ ))
+ empty_res
+ }
)
if (is.null(res) || !is.data.frame(res) || nrow(res) < 1) {
res <- empty_res
@@ -305,13 +345,13 @@
}
# df_aug <- augment_metadata_with_original(df, model = "gemini-2.5-pro")
-df_aug <- augment_metadata_with_original(df, model = "deepseek")
+df_aug <- augment_metadata_with_original(df, model = "deepseek-chat")
# # OpenAI
# df_aug <- augment_metadata_with_original(df, model = "gpt-4o-mini")
# # Claude
-#df_aug <- augment_metadata_with_original(df, model = "claude-3-5-sonnet-latest")
+# df_aug <- augment_metadata_with_original(df, model = "claude-3-5-sonnet-latest")
# # Gemini
# df_aug <- augment_metadata_with_original(df, model = "gemini-2.5-pro")