Add missing duckdb2tibble
diff --git a/scripts/duckdb2tibble.R b/scripts/duckdb2tibble.R
new file mode 100755
index 0000000..490338c
--- /dev/null
+++ b/scripts/duckdb2tibble.R
@@ -0,0 +1,85 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages({
+ library(DBI)
+ library(duckdb)
+ library(tibble)
+})
+
+# Read a DuckDB table, or the result of a SQL query, into a tibble.
+# Safe to source in RStudio. A non-empty `sql` is run verbatim and takes
+# precedence over `table`/`limit`; otherwise main.<table> is read, capped at
+# `limit` rows only when `limit` is one positive number (0/NA/NULL: no cap).
+# Stops if `db` is not an existing file; the connection is closed on exit.
+duckdb2tibble <- function(
+    db = "/home/kupietz/korap4dnb/epub2i5/data/deliko_metadata.duckdb",
+    table = "deliko_metadata",
+    sql = "",
+    limit = 0,
+    read_only = TRUE) {
+  stopifnot(is.character(db), length(db) == 1)
+  if (!file.exists(db)) stop("DuckDB not found: ", db)
+
+  drv <- duckdb::duckdb()
+  con <- DBI::dbConnect(drv, dbdir = db, read_only = isTRUE(read_only))
+  # Best-effort cleanup: close the connection, then shut the driver down.
+  on.exit({
+    try(DBI::dbDisconnect(con, shutdown = FALSE), silent = TRUE)
+    try(duckdb::duckdb_shutdown(drv), silent = TRUE)
+  }, add = TRUE)
+
+  # An explicit query is sent to DuckDB unchanged.
+  if (nzchar(sql)) return(tibble::as_tibble(DBI::dbGetQuery(con, sql)))
+
+  q_schema <- as.character(DBI::dbQuoteIdentifier(con, "main"))
+  q_table <- as.character(DBI::dbQuoteIdentifier(con, table))
+  q <- paste0("SELECT * FROM ", q_schema, ".", q_table)
+  # isTRUE() turns an NA limit (e.g. unparsable CLI input) into "no cap".
+  capped <- is.numeric(limit) && length(limit) == 1 && isTRUE(limit > 0)
+  if (capped) q <- paste0(q, " LIMIT ", as.integer(limit))
+
+  tibble::as_tibble(DBI::dbGetQuery(con, q))
+}
+
+# CLI mode only: runs when the script is not in an interactive session
+# (e.g. via Rscript). Options are key=value arguments (db, table, sql, limit,
+# output); the environment variables DELIKO_DUCKDB, DELIKO_TABLE, DELIKO_SQL,
+# LIMIT and OUT_RDS, when set, take precedence over the command-line values.
+if (!interactive()) {
+  opts <- list(
+    db = "/home/kupietz/korap4dnb/epub2i5/data/deliko_metadata.duckdb",
+    table = "deliko_metadata",
+    sql = "", limit = "0", output = ""
+  )
+  for (a in commandArgs(trailingOnly = TRUE)) {
+    # Split on the first "=" only, so values containing "=" (SQL) survive.
+    kv <- regmatches(a, regexpr("=", a, fixed = TRUE), invert = TRUE)[[1]]
+    ok <- length(kv) == 2 && nzchar(kv[2]) && kv[1] %in% names(opts)
+    if (ok) opts[[kv[1]]] <- kv[2]
+  }
+
+  # ENV overrides
+  opts$db <- Sys.getenv("DELIKO_DUCKDB", opts$db)
+  opts$table <- Sys.getenv("DELIKO_TABLE", opts$table)
+  opts$sql <- Sys.getenv("DELIKO_SQL", opts$sql)
+  opts$limit <- Sys.getenv("LIMIT", opts$limit)
+  opts$output <- Sys.getenv("OUT_RDS", opts$output)
+
+  # An unparsable limit becomes 0 ("no limit") instead of being passed as NA.
+  row_cap <- suppressWarnings(as.integer(opts$limit))
+  if (is.na(row_cap)) row_cap <- 0L
+  message("Connecting DuckDB: ", opts$db)
+  df <- duckdb2tibble(
+    db = opts$db, table = opts$table, sql = opts$sql,
+    limit = row_cap, read_only = TRUE
+  )
+
+  message("Loaded tibble: ", nrow(df), " rows x ", ncol(df), " cols")
+  message("Columns: ", paste(names(df), collapse = ", "))
+  print(utils::head(df, n = 10L))
+
+  if (nzchar(opts$output)) {
+    saveRDS(df, opts$output)
+    message("Saved tibble to ", opts$output)
+  }
+}