| #!/usr/bin/env Rscript |
| |
| suppressPackageStartupMessages({ |
| library(DBI) |
| library(duckdb) |
| library(tibble) |
| }) |
| |
# Public function: safe to source in RStudio.
# Reads data from a DuckDB database file and returns it as a tibble, using
# either a table name or a SQL query.
#
# Arguments:
#   db        Path to an existing DuckDB database file (single string).
#   table     Name of the table to read from schema "main"; only used when
#             `sql` is empty.
#   sql       Optional SQL query. When non-empty it is executed as given and
#             `table` and `limit` are ignored. NULL is treated like "".
#   limit     Maximum number of rows for the table read. Applied only when it
#             is a single finite number > 0; any other value means no limit.
#   read_only Open the database read-only; only a literal TRUE enables it.
#
# Returns: a tibble holding the query result.
# Stops with an error if an argument is malformed or `db` does not exist.
duckdb2tibble <- function(
    db = "/home/kupietz/korap4dnb/epub2i5/data/deliko_metadata.duckdb",
    table = "deliko_metadata",
    sql = "",
    limit = 0,
    read_only = TRUE) {
  # Validate everything up front so bad input fails before a connection opens.
  stopifnot(is.character(db), length(db) == 1)
  if (is.null(sql)) sql <- ""
  stopifnot(is.character(sql), length(sql) == 1, !is.na(sql))
  use_sql <- nzchar(sql)
  table_ok <- is.character(table) && length(table) == 1 &&
    !is.na(table) && nzchar(table)
  if (!use_sql && !table_ok) {
    stop("`table` must be a single non-empty string when `sql` is empty")
  }
  if (!file.exists(db)) stop("DuckDB not found: ", db)

  drv <- duckdb::duckdb()
  # Register the driver shutdown before connecting so that a failed
  # dbConnect() does not leave the driver instance behind.
  on.exit(try(duckdb::duckdb_shutdown(drv), silent = TRUE), add = TRUE)
  con <- DBI::dbConnect(drv, dbdir = db, read_only = isTRUE(read_only))
  # after = FALSE: disconnect runs before the driver shutdown registered above.
  on.exit(
    try(DBI::dbDisconnect(con, shutdown = FALSE), silent = TRUE),
    add = TRUE,
    after = FALSE
  )

  if (use_sql) {
    return(tibble::as_tibble(DBI::dbGetQuery(con, sql)))
  }

  q_schema <- DBI::dbQuoteIdentifier(con, "main")
  q_table <- DBI::dbQuoteIdentifier(con, table)
  fq_name <- paste0(as.character(q_schema), ".", as.character(q_table))
  q <- paste0("SELECT * FROM ", fq_name)

  # is.finite() rejects NA/NaN/Inf, which would otherwise make `if` fail or
  # yield invalid SQL. format(trunc(.)) keeps limits above the 32-bit integer
  # range intact, where as.integer() would turn them into NA.
  has_limit <- is.numeric(limit) && length(limit) == 1 &&
    is.finite(limit) && limit > 0
  if (has_limit) {
    q <- paste0(q, " LIMIT ", format(trunc(limit), scientific = FALSE))
  }

  tibble::as_tibble(DBI::dbGetQuery(con, q))
}
| |
# CLI mode only: run when invoked via Rscript (skipped in interactive sessions).
#
# Options are passed as key=value arguments; recognised keys are db, table,
# sql, limit and output. The environment variables DELIKO_DUCKDB,
# DELIKO_TABLE, DELIKO_SQL, LIMIT and OUT_RDS, when set, take precedence over
# the command-line values. The loaded tibble is summarised on stderr, its
# first rows are printed, and it is saved with saveRDS() if `output` is
# non-empty.
if (!interactive()) {
  defaults <- list(
    db = "/home/kupietz/korap4dnb/epub2i5/data/deliko_metadata.duckdb",
    table = "deliko_metadata",
    sql = "",
    limit = "0",
    output = ""
  )
  opts <- defaults
  args <- commandArgs(trailingOnly = TRUE)
  for (a in args) {
    # Split on the FIRST "=" only, so values that contain "=" themselves
    # (e.g. sql=SELECT * FROM t WHERE x=1) are kept intact.
    eq <- as.integer(regexpr("=", a, fixed = TRUE))
    if (eq < 2L) next # no "=" at all, or an empty key
    key <- substr(a, 1L, eq - 1L)
    val <- substr(a, eq + 1L, nchar(a))
    # Arguments with an empty value are ignored and keep the default.
    if (nzchar(val) && key %in% names(opts)) opts[[key]] <- val
  }

  # ENV overrides
  opts$db <- Sys.getenv("DELIKO_DUCKDB", opts$db)
  opts$table <- Sys.getenv("DELIKO_TABLE", opts$table)
  opts$sql <- Sys.getenv("DELIKO_SQL", opts$sql)
  opts$limit <- Sys.getenv("LIMIT", opts$limit)
  opts$output <- Sys.getenv("OUT_RDS", opts$output)

  # A limit that does not parse as an integer becomes NA; fall back to
  # "no limit" instead of handing NA to duckdb2tibble().
  row_limit <- suppressWarnings(as.integer(opts$limit))
  if (is.na(row_limit)) {
    message("Ignoring invalid limit: ", opts$limit)
    row_limit <- 0L
  }

  message("Connecting DuckDB: ", opts$db)
  df <- duckdb2tibble(
    db = opts$db,
    table = opts$table,
    sql = opts$sql,
    limit = row_limit,
    read_only = TRUE
  )

  message("Loaded tibble: ", nrow(df), " rows x ", ncol(df), " cols")
  message("Columns: ", paste(names(df), collapse = ", "))
  # head() already returns all rows when there are fewer than 10.
  print(utils::head(df, n = 10L))

  if (nzchar(opts$output)) {
    saveRDS(df, opts$output)
    message("Saved tibble to ", opts$output)
  }
}