Author: Martin Donovan | GitHub gist | profile.json
library(DBI)
library(RPostgreSQL)
library(lubridate)
library(purrr)
library(tidyr)
library(glue)
library(dplyr)
library(duckdb)
# Multi-source restaurant data aggregator — joins Aloha POS staging tables
# (gnditem, gndvoid, gndline) with OpenTable reservation data and Restaurant365
# GL transactions already staged in PostgreSQL.
#
# Table relations are only guaranteed consistent within a single day, so each
# day is retrieved and appended separately (get_ot_aloha_data_for_date).
#
# DuckDB cache fallback: get_ot_aloha_data_for_date_range() checks for a local
# DuckDB file first; falls back to remote PostgreSQL if not present or incomplete.
#
# Credentials loaded from .Renviron: DB_HOST, DB_PORT, DB_USER, DB_PASS
get_db_connection <- function() {
dbConnect(
PostgreSQL(),
dbname = Sys.getenv("WINE_DB"),
host = Sys.getenv("DB_HOST"),
port = Sys.getenv("DB_PORT"),
user = Sys.getenv("DB_USER"),
password = Sys.getenv("DB_PASS")
)
}
get_r365_data_for_date_range <- function(start_date, end_date, entity_id) {
con <- get_db_connection()
on.exit(dbDisconnect(con))
dbGetQuery(con,
"select * from r365_transactions where date between $1 and $2 and entity_id = $3",
params = list(format(as.Date(start_date), "%Y-%m-%d"),
format(as.Date(end_date), "%Y-%m-%d"),
entity_id))
}
# Retrieves one day of joined Aloha + OpenTable data from PostgreSQL.
# Runs ~15 queries in a named list via purrr::map, then joins all tables.
get_ot_aloha_data_for_date <- function(query_date, entity_id) {
con <- get_db_connection()
param_list <- list(format(query_date, "%Y-%m-%d"), entity_id)
queries <- list(
ot_query = "select visit_date, visit_time, guest_name, size as ot_size, pos_check_i_ds as pos_check_ids, \"table\" as ot_table from opentable where visit_date = $1 and entity_id = $2",
gnd_line_query = "select 'gndline' as source, FALSE as is_void, FALSE as is_normal_sale, checkid as \"check\", itemid as item, catid as category, grind_date::date, type as cmp_promo_type, typeid as cmp_promo_type_id, price as cmp_promo_price, oprice as cmp_promo_oprice from alohadbf_gndline where grind_date = $1 and entity_id = $2",
gnd_item_query = "select 'gnditem' as source, TRUE as is_normal_sale, FALSE as is_void, FALSE as is_comp, FALSE as is_promo, grind_date::date, \"check\", item, price as gnd_item_price, quantity, tableid, revid, termid, employee, seat, hour, minute, origin, category from alohadbf_gnditem where grind_date = $1 and entity_id = $2",
gnd_void_query = "select 'gndvoid' as source, \"check\" as check, TRUE as is_void, FALSE as is_comp, FALSE as is_promo, FALSE as is_normal_sale, inventory as void_inventory, tablename, item, price as void_price, hour as void_hour, minute as void_minute, reason, manager as void_manager, grind_date::date from alohadbf_gndvoid where grind_date = $1 and entity_id = $2",
itm_query = "select id as item_id, usernumber, shortname, chitname, longname, cost, price as price_from_itm from alohadbf_itm where grind_date = $1 and entity_id = $2",
trm_query = "select id as terminal_id, name as terminal_name, revenue as revenue_id from alohadbf_trm where grind_date = $1 and entity_id = $2",
emp_query = "select id as employee_id, firstname as emp_first_name, lastname as emp_last_name from alohadbf_emp where grind_date = $1 and entity_id = $2",
tndr_query = "select \"check\" as tndr_check, amount as tndr_amount, tip as tndr_tip from alohadbf_gndtndr where grind_date = $1 and entity_id = $2",
cat_query = "select id as category_id, name as category_name from alohadbf_cat where grind_date = $1 and entity_id = $2",
rev_query = "select name as revenue_name, id as revenue_id from alohadbf_rev where grind_date = $1 and entity_id = $2",
table_query = "select id as table_id, \"desc\", revcenter as tbl_revcenter, numseats from alohadbf_tab where grind_date = $1 and entity_id = $2",
gnd_sale_query = "SELECT DISTINCT ON (\"check\") \"check\", openhour, openmin, closehour, closemin FROM alohadbf_gndsale where grind_date = $1 and entity_id = $2 and \"check\" > 0 ORDER BY \"check\", closehour DESC, closemin DESC",
rsn_query = "select id as rsn_id, name as void_reason from alohadbf_rsn where grind_date = $1 and entity_id = $2",
cmp_query = "select distinct id as cmp_id, name as cmp_name from alohadbf_cmp where grind_date = $1 and entity_id = $2",
pro_query = "select distinct id as pro_id, name as pro_name from alohadbf_pro where grind_date = $1 and entity_id = $2"
)
result_dfs <- queries %>%
map(~ dbGetQuery(con, .x, params = param_list)) %>%
set_names(paste0(names(queries), "_df"))
dbDisconnect(con)
# Expand OpenTable check IDs (pipe-delimited string → one row per check)
result_dfs$ot_query_df <- result_dfs$ot_query_df %>%
separate_longer_delim(pos_check_ids, delim = "|") %>%
filter(!is.na(pos_check_ids) & pos_check_ids != "") %>%
mutate(pos_check_ids = as.integer(pos_check_ids)) %>%
rename(pos_check_id = pos_check_ids)
# gndline type: 2 = promo, 3 = comp — join to cmp/pro tables for names
gndline_cmp_promo <- result_dfs$gnd_line_query_df %>%
mutate(is_comp = (cmp_promo_type == 3),
is_promo = (cmp_promo_type == 2)) %>%
left_join(result_dfs$cmp_query_df %>% select(cmp_id, cmp_name),
by = c("cmp_promo_type_id" = "cmp_id")) %>%
left_join(result_dfs$pro_query_df %>% select(pro_id, pro_name),
by = c("cmp_promo_type_id" = "pro_id")) %>%
mutate(comp_promo_code = case_when(
cmp_promo_type == 3 ~ cmp_name,
cmp_promo_type == 2 ~ pro_name,
TRUE ~ NA_character_)) %>%
select(-c(cmp_name, pro_name, cmp_promo_type, cmp_promo_type_id))
# Join void reason names onto gndvoid
result_dfs$gnd_void_query_df <- left_join(
result_dfs$gnd_void_query_df, result_dfs$rsn_query_df,
by = c("reason" = "rsn_id"))
# Bind gnditem + gndvoid + gndline, then join all lookup tables
list(result_dfs$gnd_item_query_df,
result_dfs$gnd_void_query_df,
gndline_cmp_promo) %>%
purrr::keep(~ nrow(.) > 0) %>%
dplyr::bind_rows() %>%
left_join(result_dfs$itm_query_df, by = c("item" = "item_id")) %>%
left_join(result_dfs$ot_query_df, by = c("check" = "pos_check_id")) %>%
left_join(result_dfs$trm_query_df, by = c("termid" = "terminal_id")) %>%
left_join(result_dfs$cat_query_df, by = c("category" = "category_id")) %>%
left_join(result_dfs$emp_query_df, by = c("employee" = "employee_id")) %>%
left_join(result_dfs$rev_query_df, by = c("revid" = "revenue_id")) %>%
left_join(
result_dfs$tndr_query_df %>%
group_by(tndr_check) %>%
summarise(tndr_amount = max(tndr_amount), tndr_tip = max(tndr_tip)),
by = c("check" = "tndr_check")) %>%
filter(category_name != "RETAIL" | is.na(category_name)) %>%
mutate(ot_date_time = parse_date_time(
paste(visit_time, visit_date, sep = " "), orders = "IMp ymd")) %>%
arrange(grind_date, check, hour, minute) %>%
relocate(grind_date, check, guest_name, emp_first_name, emp_last_name,
tndr_amount, tndr_tip, shortname, revenue_name, category_name,
terminal_name, is_normal_sale, is_void, is_comp, is_promo)
}
# Iterates a date sequence; uses possibly() to skip days with no data
get_ot_aloha_data_from_remote_db_for_date_range <- function(start_date, end_date, entity_id) {
seq(from = as.Date(start_date), to = as.Date(end_date), by = "day") %>%
map(possibly(
~ get_ot_aloha_data_for_date(.x, entity_id) %>%
mutate(grind_date = as.Date(grind_date, format = "%Y-%m-%d")),
otherwise = NULL)) %>%
compact() %>%
keep(~ nrow(.) > 0) %>%
map(~ mutate(., grind_date = as.Date(grind_date))) %>%
bind_rows()
}
# Reads from a local DuckDB file if present and covering the full date range;
# returns NULL if the file is missing, the table is absent, or data is incomplete.
get_ot_aloha_data_from_duckdb_for_date_range <- function(start_date, end_date, entity_id) {
duckdb_path <- "database_file.duckdb"
table_name <- "2024_data"
date_col <- "grind_date"
start_date <- as.Date(start_date)
end_date <- as.Date(end_date)
if (!file.exists(duckdb_path)) return(NULL)
con <- tryCatch(
dbConnect(duckdb(), dbdir = duckdb_path, read_only = TRUE),
error = function(e) NULL)
if (is.null(con)) return(NULL)
on.exit(dbDisconnect(con, shutdown = TRUE))
if (!dbExistsTable(con, table_name)) return(NULL)
coverage <- tryCatch(dbGetQuery(con, sprintf(
"SELECT MIN(%s) AS min_date, MAX(%s) AS max_date, COUNT(*) AS n
FROM \"%s\" WHERE %s BETWEEN '%s' AND '%s'",
date_col, date_col, table_name, date_col, start_date, end_date)),
error = function(e) NULL)
if (is.null(coverage) || coverage$n == 0) return(NULL)
if (as.Date(coverage$min_date) > start_date ||
as.Date(coverage$max_date) < end_date) return(NULL)
dbGetQuery(con, sprintf(
"SELECT * FROM \"%s\" WHERE %s BETWEEN '%s' AND '%s' ORDER BY %s",
table_name, date_col, start_date, end_date, date_col))
}
# Public entry point: DuckDB cache first, remote PostgreSQL as fallback
get_ot_aloha_data_for_date_range <- function(start_date, end_date, entity_id) {
df <- get_ot_aloha_data_from_duckdb_for_date_range(start_date, end_date, entity_id)
if (is.null(df)) {
df <- get_ot_aloha_data_from_remote_db_for_date_range(start_date, end_date, entity_id)
}
df
}