Author: Martin Donovan | GitHub gist | profile.json

Multi-source restaurant data aggregator — R: joins Aloha POS staging tables (gnditem, gndvoid, gndline) with OpenTable reservation data and Restaurant365 GL transactions. Runs per-day due to Aloha's day-level consistency constraint. Features DuckDB local cache with PostgreSQL fallback, purrr::possibly for error-safe date iteration, and comp/promo resolution via gndline type codes.

library(DBI)
library(RPostgreSQL)
library(lubridate)
library(purrr)
library(tidyr)
library(glue)
library(dplyr)
library(duckdb)

# Multi-source restaurant data aggregator — joins Aloha POS staging tables
# (gnditem, gndvoid, gndline) with OpenTable reservation data and Restaurant365
# GL transactions already staged in PostgreSQL.
#
# Table relations are only guaranteed consistent within a single day, so each
# day is retrieved and appended separately (get_ot_aloha_data_for_date).
#
# DuckDB cache fallback: get_ot_aloha_data_for_date_range() checks for a local
# DuckDB file first; falls back to remote PostgreSQL if not present or incomplete.
#
# Credentials loaded from .Renviron: DB_HOST, DB_PORT, DB_USER, DB_PASS

get_db_connection <- function() {
  dbConnect(
    PostgreSQL(),
    dbname   = Sys.getenv("WINE_DB"),
    host     = Sys.getenv("DB_HOST"),
    port     = Sys.getenv("DB_PORT"),
    user     = Sys.getenv("DB_USER"),
    password = Sys.getenv("DB_PASS")
  )
}

get_r365_data_for_date_range <- function(start_date, end_date, entity_id) {
  con <- get_db_connection()
  on.exit(dbDisconnect(con))
  dbGetQuery(con,
    "select * from r365_transactions where date between $1 and $2 and entity_id = $3",
    params = list(format(as.Date(start_date), "%Y-%m-%d"),
                  format(as.Date(end_date),   "%Y-%m-%d"),
                  entity_id))
}

# Retrieves one day of joined Aloha + OpenTable data from PostgreSQL.
# Runs ~15 queries in a named list via purrr::map, then joins all tables.
get_ot_aloha_data_for_date <- function(query_date, entity_id) {
  con        <- get_db_connection()
  param_list <- list(format(query_date, "%Y-%m-%d"), entity_id)

  queries <- list(
    ot_query      = "select visit_date, visit_time, guest_name, size as ot_size, pos_check_i_ds as pos_check_ids, \"table\" as ot_table from opentable where visit_date = $1 and entity_id = $2",
    gnd_line_query = "select 'gndline' as source, FALSE as is_void, FALSE as is_normal_sale, checkid as \"check\", itemid as item, catid as category, grind_date::date, type as cmp_promo_type, typeid as cmp_promo_type_id, price as cmp_promo_price, oprice as cmp_promo_oprice from alohadbf_gndline where grind_date = $1 and entity_id = $2",
    gnd_item_query = "select 'gnditem' as source, TRUE as is_normal_sale, FALSE as is_void, FALSE as is_comp, FALSE as is_promo, grind_date::date, \"check\", item, price as gnd_item_price, quantity, tableid, revid, termid, employee, seat, hour, minute, origin, category from alohadbf_gnditem where grind_date = $1 and entity_id = $2",
    gnd_void_query = "select 'gndvoid' as source, \"check\" as check, TRUE as is_void, FALSE as is_comp, FALSE as is_promo, FALSE as is_normal_sale, inventory as void_inventory, tablename, item, price as void_price, hour as void_hour, minute as void_minute, reason, manager as void_manager, grind_date::date from alohadbf_gndvoid where grind_date = $1 and entity_id = $2",
    itm_query     = "select id as item_id, usernumber, shortname, chitname, longname, cost, price as price_from_itm from alohadbf_itm where grind_date = $1 and entity_id = $2",
    trm_query     = "select id as terminal_id, name as terminal_name, revenue as revenue_id from alohadbf_trm where grind_date = $1 and entity_id = $2",
    emp_query     = "select id as employee_id, firstname as emp_first_name, lastname as emp_last_name from alohadbf_emp where grind_date = $1 and entity_id = $2",
    tndr_query    = "select \"check\" as tndr_check, amount as tndr_amount, tip as tndr_tip from alohadbf_gndtndr where grind_date = $1 and entity_id = $2",
    cat_query     = "select id as category_id, name as category_name from alohadbf_cat where grind_date = $1 and entity_id = $2",
    rev_query     = "select name as revenue_name, id as revenue_id from alohadbf_rev where grind_date = $1 and entity_id = $2",
    table_query   = "select id as table_id, \"desc\", revcenter as tbl_revcenter, numseats from alohadbf_tab where grind_date = $1 and entity_id = $2",
    gnd_sale_query = "SELECT DISTINCT ON (\"check\") \"check\", openhour, openmin, closehour, closemin FROM alohadbf_gndsale where grind_date = $1 and entity_id = $2 and \"check\" > 0 ORDER BY \"check\", closehour DESC, closemin DESC",
    rsn_query     = "select id as rsn_id, name as void_reason from alohadbf_rsn where grind_date = $1 and entity_id = $2",
    cmp_query     = "select distinct id as cmp_id, name as cmp_name from alohadbf_cmp where grind_date = $1 and entity_id = $2",
    pro_query     = "select distinct id as pro_id, name as pro_name from alohadbf_pro where grind_date = $1 and entity_id = $2"
  )

  result_dfs <- queries %>%
    map(~ dbGetQuery(con, .x, params = param_list)) %>%
    set_names(paste0(names(queries), "_df"))
  dbDisconnect(con)

  # Expand OpenTable check IDs (pipe-delimited string → one row per check)
  result_dfs$ot_query_df <- result_dfs$ot_query_df %>%
    separate_longer_delim(pos_check_ids, delim = "|") %>%
    filter(!is.na(pos_check_ids) & pos_check_ids != "") %>%
    mutate(pos_check_ids = as.integer(pos_check_ids)) %>%
    rename(pos_check_id = pos_check_ids)

  # gndline type: 2 = promo, 3 = comp — join to cmp/pro tables for names
  gndline_cmp_promo <- result_dfs$gnd_line_query_df %>%
    mutate(is_comp  = (cmp_promo_type == 3),
           is_promo = (cmp_promo_type == 2)) %>%
    left_join(result_dfs$cmp_query_df %>% select(cmp_id, cmp_name),
              by = c("cmp_promo_type_id" = "cmp_id")) %>%
    left_join(result_dfs$pro_query_df %>% select(pro_id, pro_name),
              by = c("cmp_promo_type_id" = "pro_id")) %>%
    mutate(comp_promo_code = case_when(
      cmp_promo_type == 3 ~ cmp_name,
      cmp_promo_type == 2 ~ pro_name,
      TRUE ~ NA_character_)) %>%
    select(-c(cmp_name, pro_name, cmp_promo_type, cmp_promo_type_id))

  # Join void reason names onto gndvoid
  result_dfs$gnd_void_query_df <- left_join(
    result_dfs$gnd_void_query_df, result_dfs$rsn_query_df,
    by = c("reason" = "rsn_id"))

  # Bind gnditem + gndvoid + gndline, then join all lookup tables
  list(result_dfs$gnd_item_query_df,
       result_dfs$gnd_void_query_df,
       gndline_cmp_promo) %>%
    purrr::keep(~ nrow(.) > 0) %>%
    dplyr::bind_rows() %>%
    left_join(result_dfs$itm_query_df,  by = c("item"     = "item_id")) %>%
    left_join(result_dfs$ot_query_df,   by = c("check"    = "pos_check_id")) %>%
    left_join(result_dfs$trm_query_df,  by = c("termid"   = "terminal_id")) %>%
    left_join(result_dfs$cat_query_df,  by = c("category" = "category_id")) %>%
    left_join(result_dfs$emp_query_df,  by = c("employee" = "employee_id")) %>%
    left_join(result_dfs$rev_query_df,  by = c("revid"    = "revenue_id")) %>%
    left_join(
      result_dfs$tndr_query_df %>%
        group_by(tndr_check) %>%
        summarise(tndr_amount = max(tndr_amount), tndr_tip = max(tndr_tip)),
      by = c("check" = "tndr_check")) %>%
    filter(category_name != "RETAIL" | is.na(category_name)) %>%
    mutate(ot_date_time = parse_date_time(
             paste(visit_time, visit_date, sep = " "), orders = "IMp ymd")) %>%
    arrange(grind_date, check, hour, minute) %>%
    relocate(grind_date, check, guest_name, emp_first_name, emp_last_name,
             tndr_amount, tndr_tip, shortname, revenue_name, category_name,
             terminal_name, is_normal_sale, is_void, is_comp, is_promo)
}

# Iterates a date sequence; uses possibly() to skip days with no data
get_ot_aloha_data_from_remote_db_for_date_range <- function(start_date, end_date, entity_id) {
  seq(from = as.Date(start_date), to = as.Date(end_date), by = "day") %>%
    map(possibly(
      ~ get_ot_aloha_data_for_date(.x, entity_id) %>%
          mutate(grind_date = as.Date(grind_date, format = "%Y-%m-%d")),
      otherwise = NULL)) %>%
    compact() %>%
    keep(~ nrow(.) > 0) %>%
    map(~ mutate(., grind_date = as.Date(grind_date))) %>%
    bind_rows()
}

# Reads from a local DuckDB file if present and covering the full date range;
# returns NULL if the file is missing, the table is absent, or data is incomplete.
get_ot_aloha_data_from_duckdb_for_date_range <- function(start_date, end_date, entity_id) {
  duckdb_path <- "database_file.duckdb"
  table_name  <- "2024_data"
  date_col    <- "grind_date"
  start_date  <- as.Date(start_date)
  end_date    <- as.Date(end_date)

  if (!file.exists(duckdb_path)) return(NULL)

  con <- tryCatch(
    dbConnect(duckdb(), dbdir = duckdb_path, read_only = TRUE),
    error = function(e) NULL)
  if (is.null(con)) return(NULL)
  on.exit(dbDisconnect(con, shutdown = TRUE))

  if (!dbExistsTable(con, table_name)) return(NULL)

  coverage <- tryCatch(dbGetQuery(con, sprintf(
    "SELECT MIN(%s) AS min_date, MAX(%s) AS max_date, COUNT(*) AS n
     FROM \"%s\" WHERE %s BETWEEN '%s' AND '%s'",
    date_col, date_col, table_name, date_col, start_date, end_date)),
    error = function(e) NULL)

  if (is.null(coverage) || coverage$n == 0) return(NULL)
  if (as.Date(coverage$min_date) > start_date ||
      as.Date(coverage$max_date) < end_date)  return(NULL)

  dbGetQuery(con, sprintf(
    "SELECT * FROM \"%s\" WHERE %s BETWEEN '%s' AND '%s' ORDER BY %s",
    table_name, date_col, start_date, end_date, date_col))
}

# Public entry point: DuckDB cache first, remote PostgreSQL as fallback
get_ot_aloha_data_for_date_range <- function(start_date, end_date, entity_id) {
  df <- get_ot_aloha_data_from_duckdb_for_date_range(start_date, end_date, entity_id)
  if (is.null(df)) {
    df <- get_ot_aloha_data_from_remote_db_for_date_range(start_date, end_date, entity_id)
  }
  df
}