Database with R: PostgreSQL

Load library

library(tidyverse)
library(RPostgres)
library(DBI)

Connect to PostgreSQL

Connect Method 1 with RPostgres

con1 <- DBI::dbConnect(RPostgres::Postgres(), 
                      dbname = "testdb1",
                      password = rstudioapi::askForPassword("Database password"))

dbListTables(con1)
#dbWriteTable(con1, "mtcars", mtcars)
#dbWriteTable(con1, "flights", nycflights13::flights)

rstudioapi::askForPassword requires the password input

  • In the command line (or block-run), there will be popout dialog to input password,
  • In the knit mode, render the RMarkdown file as follows:
    rmarkdown::render("MyDocument.Rmd", params = "ask")
    See Parameter User Interfaces
  • In the blogdown, server_site will halt at the above block.

Connect Method 2 with RPostgreSQL

library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
con2 <- dbConnect(drv, 
                 host = "localhost",
                 dbname = "testdb1",
                 user = "cxiao",
                 password = rstudioapi::askForPassword("Database password"))
dbListTables(con2)

Connect Method 3 with RPostgres and config.yml

Sys.setenv(R_CONFIG_ACTIVE = "AWS")
dw <- config::get(file = "config.yml", "dw")

conAWS1 <- dbConnect(RPostgres::Postgres(),
                     dbname   = dw$database,
                     user      = dw$uid,
                     password = dw$pwd,
                     host     = dw$server,
                     port     = 5432)
dbListTables(conAWS1)

Connect Method 4 with odbc and config.yml

conAWS2 <- dbConnect(odbc::odbc(),
                     driver   = dw$driver,
                     database = dw$database,
                     uid      = dw$uid,
                     pwd      = dw$pwd,
                     server   = dw$server,
                     port     = 5432)

dbListTables(conAWS1)

Connect Method 5 with odbc and DSN configuraton

conAWS3 <- dbConnect(odbc::odbc(), "AWSPostgreSQL")
dbListTables(conAWS3)

Query Methods

library(DBI)
con3 <- dbConnect(odbc::odbc(), "PostgreSQL")
dbListTables(con3)
## [1] "flights"   "mall_sale" "mtcars"

SQL ETL

SELECT * FROM mtcars LIMIT 10
Table 1: Displaying records 1 - 10
mpg cyl disp hp drat wt qsec vs am gear carb
21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4

SQL in R

dbGetQuery(con3, "SELECT * FROM mtcars LIMIT 10")
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4

dplyr query baced by dbplyr

example 1

mtcars_db <- tbl(con3, "mtcars")

top2 <- mtcars_db %>% 
  group_by(cyl) %>%
# slice not working for database 
  filter(between(row_number(), 1, 2)) 

example 2

flights_db <- tbl(con3, "flights")

flights_db
## # Source:   table<flights> [?? x 19]
## # Database: postgres [cxiao@localhost:/testdb1]
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
flights_db %>% 
  tally()
## # Source:   lazy query [?? x 1]
## # Database: postgres [cxiao@localhost:/testdb1]
##   n              
##   <S3: integer64>
## 1 336776

Related