Load library
library(tidyverse)
library(RPostgres)
library(DBI)
Connect to PostgreSQL
Connect Method 1 with RPostgres
con1 <- DBI::dbConnect(RPostgres::Postgres(),
dbname = "testdb1",
password = rstudioapi::askForPassword("Database password"))
dbListTables(con1)
#dbWriteTable(con1, "mtcars", mtcars)
#dbWriteTable(con1, "flights", nycflights13::flights)
rstudioapi::askForPassword
requires the password input
- In the command line (or block-run), there will be popout dialog to input password,
- In the knit mode,
render
the RMarkdown file as follows:
rmarkdown::render("MyDocument.Rmd", params = "ask")
See Parameter User Interfaces - In the
blogdown
,server_site
will halt at the above block.
Connect Method 2 with RPostgreSQL
library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
con2 <- dbConnect(drv,
host = "localhost",
dbname = "testdb1",
user = "cxiao",
password = rstudioapi::askForPassword("Database password"))
dbListTables(con2)
Connect Method 3 with RPostgres
and config.yml
Sys.setenv(R_CONFIG_ACTIVE = "AWS")
dw <- config::get(file = "config.yml", "dw")
conAWS1 <- dbConnect(RPostgres::Postgres(),
dbname = dw$database,
user = dw$uid,
password = dw$pwd,
host = dw$server,
port = 5432)
dbListTables(conAWS1)
Connect Method 4 with odbc
and config.yml
conAWS2 <- dbConnect(odbc::odbc(),
driver = dw$driver,
database = dw$database,
uid = dw$uid,
pwd = dw$pwd,
server = dw$server,
port = 5432)
dbListTables(conAWS1)
Connect Method 5 with odbc
and DSN configuraton
conAWS3 <- dbConnect(odbc::odbc(), "AWSPostgreSQL")
dbListTables(conAWS3)
Query Methods
library(DBI)
con3 <- dbConnect(odbc::odbc(), "PostgreSQL")
dbListTables(con3)
## [1] "flights" "mall_sale" "mtcars"
SQL ETL
SELECT * FROM mtcars LIMIT 10
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb |
---|---|---|---|---|---|---|---|---|---|---|
21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
SQL in R
dbGetQuery(con3, "SELECT * FROM mtcars LIMIT 10")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
dplyr
query baced by dbplyr
example 1
mtcars_db <- tbl(con3, "mtcars")
top2 <- mtcars_db %>%
group_by(cyl) %>%
# slice not working for database
filter(between(row_number(), 1, 2))
example 2
flights_db <- tbl(con3, "flights")
flights_db
## # Source: table<flights> [?? x 19]
## # Database: postgres [cxiao@localhost:/testdb1]
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
flights_db %>%
tally()
## # Source: lazy query [?? x 1]
## # Database: postgres [cxiao@localhost:/testdb1]
## n
## <S3: integer64>
## 1 336776