OSDN > Developer > larry77 > Chamber > myprojects-hg-reborn > 提交

myprojects-hg-reborn
Fork

(Original repository, No fork origin)

提交

Commit MetaInfo

修订版	501a2b56575175faf3d325786395c1e2ce9a51db (tree)
时间	2022-07-07 00:00:33
作者	Lorenzo Isella <lorenzo.isella@gmai...>
Commiter	Lorenzo Isella

Log Message

A script showing the revision of the csv_to_sqlite function.

更改概述

add: R-codes/revise_create_sqlite.R (diff)

差异

diff -r c2a9e93444f3 -r 501a2b565751 R-codes/revise_create_sqlite.R

--- /dev/null Thu Jan 01 00:00:00 1970 +0000

+++ b/R-codes/revise_create_sqlite.R Wed Jul 06 17:00:33 2022 +0200

		@@ -0,0 +1,212 @@
	1	+rm(list=ls())
	2	+
	3	+library(tidyverse)
	4	+
	5	+library(DBI) # main DB interface
	6	+library(dbplyr) # dplyr back-end for DBs
	7	+library(RSQLite)
	8	+library(lubridate)
	9	+
	10	+
	11	+
	12	+csv_to_sqlite <- function(csv_file, sqlite_file, table_name,
	13	+ delim = ",",
	14	+ pre_process_size = 1000, chunk_size = 50000,
	15	+ show_progress_bar = TRUE, ...) {
	16	+ con <- dbConnect(SQLite(), dbname = sqlite_file)
	17	+
	18	+ # read a first chunk of data to extract the colnames and types
	19	+ # to figure out the date and the datetime columns
	20	+ df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...)
	21	+ date_cols <- df %>%
	22	+ select_if(is.Date) %>%
	23	+ colnames()
	24	+ datetime_cols <- df %>%
	25	+ select_if(is.POSIXt) %>%
	26	+ colnames()
	27	+
	28	+ # write the first batch of lines to SQLITE table, converting dates to string
	29	+ # representation
	30	+ df <- df %>%
	31	+ mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
	32	+ mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
	33	+ dbWriteTable(con, table_name, df, overwrite = TRUE)
	34	+
	35	+ # readr chunk functionality
	36	+ read_delim_chunked(
	37	+ csv_file,
	38	+ callback = append_to_sqlite(con = con, table_name = table_name,
	39	+ date_cols = date_cols,
	40	+ datetime_cols = datetime_cols),
	41	+ delim = delim,
	42	+ skip = pre_process_size, chunk_size = chunk_size,
	43	+ progress = show_progress_bar,
	44	+ col_names = colnames(df), ...)
	45	+ dbDisconnect(con)
	46	+}
	47	+
	48	+#' Callback function that appends new sections to the SQLite table.
	49	+#' @param con A valid connection to SQLite database.
	50	+#' @param table_name Name of the table to store the data table in the sqlite
	51	+#' database.
	52	+#' @param date_cols Name of columns containing Date objects
	53	+#' @param datetime_cols Name of columns containint POSIXt objects.
	54	+#'
	55	+#' @keywords internal
	56	+append_to_sqlite <- function(con, table_name,
	57	+ date_cols, datetime_cols) {
	58	+ #' @param x Data.frame we are reading from.
	59	+ function(x, pos) {
	60	+
	61	+ x <- as.data.frame(x)
	62	+ x <- x %>%
	63	+ mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
	64	+ mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
	65	+ # append data frame to table
	66	+ dbWriteTable(con, table_name, x, append = TRUE)
	67	+
	68	+ }
	69	+}
	70	+
	71	+
	72	+
	73	+
	74	+
	75	+
	76	+
	77	+
	78	+
	79	+
	80	+csv_to_sqlite2 <- function(csv_file, sqlite_file, table_name,
	81	+ delim = ",",
	82	+ pre_process_size = 1000, chunk_size = 50000,
	83	+ show_progress_bar = TRUE, ...) {
	84	+ con <- dbConnect(SQLite(), dbname = sqlite_file)
	85	+
	86	+ # read a first chunk of data to extract the colnames and types
	87	+ # to figure out the date and the datetime columns
	88	+ df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...)
	89	+
	90	+
	91	+ date_cols <- df \|>
	92	+ select(where(is.Date)) \|>
	93	+ colnames()
	94	+
	95	+ datetime_cols <- df \|>
	96	+ select(where(is.POSIXt)) \|>
	97	+ colnames()
	98	+
	99	+ # write the first batch of lines to SQLITE table, converting dates to string
	100	+ # representation
	101	+ ## df <- df %>%
	102	+ ## mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
	103	+ ## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
	104	+
	105	+
	106	+ df <- df \|>
	107	+ mutate(across( all_of(date_cols), \(x) as.character.Date(x))) \|>
	108	+ mutate(across(all_of(datetime_cols), \(x) as.character.POSIXt(x)))
	109	+
	110	+
	111	+
	112	+ dbWriteTable(con, table_name, df, overwrite = TRUE)
	113	+
	114	+ # readr chunk functionality
	115	+ read_delim_chunked(
	116	+ csv_file,
	117	+ callback = append_to_sqlite2(con = con, table_name = table_name,
	118	+ date_cols = date_cols,
	119	+ datetime_cols = datetime_cols),
	120	+ delim = delim,
	121	+ skip = pre_process_size, chunk_size = chunk_size,
	122	+ progress = show_progress_bar,
	123	+ col_names = colnames(df), ...)
	124	+ dbDisconnect(con)
	125	+}
	126	+
	127	+
	128	+
	129	+
	130	+
	131	+append_to_sqlite2 <- function(con, table_name,
	132	+ date_cols, datetime_cols) {
	133	+ #' @param x Data.frame we are reading from.
	134	+ function(x, pos) {
	135	+
	136	+ x <- as.data.frame(x)
	137	+ x <- x \|>
	138	+ mutate(across( all_of(date_cols), \(x) as.character.Date(x))) \|>
	139	+ mutate(across( all_of(datetime_cols), \(x) as.character.POSIXt(x)))
	140	+ # append data frame to table
	141	+ dbWriteTable(con, table_name, x, append = TRUE)
	142	+
	143	+ }
	144	+}
	145	+
	146	+
	147	+
	148	+
	149	+
	150	+
	151	+
	152	+
	153	+
	154	+
	155	+
	156	+################################################################
	157	+################################################################
	158	+################################################################
	159	+################################################################
	160	+################################################################
	161	+
	162	+
	163	+
	164	+
	165	+
	166	+csv_to_sqlite("scoreboard.csv.gz", "scoreboard.db", "mytable")
	167	+
	168	+csv_to_sqlite2("scoreboard.csv.gz", "scoreboard2.db", "mytable")
	169	+
	170	+
	171	+
	172	+
	173	+con <- dbConnect(drv = RSQLite::SQLite(), # give me a SQLite connection
	174	+ dbname = "scoreboard.db")
	175	+
	176	+con2 <- dbConnect(drv = RSQLite::SQLite(), # give me a SQLite connection
	177	+ dbname = "scoreboard2.db")
	178	+
	179	+
	180	+table_list <- dbListTables(con) # List me the tables at the connection
	181	+
	182	+print(table_list)
	183	+
	184	+
	185	+
	186	+fields <- dbListFields(con, # Reach into my connection and ...
	187	+ "mytable") # tell me what fields does the `bakers` table have?
	188	+
	189	+
	190	+print("the database fields are")
	191	+print(fields)
	192	+
	193	+my_glimpse <- tbl(con, "mytable")
	194	+
	195	+print(my_glimpse)
	196	+
	197	+
	198	+
	199	+my_glimpse2 <- tbl(con2, "mytable")
	200	+
	201	+print(my_glimpse2)
	202	+
	203	+
	204	+
	205	+
	206	+## df <- read_csv("scoreboard.csv.gz")
	207	+
	208	+
	209	+
	210	+
	211	+
	212	+print("So far so good")

myprojects-hg-reborn Fork

提交

标签

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

更改概述

差异

myprojects-hg-reborn
Fork