OSDN > Developer > larry77 > Chamber > myprojects-hg-reborn > 提交

myprojects-hg-reborn
Fork

(Original repository, No fork origin)

提交

Commit MetaInfo

修订版	c6eb618c6013552a3d1b3042b11d80725af581aa (tree)
时间	2022-10-23 16:46:19
作者	Lorenzo Isella <lorenzo.isella@gmai...>
Committer	Lorenzo Isella

Log Message

The TSV file is now opened with open_dataset() instead of being read with read_tsv_arrow(), so it is no longer loaded into memory.

更改概述

modified: R-codes/create_tam_parquet.R (diff)

差异

diff -r 1bb702817bd1 -r c6eb618c6013 R-codes/create_tam_parquet.R

--- a/R-codes/create_tam_parquet.R Sun Oct 23 09:39:02 2022 +0200

+++ b/R-codes/create_tam_parquet.R Sun Oct 23 09:46:19 2022 +0200

		@@ -8,10 +8,45 @@
8	8	source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
9	9
10	10
11		-## for mysterious reasons this does not work...
12		-## data <- open_dataset("export.tsv", format="tsv")
13	11
14		-data <- read_tsv_arrow("export.tsv",as_data_frame=F)
	12	+## data <- read_tsv_arrow("export.tsv",as_data_frame=F)
	13	+
	14	+data <- open_dataset("export.tsv",
	15	+ format = "tsv",
	16	+ skip_rows = 1,
	17	+ schema = schema(
	18	+ AID_MEASURE_ID = string(),
	19	+ DATE_CREATED = string(),
	20	+ DATE_GRANTED = string(),
	21	+ AA_PUBLISHED_DATE = string(),
	22	+ SERVER_REF = string(),
	23	+ AM_TITLE = string(),
	24	+ AM_TITLE_EN = string(),
	25	+ STATUS = string(),
	26	+ AM_PROC_TYPE_CD = string(),
	27	+ COFINANCE = string(),
	28	+ OBJECTIVE = string(),
	29	+ OTHER_OBJECTIVE_EN = string(),
	30	+ AID_INSTRUMENT = string(),
	31	+ OTHER_AID_INSTRUMENT_EN = string(),
	32	+ BENEFICIARY_NAME = string(),
	33	+ BENEFICIARY_NAME_ENGLISH = string(),
	34	+ BENEFICIARY_NATIONAL_ID = string(),
	35	+ BENEFICIARY_NAT_ID_TYPE_SD = string(),
	36	+ BENEFICIARY_TYPE_SD = string(),
	37	+ COUNTRY_SD = string(),
	38	+ REGION_SD = string(),
	39	+ SECTOR_SD = string(),
	40	+ GRANTED_AMOUNT_FROM_EUR = double(),
	41	+ NOMINAL_AMOUNT_EUR_FROM = double(),
	42	+ GRANT_RANGE = string(),
	43	+ GRANTING_AUTHORITY_NAME = string(),
	44	+ GRANTING_AUTHORITY_NAME_EN = string(),
	45	+ NUTS_CD = string(),
	46	+ GRANTING_AUTHORITY_COUNTRY = string()
	47	+ )
	48	+)
	49	+
15	50
16	51	write_dataset(
17	52	data,

		@@ -99,52 +134,6 @@
99	134
100	135
101	136
102		-
103		-
104		-
105		-
106		-
107		-## test <- df_new[1:10, ] \|>
108		-## collect()
109		-
110		-
111		-
112		-
113		-## ranges <- df_new \|>
114		-## select(granted_aid_absolute_eur,lower_bound, upper_bound) \|>
115		-## collect() \|>
116		-## mutate(across(everything(), ~as.numeric(.x))) \|>
117		-## mutate(estimated_value=(lower_bound+upper_bound)/2) \|>
118		-## pattern_to_na(0) \|>
119		-## pull(estimated_value)
120		-
121		-## df_new <- df_new \|>
122		-## mutate(estimated_value=ranges)
123		-
124		- ## mutate(lower_bound=as.numeric(lower_bound),
125		- ## upper_bound=as.numeric(upper_bound)) \|>
126		- ## mutate(estimated_value=(lower_bound+upper_bound)/2) \|>
127		- ## ## pattern_to_na(0) \|>
128		- ## mutate(granted_value_extended_eur = case_when(
129		- ## !is.na(granted_aid_absolute_eur) ~ granted_aid_absolute_eur,
130		- ## is.na(granted_aid_absolute_eur) & !is.na(estimated_value) ~estimated_value,
131		- ## is.na(granted_aid_absolute_eur) & is.na(estimated_value) ~ nominal_aid_absolute_eur)) \|>
132		-
133		- ## mutate(nominal_value_extended_eur=
134		- ## case_when(!is.na(nominal_aid_absolute_eur) ~ nominal_aid_absolute_eur,
135		- ## is.na(nominal_aid_absolute_eur)~granted_value_extended_eur
136		- ## )) \|>
137		- ## select(-c(lower_bound, upper_bound, estimated_value))
138		-## \|>
139		- ## mutate(is_covid_case=if_else(case_reference %in% covid_data$case_reference,
140		- ## "Yes", "No")) \|>
141		- ## mutate(granted_value_extended_eur=if_else(is_covid_case=="Yes",
142		- ## NA_real_,granted_value_extended_eur ))
143		-
144		-
145		-
146		-
147		-
148	137	write_dataset(
149	138	df_new,
150	139	format = "csv",

myprojects-hg-reborn Fork

提交

标签

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

更改概述

差异

myprojects-hg-reborn
Fork