• R/O
  • SSH

提交

标签
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

修订版: c6eb618c6013552a3d1b3042b11d80725af581aa (tree)
时间: 2022-10-23 16:46:19
作者: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

I now open the tsv file without reading it. It is no longer loaded into memory.

更改概述

差异

diff -r 1bb702817bd1 -r c6eb618c6013 R-codes/create_tam_parquet.R
--- a/R-codes/create_tam_parquet.R Sun Oct 23 09:39:02 2022 +0200
+++ b/R-codes/create_tam_parquet.R Sun Oct 23 09:46:19 2022 +0200
@@ -8,10 +8,45 @@
88 source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
99
1010
11-## for mysterious reasons this does not work...
12-## data <- open_dataset("export.tsv", format="tsv")
1311
14-data <- read_tsv_arrow("export.tsv",as_data_frame=F)
12+## data <- read_tsv_arrow("export.tsv",as_data_frame=F)
13+
14+data <- open_dataset("export.tsv",
15+ format = "tsv",
16+ skip_rows = 1,
17+ schema = schema(
18+ AID_MEASURE_ID = string(),
19+ DATE_CREATED = string(),
20+ DATE_GRANTED = string(),
21+ AA_PUBLISHED_DATE = string(),
22+ SERVER_REF = string(),
23+ AM_TITLE = string(),
24+ AM_TITLE_EN = string(),
25+ STATUS = string(),
26+ AM_PROC_TYPE_CD = string(),
27+ COFINANCE = string(),
28+ OBJECTIVE = string(),
29+ OTHER_OBJECTIVE_EN = string(),
30+ AID_INSTRUMENT = string(),
31+ OTHER_AID_INSTRUMENT_EN = string(),
32+ BENEFICIARY_NAME = string(),
33+ BENEFICIARY_NAME_ENGLISH = string(),
34+ BENEFICIARY_NATIONAL_ID = string(),
35+ BENEFICIARY_NAT_ID_TYPE_SD = string(),
36+ BENEFICIARY_TYPE_SD = string(),
37+ COUNTRY_SD = string(),
38+ REGION_SD = string(),
39+ SECTOR_SD = string(),
40+ GRANTED_AMOUNT_FROM_EUR = double(),
41+ NOMINAL_AMOUNT_EUR_FROM = double(),
42+ GRANT_RANGE = string(),
43+ GRANTING_AUTHORITY_NAME = string(),
44+ GRANTING_AUTHORITY_NAME_EN = string(),
45+ NUTS_CD = string(),
46+ GRANTING_AUTHORITY_COUNTRY = string()
47+ )
48+)
49+
1550
1651 write_dataset(
1752 data,
@@ -99,52 +134,6 @@
99134
100135
101136
102-
103-
104-
105-
106-
107-## test <- df_new[1:10, ] |>
108-## collect()
109-
110-
111-
112-
113-## ranges <- df_new |>
114-## select(granted_aid_absolute_eur,lower_bound, upper_bound) |>
115-## collect() |>
116-## mutate(across(everything(), ~as.numeric(.x))) |>
117-## mutate(estimated_value=(lower_bound+upper_bound)/2) |>
118-## pattern_to_na(0) |>
119-## pull(estimated_value)
120-
121-## df_new <- df_new |>
122-## mutate(estimated_value=ranges)
123-
124- ## mutate(lower_bound=as.numeric(lower_bound),
125- ## upper_bound=as.numeric(upper_bound)) |>
126- ## mutate(estimated_value=(lower_bound+upper_bound)/2) |>
127- ## ## pattern_to_na(0) |>
128- ## mutate(granted_value_extended_eur = case_when(
129- ## !is.na(granted_aid_absolute_eur) ~ granted_aid_absolute_eur,
130- ## is.na(granted_aid_absolute_eur) & !is.na(estimated_value) ~estimated_value,
131- ## is.na(granted_aid_absolute_eur) & is.na(estimated_value) ~ nominal_aid_absolute_eur)) |>
132-
133- ## mutate(nominal_value_extended_eur=
134- ## case_when(!is.na(nominal_aid_absolute_eur) ~ nominal_aid_absolute_eur,
135- ## is.na(nominal_aid_absolute_eur)~granted_value_extended_eur
136- ## )) |>
137- ## select(-c(lower_bound, upper_bound, estimated_value))
138-## |>
139- ## mutate(is_covid_case=if_else(case_reference %in% covid_data$case_reference,
140- ## "Yes", "No")) |>
141- ## mutate(granted_value_extended_eur=if_else(is_covid_case=="Yes",
142- ## NA_real_,granted_value_extended_eur ))
143-
144-
145-
146-
147-
148137 write_dataset(
149138 df_new,
150139 format = "csv",