Rev. | bc982baf214e53cbbc989ae9fec584aa63b6a185 |
---|---|
大小 | 391 字节 |
时间 | 2024-02-22 23:33:30 |
作者 | Lorenzo Isella |
Log Message | Simple code to extract a sample from a parquet file without opening it. |
# Draw a reproducible random sample of rows from a Parquet file and export
# it to an Excel workbook.
#
# NOTE: the former `rm(list = ls())` was removed on purpose. Clearing the
# caller's workspace is a surprising side effect when this script is
# sourced, and nothing below relies on an empty environment.

library(tidyverse)
library(arrow)
library(openxlsx)
# Project helper library; `save_excel()` used below is presumably defined
# there -- TODO confirm.
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")

# Script parameters ----
parquet_path <- "./data_output/part-0.parquet"
output_path <- "tam_sample.xlsx"
max_sample_size <- 3000L

# Open the Parquet file as an Arrow dataset (the rows are only materialised
# by `collect()` further down).
df_tam <- open_dataset(parquet_path)

nn <- df_tam |>
  nrow()

if (nn < 1) {
  stop("No rows to sample from in ", parquet_path, call. = FALSE)
}

# Never ask for more rows than exist: sampling without replacement would
# otherwise fail on files shorter than `max_sample_size`.
n_draw <- min(max_sample_size, nn)

# Fixed seed so the same rows are selected on every run.
set.seed(1234)
# `sample.int(nn, k)` yields the same draws as `sample(1:nn, k)` but without
# building the `1:nn` sequence (which misbehaves for very small `nn`).
ss <- sample.int(nn, n_draw)

# Take only the sampled rows and pull them into memory as a data frame.
df_sample <- df_tam[ss, ] |>
  collect()

save_excel(df_sample, output_path)

print("So far so good")