-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathdataParseFunctions.R
More file actions
86 lines (74 loc) · 3.11 KB
/
dataParseFunctions.R
File metadata and controls
86 lines (74 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#' Parse table data
#'
#' Converts a query response into a typed data.frame. Column names and types
#' are read from `x$schema$fields`; cell values are read from `x$rows`, which
#' is flattened with `unlist()` and laid out row by row. Fields of type
#' RECORD are expanded (recursively) into one column per sub-field, named
#' `<record>.<field>`.
#'
#' @param x A list holding `schema$fields` (a data.frame with `name` and
#'   `type` columns, plus a `fields` list-column carrying the sub-schema of
#'   each RECORD entry), `rows`, and optionally `jobReference` and
#'   `pageToken`.
#' @return A data.frame with one column per leaf field, converted according
#'   to its schema type. `x$jobReference` and `x$pageToken` are attached as
#'   the attributes `"jobReference"` and `"pageToken"`. A response without
#'   rows yields a zero-row data.frame with the same columns.
#' @keywords internal
parse_bqr_query <- function(x) {
  # Converters keyed by lower-cased schema type. Every cell arrives through
  # unlist()/matrix(), i.e. as a single atomic type for the whole table.
  converter <- list(
    integer = as.integer,
    float = as.double,
    boolean = as.logical,
    string = identity,
    numeric = as.numeric,
    # as.numeric (not as.integer) keeps fractional seconds and does not turn
    # epochs above .Machine$integer.max into NA.
    timestamp = function(x) {
      as.POSIXct(as.numeric(x), origin = "1970-01-01", tz = "UTC")
    },
    # fix for #22 if using schema DATE
    date = function(x) as.Date(x, format = "%Y-%m-%d")
  )

  # Flatten a (possibly nested) schema into its leaf columns, in order.
  # Returns a data.frame with the dotted column `name` and lower-cased `type`,
  # or NULL when `fields` has no entries.
  flatten_schema <- function(fields, prefix = "") {
    pieces <- lapply(seq_along(fields$name), function(i) {
      full_name <- paste0(prefix, fields$name[i])
      type <- tolower(fields$type[i])
      if (identical(type, "record")) {
        flatten_schema(fields[i, ]$fields[[1]], paste0(full_name, "."))
      } else {
        data.frame(name = full_name, type = type, stringsAsFactors = FALSE)
      }
    })
    do.call(rbind, pieces)
  }

  leaves <- flatten_schema(x$schema$fields)
  if (is.null(leaves)) {
    stop("No schema fields found in the response", call. = FALSE)
  }
  n_col <- nrow(leaves)

  # NOTE(review): unlist() drops NULL entries, so this layout assumes every
  # row supplies a value for every leaf column.
  values <- unlist(x$rows)
  if (length(values) == 0) {
    # matrix(NULL, ...) is an error; build the empty table explicitly.
    cells <- matrix(character(0), nrow = 0, ncol = n_col)
  } else {
    cells <- matrix(values, ncol = n_col, byrow = TRUE)
  }
  out <- as.data.frame(cells, stringsAsFactors = FALSE)

  # Convert column j with the converter of leaf j. Working on the flattened
  # schema keeps column positions aligned even after a RECORD has expanded
  # into several columns.
  for (j in seq_len(n_col)) {
    convert <- converter[[leaves$type[j]]]
    if (is.null(convert)) {
      # Types without a dedicated converter are passed through unchanged.
      convert <- identity
    }
    out[[j]] <- convert(out[[j]])
  }
  names(out) <- leaves$name

  attr(out, "jobReference") <- x$jobReference
  attr(out, "pageToken") <- x$pageToken
  out
}