R-Automation-Functions/02 Combine csv files into master one.R at main · Pablo-source/R-Automation-Functions · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# FUNCTION TO EASY LOADING DATA FROM SEVERAL .CSV FILES                                     ##
# 27/05/2022

# CREATE DUMMY DATA from PENGUINS DATASET #

# Function: write_csv; Package: readr                                          #
# install.packages("tidyverse",dependencies = TRUE)

# Use pacman package to load several libraries at once

# install.packages("pacman")
# Load packages when required
if(!require("readr")) install.packages("readr")
if(!require("here")) install.packages("here")
if(!require("dplyr")) install.packages("dplyr")
if(!require("ggplot2")) install.packages("ggplot2")
if(!require("purrr")) install.packages("purrr")


pacman::p_load(readr,here,dplyr,ggplot2,purrr)

#library(readr)
#library(here)
#library(dplyr)
#library(ggplot2)
#library(purrr)

# 1 Create directory to save data set backup from readr package
if(!dir.exists("data")){dir.create("data")}
write_csv(palmerpenguins::penguins,here("data","penguins.csv"))
list.files("data/",".csv")

Mypenguins <- palmerpenguins::penguins
nrow(Mypenguins)

# 2 Split penguin main dataset into three files
#Indexing data [rows,cols]
# Slice the first 100 rows into a new dataset
nrow(Mypenguins)
Peng1 <-Mypenguins[1:100,]
Peng2 <-Mypenguins[101:201,]
Peng3 <-Mypenguins[202:344,]

if(!dir.exists(".data/SINGLEF")){dir.create("data/SINGLEF")}

write_csv(Peng1,here("data/SINGLEF","penguins1.csv"))
write_csv(Peng2,here("data/SINGLEF","penguins2.csv"))
write_csv(Peng3,here("data/SINGLEF","penguins3.csv"))

# 01 FINAL SCRIPT TO READ AND COMBINE ALL FILES IN A DIRECTORY

# Using the PURR PACKGE (FOR THE map_df() function)

files_to_comb <- list.files(path = "data/SINGLEF/",
                            pattern="*.csv",
                            full.names = T)
files_to_comb

combined_DATA <- list.files(path = "data/SINGLEF/",
                            pattern="*.csv",
                            full.names = T) %>%
                            map_df(~read_csv(.))

# This would be the content of the whole function
# 1. List all files in any given directory
# (Run this first of step 2)
ALLFIES <-list.files("data/SINGLEF/",".csv")
ALLFIES

NFILES <-length(list.files("data/SINGLEF/",".csv"))
NFILES

# 2 Load individual files on your workspace
# myfile_Name <-c("penguins1.csv","penguins2.csv","penguins3.csv")
for(i in 1:NFILES) {
  assign(ALLFIES[i],
     read_csv(paste0("data/SINGLEF/",ALLFIES[i]))
  )
}

rm(combined_DATA.csv)

# 3. Then we combine them using list.files and map_df functions
combined_DATA <- list.files(path = "data/SINGLEF/",
                            pattern="*.csv",
                            full.names = T) %>%
                            map_df(~read_csv(.))

# 4 Finally we write out combined file as a new .csv file
write_csv(combined_DATA,here("data/SINGLEF","combined_DATA.csv"))

# JUST FOR TESTING PURPOSES. (REMOVE APPENDEN FILE)
file.remove('data/SINGLEF/combined_DATA.csv')