# Need to have R 3.4.3 or above installed
# IMF
# For any question contact vguerreiro@imf.org or RPPI@imf.org

################################# WEIGHTS #####################################
# Read before - Box 4 - Prepare your data for processing in
## Chapter B. Assessing Source Data Quality
# Weights are calculated with one year of transaction data.
#
# What this script does:
#   1. Reads one CSV file per period from Data_folder and stacks them.
#   2. Splits the stacked data into strata using the column named in
#      strata.tag.name.
#   3. Computes each stratum's weight as its share of the summed Price column.
#   4. Writes the weights table to "new<WeightsYear>.csv" in Output_folder.

# Install and load packages ----------------------------------------------------
packages1 <- c("magrittr", "dplyr", "gridExtra", "rio", "EnvStats",
               "micEconIndex", "readr", "dummies", "broom", "openxlsx",
               "XLConnect", "xlsx")
pkgs2inst <- !(packages1 %in% .packages(all.available = TRUE))
if (any(pkgs2inst)) install.packages(packages1[pkgs2inst])
# require() only returns FALSE when a package cannot be attached, so collect
# the results and fail loudly for the packages this script actually calls
# (read_csv comes from readr; dplyr is kept as a hard requirement as before).
pkgs.loaded <- vapply(packages1, require, logical(1), character.only = TRUE)
pkgs.needed <- c("dplyr", "readr")
if (!all(pkgs.loaded[pkgs.needed])) {
  stop("Could not load required package(s): ",
       paste(pkgs.needed[!pkgs.loaded[pkgs.needed]], collapse = ", "),
       call. = FALSE)
}
rm(packages1, pkgs2inst, pkgs.loaded, pkgs.needed)
# NOTE: the former rm(list = ls()) was removed. Every object used below is
# assigned afresh, so wiping the user's whole workspace is not needed.

#### !!! COMPILER NEEDS TO UPDATE THE FOLLOWING:-------------------------------

# !!! Define the periods
WeightsYear <- "2008"
Period1 <- "1Q2008"
Period2 <- "2Q2008"
Period3 <- "3Q2008"
Period4 <- "4Q2008"

# !!! Insert address of the folder where the data is
Data_folder <- "C:/Users/vguerreiro/My Local Documents/CSO_Synthetic_data/data set/"

# !!! Insert address of the folder where the outputs should be stored
Output_folder <- "C:/Users/vguerreiro/My Local Documents/CSO_Synthetic_data/data set/"
# The output file name below is relative, so it is written to this folder.
setwd(Output_folder)

# !!! Define the Strata as c("a","b","c",....."z")
Strat1 <- c("New")
Strat2 <- c("Existing")

# !!! Define the variable to be used for stratification (the "" are needed)
strata.tag.name <- "Status"

# !!! Choose the name of your strata (the "" are needed)
strata.given.name <- "Status"

#### !!! Complete with your number of strata
strata <- list(Strat1, Strat2)

# !!! Define the names of your strata (the "" are needed)
# The first name labels the total; the others follow the order of `strata`.
Strata.names <- c("Total", "New", "Existing")

#-----------------------------------------------------------------------------#

Period <- c(Period1, Period2, Period3, Period4)
num.strata <- length(strata)

if (length(Strata.names) != num.strata + 1) {
  stop("Strata.names must contain ", num.strata + 1,
       " names (the total plus one per stratum), not ",
       length(Strata.names), ".", call. = FALSE)
}

# Import data files ------------------------------------------------------------
period.files <- paste0(Data_folder, Period, ".csv")
if (!all(file.exists(period.files))) {
  stop("Data file(s) not found: ",
       paste(period.files[!file.exists(period.files)], collapse = ", "),
       call. = FALSE)
}
# Stack all periods into a single data frame (one year of transactions).
BaseCalcul <- as.data.frame(do.call(rbind, lapply(period.files, read_csv)))

if (!(strata.tag.name %in% names(BaseCalcul))) {
  stop("Stratification column '", strata.tag.name,
       "' was not found in the data.", call. = FALSE)
}
if (!("Price" %in% names(BaseCalcul))) {
  stop("Column 'Price' was not found in the data.", call. = FALSE)
}

# Create stratification --------------------------------------------------------
# One data frame per stratum, holding the rows whose tag is one of the values
# defining that stratum. %in% is used instead of == so that strata with
# different numbers of values, or missing tags in the data, never select NA
# rows (which would turn the stratum sum into NA).
strata.tag <- BaseCalcul[[strata.tag.name]]
BaseCalcul_Stratum <- lapply(strata, function(stratum.values) {
  BaseCalcul[strata.tag %in% stratum.values, , drop = FALSE]
})

### Weights calculation --------------------------------------------------------
sum_price <- sum(BaseCalcul$Price)
if (is.na(sum_price) || sum_price == 0) {
  stop("The total of the Price column is missing or zero; ",
       "weights cannot be computed. Check the Price column for missing values.",
       call. = FALSE)
}

# Share of each stratum in the total of Price.
sum.statum.price <- lapply(BaseCalcul_Stratum, function(stratum.data) {
  sum(stratum.data$Price) / sum_price
})
# The first entry is the weight of the total (always 1), then one per stratum.
w.statum.price <- c(list(sum_price / sum_price), sum.statum.price)

# Sanity check: the strata taken together should reproduce the overall total.
strata.price.total <- sum(vapply(BaseCalcul_Stratum,
                                 function(stratum.data) sum(stratum.data$Price),
                                 numeric(1)))
if (!isTRUE(all.equal(strata.price.total, sum_price))) {
  warning("The strata prices sum to ", strata.price.total,
          " but the total price is ", sum_price,
          "; the strata do not cover every transaction exactly once.",
          call. = FALSE)
}

weights <- data.frame(Weights = unlist(w.statum.price),
                      row.names = Strata.names)
# print() so the table is also shown when the script is run via source().
print(weights)

# Export result to csv ---------------------------------------------------------
filename <- paste0("new", WeightsYear, ".csv")
# Export the weights table. The previous code passed `new`, which is not a
# data object created by this script, so the export failed.
write.csv(weights, file = filename)