﻿ 优化：将数据帧拆分为数据帧列表,每行转换数据 - 代码日志

#### 优化：将数据帧拆分为数据帧列表,每行转换数据

``````randomRow <- function() {
# make a row with a single 1 and some NA's
sample(x=c(rep(0,9),1,NA,NA),size=12,replace=F)
}

# create a data frame with 12 variables and 1500 cases
makeDf <- function() {
data <- matrix(NA,ncol=12,nrow=1500)
for (i in 1:1500) {
data[i,] <- randomRow()
}
return(data)
}

mydata <- NULL

# combine 30 of these dataframes horizontally
for (i in 1:30) {
mydata <- cbind(mydata,makeDf())
}
mydata <- as.data.frame(mydata) # example data ready
``````

``````# Divide the dataset into a list with 30 dataframes, each with 12 variables
S1 <- lapply(1:30,function(i) {
Z <- rep(1:30,each=12) # define selection vector
mydata[Z==i]           # use selection vector to get groups of variables (x12)
})

recodeDf <- function(df) {
result <- as.numeric(apply(df,1,function(x) {
if (any(!is.na(df))) which(x == 1) else NA # return the position of "1" per row
}))                                          # the if/else check is for the real data
return(result)
}
# Combine individual position vectors into a dataframe
final.df <- as.data.frame(do.call(cbind,lapply(S1,recodeDf)))
``````

``````# Match is usually faster than which, because it only returns the first match
# (and therefore won't fail on multiple matches)
# It also neatly handles your *all NA* case
recodeDf2 <- function(df) apply(df,1,match,x=1)
# You can split your data.frame by column with split.default
# (Using split on data.frame will split-by-row)
S2<-split.default(mydata,rep(1:30,each=12))
final.df2<-lapply(S2,recodeDf2)
``````

``````library(parallel)
final.df2<-mclapply(S2,recodeDf2,mc.cores=numcores)
# Where numcores is your number of processors.
``````

``````nograpes2<-function(mydata) {
test<-function(df) {
l<-lapply(df,function(x) which(x==1))
lens<-lapply(l,length)
rep.int(seq.int(l),times=lens)[order(unlist(l))]
}
S2<-split.default(mydata,rep(1:30,each=12))
data.frame(lapply(S2,test))
}
``````