# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 100)

# Keep only rows with a non-missing `debt`, and record that row count in `n`.
# Note: `n` is taken BEFORE the all-column filter on the next statement.
df <- df[complete.cases(df$debt), ]
n <- nrow(df)

# Then drop every row that has a missing value in any column.
df <- df[complete.cases(df), ]

# Cross-tabulate V31 (rows) by income (columns) ...
pr <- xtabs(~ df$V31 + df$income)

# ... and print the row percentages (each row sums to ~100), rounded to
# whole numbers.
pr <- as.matrix(pr)
round(prop.table(pr, 1) * 100, 0)
# 1번 출력_PROJECT-1 & PROJECT-2(수정).pdf
# Install the aj412s package from GitHub only when it is not already
# available, so that re-running the script does not reinstall it (or require
# network access / devtools) every time.
if (!requireNamespace("aj412s", quietly = TRUE)) {
  devtools::install_github("ssbaj/aj412s")
}
# Read the CSV, declaring the file's encoding as CP949.
df <- read.csv("자료1.csv", fileEncoding = "cp949", encoding = "UTF-8")

# Give the six columns their names.
names(df) <- c("id", "brand", "conv", "satprice", "age", "gender")

# Column add / rename / drop walk-through: copy `gender` into a new 7th
# column, rename that column by position, then remove it again.
df[["gender2"]] <- df[["gender"]]
names(df)[7] <- "gender3"
df <- df[, -7]

# Store the row names as a numeric `index` column.
df$index <- as.numeric(rownames(df))
# dplyr: arrange(), filter(), %>%
library(dplyr)

# Sort by brand, ascending.
df <- df %>% arrange(brand)

# Treat empty strings as missing values, in every column.
df[df == ""] <- NA

# Sort by brand descending, then sort back by the `index` column.
df <- df %>% arrange(desc(brand))
df <- df %>% arrange(index)

# Print the rows whose brand is "SY".
df %>% filter(brand == "SY")

# Print the rows whose brand is "SY" or whose gender is 1.
df %>% filter(brand == "SY" | gender == 1)

# Per-brand mean of `conv` (missing values ignored) and the row count per
# brand. `TRUE` is spelled out because `T` is an ordinary, reassignable
# variable.
df %>%
  group_by(brand) %>%
  summarize(편의성평균 = mean(conv, na.rm = TRUE), n())
library(aj412s)

# NOTE(review): mkdum() comes from aj412s; presumably it appends dummy
# columns built from column 2 of df -- confirm against the package.
df <- mkdum(df, 2)

# Show the distinct values found in `age`.
unique(df$age)

# Remove columns 8 through 15 again.
df <- df[, -(8:15)]

# Create four dummy columns, all initialised to 0.
df[["dum1"]] <- 0
df[["dum2"]] <- 0
df[["dum3"]] <- 0
df[["dum4"]] <- 0
# Loop demo: print every number from 1 to 16, except that the text
# "number 4" is printed in place of 4.
for (i in seq_len(16)) {
  if (i != 4) {
    print(i)
  } else {
    print("number 4")
  }
}
# Fill the dummy columns 8-11 from the codes in column 5: a row whose code
# is k (k = 1..4) gets a 1 in column 7 + k (8 = dum1, 9 = dum2, 10 = dum3,
# 11 = dum4). Rows whose code is NA, or anything other than 1-4, are left
# untouched.
#
# This is done one code at a time on whole columns instead of row by row,
# which also makes it safe for a data frame with zero rows (a `1:n` loop
# would iterate over c(1, 0) and fail there).
n <- nrow(df)
code <- df[[5]]
for (k in seq_len(4)) {
  # which() drops the NA comparisons, so missing codes are skipped.
  hit <- which(code == k)
  # Assign into the column vector: an empty `hit` is then a harmless no-op.
  df[[7 + k]][hit] <- 1
}
# Set the value in row 1, column 5 to missing.
# NOTE(review): presumably done to exercise the NA-skip path of the
# dummy-coding loop -- confirm.
df[1L, 5L] <- NA

# Reset the four dummy columns back to 0.
df[["dum1"]] <- 0
df[["dum2"]] <- 0
df[["dum3"]] <- 0
df[["dum4"]] <- 0
# ■ ① 재코딩, ② 엑셀의 COUNTIF와 동일한 sum(), ③ 변수 type 지정, ④ 교차테이블 xtabs(~ )
# ■ 엑셀 데이터
# Let the user pick an Excel file interactively and read its third sheet,
# skipping no rows and taking the column names from the first row.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df <- read_excel(file.choose(), skip = 0, sheet = 3, col_names = TRUE)