본문 바로가기
R

[R 프로그래밍] SQL 데이터를 통한 R프로그래밍 실습

by 퍼포먼스마케팅코더 2017. 2. 9.
반응형

[SQL] 데이터 수집용

-- DAU

select     cast(logtime as date) 'date', 'IK' AS 'app_name',  userno 'user_id' 

from     dbo.LoginLog with (nolock)

where

    cast(logtime as date) between '2016-01-01' and '2016-12-31'

and

logtype=0

group by

cast(logtime as date), userno


-- DPU

select cast(logtime as date) 'date', 'IK' AS 'app_name', userno 'user_id',  sum(case when currency='USD' then convert(float, itemprice) * 1100 else convert(float, itemprice) end) 'payment'

from dbo.PurchaseLog with (nolock)

where

cast(logtime as date) between '2016-01-01' and '2016-12-31'

group by

cast(logtime as date), userno


-- Install

select 

install_date,  'IK' AS 'app_name',  UserNo 'user_id' 

from 

(select 

a.UserNo, cast(min(a.LogTime) as date) 'install_date' 

from 

dbo.loginlog a with(nolock)

where

a.LogTime <= '2017-02-08 23:59:59'

group by 

a.UserNo

having

cast(min(a.LogTime) as date) between  '2016-01-01' and '2016-12-31'

) aa

group by

install_date, UserNo

order by

install_date



[R 프로그래밍]

# CSV 파일 읽기

dau <- read.csv("IK-dau.csv", header = T, stringsAsFactors = F)

head(dau)

dpu <- read.csv("dpu.csv", header = T, stringsAsFactors = F)

head(dpu)

install <- read.csv("install.csv", header = T, stringsAsFactors= F)

head(install)


#cf) txt 파일 읽기


dau <- read.table("dau.txt", header = T, stringsAsFactors = F) #txt파일 읽기

names(dau) <- c("log_date", "app_name" , "user_id")  #칼럼이름 변경



# DAU 데이터에 Install 데이터 결합

dau.install <- merge(dau, install, by = c("user_id", "app_name"), all.x=T)

head(dau.install)


# 위 데이터에 다시 PU 데이터를 결합


dau.install.payment <- merge(dau.install, dpu, by = c("log_date",

"app_name", "user_id"), all.x = T)

head(dau.install.payment)


head(na.omit(dau.install.payment))



# 비과금 유저의 과금액에 0을 넣기


dau.install.payment$payment[is.na(dau.install.payment$payment)] <- 0

head(dau.install.payment)


# 인스톨 NA 유저에게 "2013-10-01" 데이터를 넣기


dau.install.payment$install_date[is.na(dau.install.payment$install_date)] <- "2013-10-01"

head(dau.install.payment)


# 월차집계


#월 항목 추가 


dau.install.payment$log_month <-substr(dau.install.payment$log_date, 1, 7)

dau.install.payment$install_month <- substr(dau.install.payment$install_date, 1, 7)

library("plyr")

mau.payment <- ddply(dau.install.payment,

.(log_month, user_id, install_month),

summarize,

payment = sum(payment)

)

head(mau.payment)


# 신규, 기존 구분항목 추가


# 신규/기존 유저 식별


mau.payment$user.type <- ifelse(mau.payment$install_month == mau.payment$log_month,

"install", "existing")


mau.payment.summary <- ddply(mau.payment,

.(log_month, user.type), 

summarize, 

total.payment = sum(payment)

)


head(mau.payment) 

head(mau.payment.summary)


#그래프로 데이터 시각화

library("ggplot2")

library("scales")

ggplot(mau.payment.summary, aes(x = log_month, y = total.payment,

fill = user.type)) + geom_bar(stat="identity") + scale_y_continuous(label = comma)


#신규유저 그래프 데이터 시각화

ggplot(mau.payment[mau.payment$payment > 0 & mau.payment$user.type == "install", ], 

aes(x = payment, fill = log_month)) + geom_histogram(position = "dodge", binwidth = 20000) + scale_x_continuous(label = comma)


#데이터 시각화 보는 방법 고민하기

반응형

댓글