注意DWS层维度较多, 小时数据和天数据(2019-07-01/2019-07-01 10:00:00),季度数据和年数据(2019-Q3/2019),不同维度存在分区目录相同的情况。
如果使用的是Insert overwrite覆盖写入,天和小时数据存在于同一个分区中,会出现新数据互相覆盖的问题。
使用Insert Into比较安全。
分区内有旧数据的话,则要先通过sql删除旧数据:
-- 精确删除数据
--删除20190901的05点数据
delete from itcast_dws.visit_dws where yearinfo='2019' and monthinfo='07' and dayinfo='01' and hourinfo='05' and time_type=1;
--删除20190901天数据
delete from itcast_dws.visit_dws where yearinfo='2019' and monthinfo='09' and dayinfo='01' and time_type=2;
--删除201909月数据
delete from itcast_dws.visit_dws where yearinfo='2019' and monthinfo='09' and dayinfo='-1' and time_type=3;
--删除2019年3季度数据
delete from itcast_dws.visit_dws where yearinfo='2019' and quarterinfo='3' and time_type=4;
--删除2019年数据
delete from itcast_dws.visit_dws where yearinfo='2019' and time_type=4;
--删除分区下所有数据
--删除20190901天和小时数据(会把日期里面的小时数据一起删除掉,月和年尽量使用delete语句)
alter table itcast_dws.visit_dws drop partition (yearinfo='2019', monthinfo='09', dayinfo='01');
HIVE_HOME = /usr/bin/hive
if [[ $1 == "" ]];then
TD_DATE = `date -d '1 days ago' "+%Y-%m-%d"`
else
TD_DATE = $1
fi
if [[ $TD_DATE =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]];then
echo "TD_DATE is $TD_DATE"
else
echo "TD_DATE is not correct"
exit 1
fi
TD_YEAR = $(date -d "$TD_DATE" +%Y)
TD_MONTH = $(date -d "$TD_DATE" +%m)
TD_DAY = $(date -d "$TD_DATE" +%d)
month_for_quarter = `date --date="$TD_DATE" +%-m`
TD_QUARTER = $((($month_for_quarter-1)/3+1))
${HIVE_HOME} -S -e "
--动态分区配置
set hive.exec.dynamic.partition=true;
set hive.exec.max.dynamic.partitions=2000;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=150000;
--hive压缩
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
--写入时压缩生效
set hive.exec.orc.compression.strategy=COMPRESSION;
---------地区分组
--小时
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
area,
'-1' as seo_source,
'-1' as origin_channel,
hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo,' ',hourinfo) as time_str,
'-1' as from_url,
'1' as grouptype,
'1' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by area, yearinfo, quarterinfo, monthinfo, dayinfo, hourinfo;
--天
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo) as time_str,
'-1' as from_url,
'1' as grouptype,
'2' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by area, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo) as time_str,
'-1' as from_url,
'1' as grouptype,
'3' as time_type,
yearinfo, monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by area, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-Q',quarterinfo) as time_str,
'-1' as from_url,
'1' as grouptype,
'4' as time_type,
yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by area, yearinfo, quarterinfo;
--年
INSERT overwrite TABLE itcast_dws.visit_dws PARTITION (yearinfo,monthinfo,dayinfo)
select
COUNT(DISTINCT wce.sid) as sid_total,
COUNT(DISTINCT wce.session_id) as sessionid_total,
COUNT(DISTINCT wce.ip) as ip_total,
wce.area as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
wce.yearinfo as time_str,
'-1' as from_url,
'1' as groupType,
'5' as time_type,
wce.yearinfo as yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd wce
where yearinfo='$TD_YEAR'
group by wce.area,wce.yearinfo;
---------搜索来源分组
--小时
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
seo_source,
'-1' as origin_channel,
hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo,' ',hourinfo) as time_str,
'-1' as from_url,
'2' as grouptype,
'1' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by seo_source, yearinfo, quarterinfo, monthinfo, dayinfo, hourinfo;
--天
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo) as time_str,
'-1' as from_url,
'2' as grouptype,
'2' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by seo_source, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo) as time_str,
'-1' as from_url,
'2' as grouptype,
'3' as time_type,
yearinfo, monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by seo_source, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-Q',quarterinfo) as time_str,
'-1' as from_url,
'2' as grouptype,
'4' as time_type,
yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by seo_source, yearinfo, quarterinfo;
--年
INSERT overwrite TABLE itcast_dws.visit_dws PARTITION (yearinfo,monthinfo,dayinfo)
select
COUNT(DISTINCT wce.sid) as sid_total,
COUNT(DISTINCT wce.session_id) as sessionid_total,
COUNT(DISTINCT wce.ip) as ip_total,
'-1' as area,
seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
wce.yearinfo as time_str,
'-1' as from_url,
'2' as groupType,
'5' as time_type,
wce.yearinfo as yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd wce
where yearinfo='$TD_YEAR'
group by wce.seo_source,wce.yearinfo;
---------来源渠道分组
--小时
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
origin_channel,
hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo,' ',hourinfo) as time_str,
'-1' as from_url,
'3' as grouptype,
'1' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by origin_channel, yearinfo, quarterinfo, monthinfo, dayinfo, hourinfo;
--天
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo) as time_str,
'-1' as from_url,
'3' as grouptype,
'2' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by origin_channel, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo) as time_str,
'-1' as from_url,
'3' as grouptype,
'3' as time_type,
yearinfo, monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by origin_channel, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-Q',quarterinfo) as time_str,
'-1' as from_url,
'3' as grouptype,
'4' as time_type,
yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by origin_channel, yearinfo, quarterinfo;
--年
INSERT overwrite TABLE itcast_dws.visit_dws PARTITION (yearinfo,monthinfo,dayinfo)
select
COUNT(DISTINCT wce.sid) as sid_total,
COUNT(DISTINCT wce.session_id) as sessionid_total,
COUNT(DISTINCT wce.ip) as ip_total,
'-1' as area,
'-1' as seo_source,
origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
wce.yearinfo as time_str,
'-1' as from_url,
'3' as groupType,
'5' as time_type,
wce.yearinfo as yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd wce
where yearinfo='$TD_YEAR'
group by wce.origin_channel,wce.yearinfo;
---------会话来源页面分组
--小时
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo,' ',hourinfo) as time_str,
from_url,
'4' as grouptype,
'1' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by from_url, yearinfo, quarterinfo, monthinfo, dayinfo, hourinfo;
--天
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo) as time_str,
from_url,
'4' as grouptype,
'2' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by from_url, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo) as time_str,
from_url,
'4' as grouptype,
'3' as time_type,
yearinfo, monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by from_url, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-Q',quarterinfo) as time_str,
from_url,
'4' as grouptype,
'4' as time_type,
yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by from_url, yearinfo, quarterinfo;
--年
INSERT overwrite TABLE itcast_dws.visit_dws PARTITION (yearinfo,monthinfo,dayinfo)
select
COUNT(DISTINCT wce.sid) as sid_total,
COUNT(DISTINCT wce.session_id) as sessionid_total,
COUNT(DISTINCT wce.ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
wce.yearinfo as time_str,
from_url,
'4' as groupType,
'5' as time_type,
wce.yearinfo as yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd wce
where yearinfo='$TD_YEAR'
group by wce.from_url,wce.yearinfo;
---------总访问量分组
--小时
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo,' ',hourinfo) as time_str,
'-1' as from_url,
'5' as grouptype,
'1' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by yearinfo, quarterinfo, monthinfo, dayinfo, hourinfo;
--天
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo,'-',dayinfo) as time_str,
'-1' as from_url,
'5' as grouptype,
'2' as time_type,
yearinfo, monthinfo, dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo='$TD_DAY'
group by yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-',monthinfo) as time_str,
'-1' as from_url,
'5' as grouptype,
'3' as time_type,
yearinfo, monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.visit_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid) as sid_total,
count(distinct session_id) as session_total,
count(distinct ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat(yearinfo,'-Q',quarterinfo) as time_str,
'-1' as from_url,
'5' as grouptype,
'4' as time_type,
yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by yearinfo, quarterinfo;
--年
INSERT overwrite TABLE itcast_dws.visit_dws PARTITION (yearinfo,monthinfo,dayinfo)
select
COUNT(DISTINCT wce.sid) as sid_total,
COUNT(DISTINCT wce.session_id) as sessionid_total,
COUNT(DISTINCT wce.ip) as ip_total,
'-1' as area,
'-1' as seo_source,
'-1' as origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
wce.yearinfo as time_str,
'-1' as from_url,
'5' as groupType,
'5' as time_type,
wce.yearinfo as yearinfo,
'-1' as monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd wce
where yearinfo='$TD_YEAR'
group by wce.yearinfo;
"
Copy
SQOOP_HOME = /usr/bin/sqoop
if [[ $1 == "" ]];then
TD_DATE = `date -d '1 days ago' "+%Y-%m-%d"`
else
TD_DATE = $1
fi
if [[ $TD_DATE =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]];then
echo "TD_DATE is $TD_DATE"
else
echo "TD_DATE is not correct"
exit 1
fi
TD_YEAR = $(date -d "$TD_DATE" +%Y)
mysql -uroot -p'84^j1TZ-+2b9A@S7Hs' -h172.16.100.42 -P4000 -e "delete from scrm_bi.itcast_visit where yearinfo='$TD_YEAR';"
wait
$SQOOP_HOME export \
--connect "jdbc:mysql://172.16.100.42:4000/scrm_bi?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 84^j1TZ-+2b9A@S7Hs \
--table visit_dws \
--hcatalog-database itcast_dws \
--hcatalog-table visit_dws \
--hcatalog-partition-keys yearinfo \
--hcatalog-partition-values $TD_YEAR \
-m 1
HIVE_HOME = /usr/bin/hive
if [[ $1 == "" ]];then
TD_DATE = `date -d '1 days ago' "+%Y-%m-%d"`
else
TD_DATE = $1
fi
if [[ $TD_DATE =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]];then
echo "TD_DATE is $TD_DATE"
else
echo "TD_DATE is not correct"
exit 1
fi
TD_YEAR = $(date -d "$TD_DATE" +%Y)
TD_MONTH = $(date -d "$TD_DATE" +%m)
TD_DAY = $(date -d "$TD_DATE" +%d)
TD_QUARTER = $((($TD_MONTH-1)/3+1))
$HIVE_HOME -S -e "
--动态分区配置
set hive.exec.dynamic.partition=true;
set hive.exec.max.dynamic.partitions=2000;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=150000;
--hive压缩
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
--写入时压缩生效
set hive.exec.orc.compression.strategy=COMPRESSION;
-------地区分组
--天
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
area,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-',yearinfo,monthinfo,dayinfo),
'1',
'2',
yearinfo,monthinfo,dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo = '$TD_DAY'
group by area, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
area,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-',yearinfo,monthinfo),
'1',
'3',
yearinfo,monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by area, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
area,
'-1' as origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-Q',yearinfo,quarterinfo),
'1',
'4',
yearinfo,'-1' as monthinfo,'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by area, yearinfo, quarterinfo;
--年
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
area,
'-1' as origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
yearinfo,
'1',
'5',
yearinfo,'-1' as monthinfo,'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR'
group by area, yearinfo;
------来源渠道分组
--天
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
'-1' as area,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-',yearinfo,monthinfo,dayinfo),
'2',
'2',
yearinfo,monthinfo,dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH' and dayinfo = '$TD_DAY'
group by origin_channel, yearinfo, quarterinfo, monthinfo, dayinfo;
--月
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
'-1' as area,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-',yearinfo,monthinfo),
'2',
'3',
yearinfo,monthinfo,
'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and monthinfo='$TD_MONTH'
group by origin_channel, yearinfo, quarterinfo, monthinfo;
--季度
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
'-1' as area,
origin_channel,
'-1' as hourinfo,
quarterinfo,
concat_ws('-Q',yearinfo,quarterinfo),
'2',
'4',
yearinfo,'-1' as monthinfo,'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR' and quarterinfo='$TD_QUARTER'
group by origin_channel, yearinfo, quarterinfo;
--年
insert into itcast_dws.consult_dws partition (yearinfo, monthinfo, dayinfo)
select
count(distinct sid),
count(distinct session_id),
count(distinct ip),
'-1' as area,
origin_channel,
'-1' as hourinfo,
'-1' as quarterinfo,
yearinfo,
'2',
'5',
yearinfo,'-1' as monthinfo,'-1' as dayinfo
from itcast_dwd.visit_consult_dwd
where msg_count >= 1 and yearinfo='$TD_YEAR'
group by origin_channel, yearinfo;
"
Copy