-- 建维度表-- 服务网点维度表create external tableifnotexists one_make_dws.dim_srv_station(
id string comment'服务网点id', name string comment'服务网点名称',code string comment'网点编号',province_id string comment'省份id',province string comment'省份名称',city_id string comment'城市id',city string comment'城市',county_id string comment'县城id',county string comment'县城',status string comment'服务网点状态',status_name string comment'状态中文名',org_id string comment'所属组织机构id',org_name string comment'所属组件机构名称')comment'服务网点维度表'
partitioned by(dt string)
stored as orc
location '/test/dws/one_make/dim_srv_station';-- 加载数据insert overwrite table one_make_dws.dim_srv_station partition(dt='20210101')select
station.id
, station.name
, station.code
, province.id as province_id
, province.areaname as province
, city.id as city_id
, city.areaname as city
, county.id as county_id
, county.areaname as county
, station.statusasstatus, dict_e.dictname as status_name
, station.org_id as org_id
, station.org_name as org_name
from one_make_dwd.ciss_base_servicestation station
-- 关联省份RANK为1leftjoin one_make_dwd.ciss_base_areas province on station.dt ='20210101'and station.province = province.id and province.rank =1-- 关联城市RANK为2leftjoin one_make_dwd.ciss_base_areas city on station.city = city.id and city.rank =2-- 关联城市RANK为3leftjoin one_make_dwd.ciss_base_areas county on station.region = county.id and county.rank =3-- 关联字典父表(dict_t)crossjoin one_make_dwd.eos_dict_type dict_t on dict_t.dt ='20210101'and dict_t.dicttypename ='服务网点使用状态'-- 关联字典子表(dict_e)leftjoin one_make_dwd.eos_dict_entry dict_e on dict_e.dt ='20210101'and dict_t.dicttypeid = dict_e.dicttypeid and station.status= dict_e.dictid;-- cross join:笛卡尔积,join时候不给关联条件-select*from A,B
-select*from A join B
-crossjoin
油站维度构建
-- 建维度表**-- 创建油站维度表create external tableifnotexists one_make_dws.dim_oilstation(
id string comment'油站ID', name string comment'油站名称', code string comment'油站编码', customer_id string comment'客户ID', customer_name string comment'客户名称', province_id intcomment'省份id', province_name string comment'省份名称', city_id intcomment'城市id', city_name string comment'城市名称', county_id intcomment'县城ID', county_name string comment'县城名称', area_id intcomment'区域id', area_name string comment'区域名称', customer_classify_id string comment'客户分类ID', customer_classify_name string comment'客户分类名称',statusintcomment'油站状态(1、2)', status_name string comment'油站状态名(正常、停用)', company_id intcomment'所属公司ID', company_name string comment'所属公司名称', customer_province_id intcomment'客户所属省份ID', customer_province_name string comment'客户所属省份')COMMENT'油站维度表'
PARTITIONED BY(dt STRING)
STORED AS TEXTFILE
LOCATION '/test/dws/one_make/dim_oilstation';-- 抽取数据insert overwrite table one_make_dws.dim_oilstation partition(dt ='20210101')select oil.id, oil.name, oil.code, customer_id, customer_name
, oil.province province_id, p.areaname province_name
, oil.city city_id, c.areaname city_name
, oil.region county_id, county.areaname county_name
, oil.township area_id, a.areaname area_name
, oil.customer_classify customer_classify_id, ede.dictname customer_classify_name
, oil.statusstatus, eosde.dictname status_name
, cbc.company company_id, binfo.companyname company_name
, proname.id customer_province_id, proname.areaname customer_province_name
from(select id, name, code, customer_id, customer_name, province, city, region, township,status, customer_classify, dt
from one_make_dwd.ciss_base_oilstation where id !=''and name isnotnulland name !='null'and customer_id isnotnull) oil
leftjoin(select id, areaname, parentid from one_make_dwd.ciss_base_areas where rank =1) p on oil.province = p.id
leftjoin(select id, areaname, parentid from one_make_dwd.ciss_base_areas where rank =2) c on oil.city = c.id
leftjoin(select id, areaname, parentid from one_make_dwd.ciss_base_areas where rank =3) county on oil.region = county.id
leftjoin(select id, areaname, parentid from one_make_dwd.ciss_base_areas where rank =4) a on oil.township = a.id
leftjoin(select dictid, dictname from one_make_dwd.eos_dict_entry) ede on oil.customer_classify = ede.dictid
leftjoin(select dictid, dictname from one_make_dwd.eos_dict_entry t1 leftjoin one_make_dwd.eos_dict_type t2 on t1.dicttypeid = t2.dicttypeid where t2.dicttypename ='油站状态') eosde on oil.status= eosde.dictid
-- 客户所属公司id,所属公司名称,所属省份id,所属省份名称leftjoin(select code, province, company from one_make_dwd.ciss_base_customer) cbc on oil.customer_id = cbc.code
leftjoin(select id, areaname from one_make_dwd.ciss_base_areas where rank =1and id !=83) proname on cbc.province = proname.areaname
leftjoin(select ygcode, companyname from one_make_dwd.ciss_base_baseinfo groupby ygcode, companyname) binfo on cbc.company = binfo.ygcode where dt ='20210101';
其他维度
---- 组织机构-- 建维度表-- 创建组织机构维度表,组织机构人员是经常变动的,所以按照日期分区create external tableifnotexists one_make_dws.dim_emporg(
empid string comment'人员id', empcode string comment'人员编码(erp对应的账号id)', empname string comment'人员姓名', userid string comment'用户系统id(登录用户名)', posid string comment'岗位id', posicode string comment'岗位编码', posiname string comment'岗位名称', orgid string comment'部门id', orgcode string comment'部门编码', orgname string comment'部门名称')comment'组织机构维度表'
partitioned by(dt string)
stored as orc
location '/test/dws/one_make/dim_emporg';-- 抽取数据-- 先根据dwd层的表进行关联,然后分别把数据取出来insert overwrite table one_make_dws.dim_emporg partition(dt='20210101')select
emp.empid as empid
, emp.empcode as empcode
, emp.empname as empname
, emp.userid as userid
, pos.positionid as posid
, pos.posicode as posicode
, pos.posiname as posiname
, org.orgid as orgid
, org.orgcode as orgcode
, org.orgname as orgname
from one_make_dwd.org_employee emp
leftjoin one_make_dwd.org_empposition emppos
on emp.empid = emppos.empid and emp.dt ='20210101'and emppos.dt ='20210101'leftjoin one_make_dwd.org_position pos
on emppos.positionid = pos.positionid and pos.dt ='20210101'leftjoin one_make_dwd.org_organization org
on pos.orgid = org.orgid and org.dt ='20210101';---- 仓库、物流-- 仓库维度表create external tableifnotexists one_make_dws.dim_warehouse(
code string comment'仓库编码', name string comment'仓库名称', company_id string comment'所属公司', company string comment'公司名称', srv_station_id string comment'所属服务网点ID', srv_station_name string comment'所属服务网点名称')comment'仓库维度表'
partitioned by(dt string)
stored as orc
location '/test/dws/one_make/dim_warehouse';-- 加载数据insert overwrite table one_make_dws.dim_warehouse partition(dt='20210101')select
warehouse.code as code
, warehouse.name as name
, warehouse.company as company_id
, cmp.compmay as compmay
, station.id as srv_station_id
, station.name as srv_station_name
from
one_make_dwd.ciss_base_warehouse warehouse
-- 关联公司信息表leftjoin(select
ygcode as company_id,max(companyname)as compmay
from one_make_dwd.ciss_base_baseinfo where dt='20210101'-- 需要对company信息进行分组去重,里面有一些重复数据 groupby ygcode) cmp
on warehouse.dt ='20210101'and cmp.company_id = warehouse.company
-- 关联服务网点和仓库关系表leftjoin one_make_dwd.ciss_r_serstation_warehouse station_r_warehouse
on station_r_warehouse.dt ='20210101'and station_r_warehouse.warehouse_code = warehouse.code
-- 关联服务网点表 leftjoin one_make_dwd.ciss_base_servicestation station
on station.dt ='20210101'and station.id = station_r_warehouse.service_station_id;-- 物流维度表-- 物流维度表(和服务属性表类似)create external tableifnotexists one_make_dws.dim_logistics(
prop_name string comment'字典名称', type_id string comment'属性id', type_name string comment'属性名称')comment'物流维度表'
partitioned by(dt string)
stored as orc
-- 加载insert overwrite table one_make_dws.dim_logistics partition(dt ='20210101')select
dict_t.dicttypename as prop_name
, dict_e.dictid as type_id
, dict_e.dictname as type_name
from one_make_dwd.eos_dict_type dict_t
innerjoin one_make_dwd.eos_dict_entry dict_e
on dict_t.dt ='20210101'and dict_e.dt ='20210101'and dict_t.dicttypeid = dict_e.dicttypeid
and dict_t.dicttypename in('物流公司','物流类型')orderby dict_t.dicttypename, dict_e.dictid;
常见错误
1.错误:没有开启Cross Join -Spark2.x默认不允许执行笛卡尔积,除非**显示申明cross join**或者开启属性:`spark.sql.crossJoin.enabled true`Exceptionin thread "main" org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for INNER join between logical plans.Use the CROSS JOIN syntax to allow cartesian products between these relations2.错误:Unable to move source -重启SparkSQL的ThriftServer,与MetaStore构建新的会话连接Error:org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to move source hdfs://hadoop.bigdata.cn:9000/test/dws/one_make/dim_warehouse/.hive-staging_hive_2020-12-23_04-26-01_363_5663538019799519260-16/-ext-10000/part-00000-63069107-6405-4e31-a55a-6bdeefcd7d9b-c000 to destination hdfs://hadoop.bigdata.cn:9000/test/dws/one_make/dim_warehouse/dt=20210101/part-00000-63069107-6405-4e31-a55a-6bdeefcd7d9b-c000; (state=,code=0)