

-- Cleaning step 1: keep only the rows of `data` in which every field listed
-- below is populated, i.e. is neither NULL nor the empty string.
create view v_data_clean_null as
select *
from `data` as src
where (src.job_href           is not null and src.job_href           <> '')
  and (src.job_name           is not null and src.job_name           <> '')
  and (src.company_href       is not null and src.company_href       <> '')
  and (src.company_name       is not null and src.company_name       <> '')
  and (src.providesalary_text is not null and src.providesalary_text <> '')
  and (src.workarea           is not null and src.workarea           <> '')
  and (src.workarea_text      is not null and src.workarea_text      <> '')
  and (src.companytype_text   is not null and src.companytype_text   <> '')
  and (src.degreefrom         is not null and src.degreefrom         <> '')
  and (src.workyear           is not null and src.workyear           <> '')
  and (src.updatedate         is not null and src.updatedate         <> '')
  and (src.issuedate          is not null and src.issuedate          <> '')
  and (src.parse2_job_detail  is not null and src.parse2_job_detail  <> '');


-- Cleaning step 2: de-duplicate postings.
-- Rows sharing the same (company_name, job_name) are ranked by issuedate
-- ascending and only the first-ranked row is kept. `id` is used as a
-- tie-breaker so that the surviving row is deterministic when several
-- duplicates carry the same issuedate.
-- The helper rank column is dropped from the output by listing every needed
-- column explicitly in the final select.
create view v_data_clean_distinct as
with ranked as (
    select
        n.*,
        row_number() over (
            partition by n.company_name, n.job_name
            order by n.issuedate, n.id
        ) as row1
    from v_data_clean_null as n
)
select
    id,
    job_href,
    job_name,
    company_href,
    company_name,
    providesalary_text,
    workarea,
    workarea_text,
    updatedate,
    companytype_text,
    degreefrom,
    workyear,
    issuedate,
    parse2_job_detail
from ranked
where row1 = 1;


-- Cleaning step 3: filter by hiring region.
-- Each de-duplicated posting gets a `workplace` label when workarea_text
-- contains one of four city names (Beijing, Shanghai, Guangzhou, Shenzhen;
-- the first matching branch wins). Postings matching none of them get a NULL
-- label and are dropped.
create view v_data_clean_workplace as
with tagged as (
    select
        d.*,
        case
            when d.workarea_text like '%北京%' then '北京'
            when d.workarea_text like '%上海%' then '上海'
            when d.workarea_text like '%广州%' then '广州'
            when d.workarea_text like '%深圳%' then '深圳'
        end as workplace
    from v_data_clean_distinct as d
)
select *
from tagged
where workplace is not null;


-- Cleaning step 4: keep only postings whose job title contains the keyword
-- in the LIKE pattern below (Chinese for "data").
create view v_data_clean_jobname as
select w.*
from v_data_clean_workplace as w
where w.job_name like '%数据%';
-- Final cleaning result: exposes the output of the last cleaning step under
-- a stable name for the reporting views to read from.
create view v_data_clean as
select *
from v_data_clean_jobname;



-- Market demand per city. Output columns (aliases are in Chinese):
--   city label (workplace), total hiring volume (sum of degreefrom),
--   and number of postings (row count).
-- NOTE(review): degreefrom is summed as the hiring volume; confirm that this
-- column really holds a head count in the source data.
create view v_data_market_demand as
select
    c.workplace       as `城市`,
    sum(c.degreefrom) as `招聘总量`,
    count(*)          as `职位数`
from v_data_clean as c
group by c.workplace;



-- Hiring volume per company type and its share of the overall volume.
-- Output columns (aliases are in Chinese): company type, total hiring volume
-- for that type, and that volume as a percentage string such as '12.34%'.
--
-- f1 holds one row per company type; f2 is a single-row grand total, so the
-- cross join simply attaches the total to every f1 row.
-- The percentage is cast to decimal(5,2) so that a 100.00% share fits.
-- If the grand total is NULL (no cleaned rows) the percentage is NULL.
create view v_data_companytype_degree as
select
    f1.companytype_text       as `企业类型`,
    f1.companytype_degreefrom as `招聘总量`,
    concat(
        cast(f1.companytype_degreefrom / f2.sum_degreefrom * 100 as decimal(5,2)),
        '%'
    )                         as `招聘占比`
from (
    select
        companytype_text,
        sum(degreefrom) as companytype_degreefrom
    from v_data_clean
    group by companytype_text
) as f1
cross join (
    select sum(degreefrom) as sum_degreefrom
    from v_data_clean
) as f2
order by f1.companytype_degreefrom desc;



-- Parses the textual salary range into numeric lower bound, upper bound and
-- midpoint.
--
-- Input: v_data_salary_unit (not defined in this part of the script), which
-- is expected to supply providesalary_text plus a numeric `unit` column.
-- The unit value selects which suffix is stripped from the text:
--   unit = 10000 -> text ends with the "ten-thousand per month" suffix
--   unit = 1000  -> text ends with the "thousand per month" suffix
--   unit = 833   -> text ends with the "ten-thousand per year" suffix
--                   (presumably 10000 / 12 rounded down, to convert a yearly
--                   figure into a monthly one; confirm against the view that
--                   assigns unit)
-- After stripping the suffix, the part before the '-' is the lower bound and
-- the part after it is the upper bound; each is cast to decimal and scaled
-- by unit. Rows with any other unit value get NULL bounds.
create view v_data_salary_min_max_mean as
with p as (
    select
        u.*,
        cast(
            case
                when u.unit = 10000 then substring_index(substring_index(u.providesalary_text, '万/月', 1), '-', 1)
                when u.unit = 1000  then substring_index(substring_index(u.providesalary_text, '千/月', 1), '-', 1)
                when u.unit = 833   then substring_index(substring_index(u.providesalary_text, '万/年', 1), '-', 1)
            end as decimal(10,2)
        ) * u.unit as salary_min,
        cast(
            case
                when u.unit = 10000 then substring_index(substring_index(u.providesalary_text, '万/月', 1), '-', -1)
                when u.unit = 1000  then substring_index(substring_index(u.providesalary_text, '千/月', 1), '-', -1)
                when u.unit = 833   then substring_index(substring_index(u.providesalary_text, '万/年', 1), '-', -1)
            end as decimal(10,2)
        ) * u.unit as salary_max
    from v_data_salary_unit as u
)
select
    p.*,
    cast((p.salary_min + p.salary_max) / 2 as decimal(10,2)) as salary_mean
from p;


-- Average midpoint salary per required-experience value.
-- Output columns (aliases are in Chinese): work experience, average salary.
-- Rows are ordered by the text length of workyear first and by its value
-- second, so shorter values sort ahead of longer ones.
create view v_data_workyear_salary as
select
    s.workyear         as `工作年限`,
    avg(s.salary_mean) as `平均薪资`
from v_data_salary_min_max_mean as s
group by s.workyear
order by
    length(s.workyear),
    s.workyear;


-- Average midpoint salary per company type, highest average first.
-- Output columns (aliases are in Chinese): company type, average salary.
create view v_data_companytype_salary as
select
    s.companytype_text as `企业类型`,
    avg(s.salary_mean) as `平均薪资`
from v_data_salary_min_max_mean as s
group by s.companytype_text
order by avg(s.salary_mean) desc;



-- Top 30 skills by number of cleaned postings whose job description mentions
-- them. A posting counts for a skill when parse2_job_detail contains the
-- skill text anywhere (substring match via LIKE).
-- Ties on quantity are broken by skill name so that the 30-row cut-off is
-- deterministic.
-- NOTE(review): any '%' or '_' inside skill_table.skill acts as a LIKE
-- wildcard here; confirm the skill list contains no such characters.
create view v_data_skill_quantity as
select
    st.skill,
    count(*) as quantity
from v_data_clean as v
inner join skill_table as st
    on v.parse2_job_detail like concat('%', st.skill, '%')
group by st.skill
order by
    quantity desc,
    st.skill
limit 30;


-- Skill report: for every row of v_data_skill_quantity, the skill, how many
-- postings mention it, and that count as a percentage of all cleaned postings
-- (formatted as a string such as '12.34%').
-- Output column aliases are in Chinese: skill, occurrence count, occurrence rate.
-- The derived table f is a single row, so the cross join just attaches the
-- overall posting count to each skill row.
create view v_data_skill as
select
    q.skill    as `技能点`,
    q.quantity as `出现频数`,
    concat(
        cast(q.quantity / f.total_quantity * 100 as decimal(10,2)),
        '%'
    )          as `出现频率`
from v_data_skill_quantity as q
cross join (
    select count(*) as total_quantity
    from v_data_clean
) as f;






