nvl空字段赋值  nvl(comm, -1)
count(*) sum(1)
LATERAL VIEW udtf(expression) tableAlias AS columnAlias
split()切成后为数组
explode()将 hive 一列中复杂的 Array 或者 Map 结构拆分成多行。
CAST('1' AS INT) 字符串'1'转换成整数1;如果强制类型转换失败,如执行
CAST('X' AS INT),表达式返回空值 NULL
concat  拼接函数
collect_list        将某字段的值进行汇总,产生array类型字段。
collect_set     将某字段的值进行去重汇总,产生array类型字段。
concat_ws(separator,[string|array(string)]) separator是分隔符,数组按照分隔符进行拼接
CURRENT ROW:当前行
n PRECEDING:往前 n 行数据
n FOLLOWING:往后 n 行数据
UNBOUNDED:起点,
UNBOUNDED PRECEDING 表示从前面的起点,
UNBOUNDED FOLLOWING 表示到后面的终点
LAG(col,n,default_val):往前第 n 行数据
LEAD(col,n, default_val):往后第 n 行数据
NTILE(n):把有序窗口的行分发到指定数量(n 个)的组中,各个组有编号,编号从 1 开始,对
于每一行,NTILE 返回此行所属的组的编号。注意:n 必须为 int 类型。
datediff(CURRENT_DATE(), "1990-06-04");
date_add(current_date(), 90);
current_date 返回当前日期
next_day 返回指定日期之后一周中特定的日期
date_format
last_day 获取每个月的最后一天
get_json_object
get_json_object(json_txt, path): 从一个JSON字符串中取出指定路径对应的数据!
核心:path怎么写?$:  代表根对象
. : 获取子元素的操作符
[] : 获取一个数组中子元素的操作符
round(x, d) 保留 d 位小数
--各种聚合
-- Window-frame cheat sheet: the same SUM(cost) evaluated over seven
-- different window specifications on the business table.
SELECT
    name,
    orderdate,
    cost,
    -- sample1: no partition, no order -> sum over all rows
    SUM(cost) OVER () AS sample1,
    -- sample2: sum within each name group
    SUM(cost) OVER (PARTITION BY name) AS sample2,
    -- sample3: running sum within each name group, ordered by orderdate
    SUM(cost) OVER (PARTITION BY name ORDER BY orderdate) AS sample3,
    -- sample4: explicit frame from the partition start to the current row.
    -- NOTE(review): the original note says this equals sample3; with duplicate
    -- orderdate values the default frame of sample3 may differ - confirm.
    SUM(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS sample4,
    -- sample5: current row plus the one row before it
    SUM(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN 1 PRECEDING AND CURRENT ROW
    ) AS sample5,
    -- sample6: previous row, current row and next row
    SUM(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS sample6,
    -- sample7: current row and every row after it
    SUM(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS sample7
FROM business;

1.最多连胜次数

透过题目看本质:开窗函数算是一个打标记,然后我通过打的标记,去找对应的规律。得出最终的结果。

题目解析:

2.连胜的最大天数

3.直播间访客峰值

4.相互关注

5.统计累积访问次数

需求:我们有如下的用户访问数据

要求:使用SQL统计出每个用户的累积访问次数,如下表所示:

-- Set up the practice database and the visit-log table used by question 5.
CREATE DATABASE test_sql;
USE test_sql;

-- Question 1 data: one row per (user, visit date) with the visit count of that day.
-- visitDate is kept as a slash-separated string such as '2017/1/21'.
CREATE TABLE test_sql.test1 (
    userId     string,
    visitDate  string,
    visitCount INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY "\t";

-- Sample rows (note that the u02 / 2017/1/23 row is present twice in the source data).
INSERT INTO TABLE test_sql.test1
VALUES
    ('u01', '2017/1/21', 5),
    ('u02', '2017/1/23', 6),
    ('u03', '2017/1/22', 8),
    ('u04', '2017/1/20', 3),
    ('u01', '2017/1/23', 6),
    ('u01', '2017/2/21', 8),
    ('u02', '2017/1/23', 6),
    ('u01', '2017/2/22', 4);
-- Keep the shuffle small for this tiny demo data set.
set spark.sql.shuffle.partitions=4;

-- Cumulative visit count per user, month by month.
-- The month key is built from the first two '/'-separated parts of visitDate
-- and the month is zero-padded, so that
--   * two-digit months ('2017/10/1') are not truncated to '2017/1', and
--   * string ordering of visitMonth matches calendar ordering.
with monthly as (
    select
        userId,
        concat(split(visitDate, '/')[0], '-', lpad(split(visitDate, '/')[1], 2, '0')) as visitMonth,
        sum(visitCount) as sum1 -- monthly subtotal
    from test1
    group by
        userId,
        concat(split(visitDate, '/')[0], '-', lpad(split(visitDate, '/')[1], 2, '0'))
)
select
    userId,
    visitMonth,
    sum1,
    sum(sum1) over (
        partition by userId
        order by visitMonth
        rows between unbounded preceding and current row
    ) as sum2 -- running total
from monthly
order by userId, visitMonth;
-- Optimization 1 (next query): derive the month with date_format instead.
-- Cumulative visit count per user and month, deriving the month key with
-- date_format after turning the '/' separators of visitDate into '-'.
WITH monthly AS (
    SELECT
        userId,
        date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') AS visitMonth,
        SUM(visitCount) AS sum1 -- monthly subtotal
    FROM test1
    GROUP BY
        userId,
        date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM')
)
SELECT
    userId,
    visitMonth,
    sum1,
    SUM(sum1) OVER (
        PARTITION BY userId
        ORDER BY visitMonth
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS sum2 -- running total
FROM monthly
ORDER BY userId, visitMonth;

6.2017年11月的新客数

数据准备

-- Order table for question 6: one row per order.
CREATE TABLE test_sql.test3 (
    dt       string,
    order_id string,
    user_id  string,
    amount   DECIMAL(10, 2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

-- Sample orders, loaded with a single multi-row insert.
INSERT INTO TABLE test_sql.test3
VALUES
    ('2017-01-01', '10029028',  '1000003251', 33.57),
    ('2017-01-01', '10029029',  '1000003251', 33.57),
    ('2017-01-01', '100290288', '1000003252', 33.57),
    ('2017-02-02', '10029088',  '1000003251', 33.57),
    ('2017-02-02', '10028888',  '1000008888', 33.57),
    ('2017-02-02', '100290281', '1000003251', 33.57),
    ('2017-02-02', '100290282', '1000003253', 33.57),
    ('2017-11-02', '10290282',  '100003253',  234),
    ('2018-11-02', '10290284',  '100003243',  234);

答案:


-- Inspect the raw order data.
select * from test3;

-- (1) For every month of 2017: number of orders, number of distinct users
--     and total order amount.
select
    date_format(dt, 'yyyy-MM') as month1,
    count(order_id)            as cnt_orders, -- number of orders
    count(distinct user_id)    as cnt_users,  -- number of distinct users
    sum(amount)                as sum_amt     -- total order amount
from test3
where year(dt) = 2017
group by date_format(dt, 'yyyy-MM');

-- Variant 2: same result, filtering on the formatted year.
-- date_format returns a string, so compare against the string '2017'.
select
    date_format(dt, 'yyyy-MM') as month1,
    count(order_id)            as cnt_orders, -- number of orders
    count(distinct user_id)    as cnt_users,  -- number of distinct users
    sum(amount)                as sum_amt     -- total order amount
from test3
where date_format(dt, 'yyyy') = '2017'
group by date_format(dt, 'yyyy-MM');

-- (2) Number of new customers in November 2017, i.e. users whose first order
--     ever falls in 2017-11.
with t1 as (
    -- tag every order with its year-month
    select *, date_format(dt, 'yyyy-MM') as ym
    from test3
),
t2 as (
    -- first order month per user
    select user_id, min(ym) as min_ym
    from t1
    group by user_id
)
select count(user_id) as cnt
from t2
where min_ym = '2017-11';

7.

--方式1
-- Approach 1: unique visitors (UV) per shop with COUNT(DISTINCT).
select
    shop,
    count(distinct user_id) as uv
from test2
group by shop;

-- Approach 2: de-duplicate (user_id, shop) pairs first, then count rows per shop.
select
    shop,
    count(user_id) as uv
from (
    select user_id, shop
    from test2
    group by user_id, shop
) t
group by shop;
-- (2) Top-3 visitors per shop by visit count.
--     Output: shop name, visitor id, visit count.
-- Variant 1: nested subqueries.
-- Every derived table carries an alias. The inner ORDER BY of the original was
-- dropped because the outer query does not preserve it.
select *
from (
    select
        shop,
        user_id,
        cnt,
        row_number() over (partition by shop order by cnt desc) rn
    from (
        select user_id, shop, count(*) as cnt
        from test2
        group by user_id, shop
    ) t
) ranked
where rn <= 3;

-- Variant 2: the same logic written with WITH ... AS.
with t1 as (
    -- visits per (user, shop)
    select user_id, shop, count(*) as cnt
    from test2
    group by user_id, shop
),
t2 as (
    -- rank visitors inside each shop, most visits first
    select
        shop,
        user_id,
        cnt,
        row_number() over (partition by shop order by cnt desc) rn
    from t1
)
select *
from t2
where rn <= 3
order by shop, cnt desc;

8.

-- Total number of visits per 10-year age bracket.
-- Approach 1: explicit CASE brackets (upper bound inclusive: 10 -> '0-10', 20 -> '10-20').
-- Ages outside 0..70 fall through the CASE and get a NULL age_phase.
with t1 as (
    -- visits per user
    select user_id, count(url) as cnt
    from test4log
    group by user_id
),
t2 as (
    -- age bracket per user
    select
        user_id,
        age,
        case
            when age >= 0  and age <= 10 then '0-10'
            when age > 10 and age <= 20 then '10-20'
            when age > 20 and age <= 30 then '20-30'
            when age > 30 and age <= 40 then '30-40'
            when age > 40 and age <= 50 then '40-50'
            when age > 50 and age <= 60 then '50-60'
            when age > 60 and age <= 70 then '60-70'
        end age_phase
    from test4user
),
t3 as (
    select age_phase, sum(cnt) sum1
    from t1
    join t2 on t1.user_id = t2.user_id
    group by t2.age_phase
)
select * from t3;

-- Approach 2: compute the bracket arithmetically.
-- NOTE(review): here the lower bound is inclusive (age 20 -> '20-30'), which
-- differs from approach 1 at exact multiples of 10 - confirm which is wanted.
with t1 as (
    select user_id, count(url) as cnt
    from test4log
    group by user_id
),
t2 as (
    select
        user_id,
        age,
        concat(floor(age / 10) * 10, '-', (floor(age / 10) + 1) * 10) as age_phase
    from test4user
),
t3 as (
    select age_phase, sum(cnt) sum1
    from t1
    join t2 on t1.user_id = t2.user_id
    group by t2.age_phase
)
select * from t3;

-- Scratch checks for the bracket arithmetic.
select floor(15 / 10) * 10 as x;                                       -- lower bound for 15
select ceil(15 / 10) * 10 as x;                                        -- upper bound for 15
select concat(floor(15 / 10) * 10, '-', ceil(15 / 10) * 10) as x;      -- bracket for 15
select concat(floor(25 / 10) * 10, '-', ceil(25 / 10) * 10) as x;      -- bracket for 25
-- at an exact multiple of 10, floor and ceil coincide, hence floor + 1 for the upper bound
select concat(floor(20 / 10) * 10, '-', (floor(20 / 10) + 1) * 10) as x;

9.

-- For each user, the earliest payment made in October 2017.
select userid, money, paymenttime
from (
    select
        *,
        -- rn = 1 marks the first payment of each user inside the month
        row_number() over (partition by userid order by paymenttime) rn
    from test6
    where date_format(paymenttime, 'yyyy-MM') = '2017-10'
) t
where rn = 1;

10.

-- Step 1: total number of users and their average age.
with t1 as (
    select distinct user_id, age
    from test5
),
t2 as (
    select
        '所有用户'     as type,
        count(user_id) as cnt,
        avg(age)       as avg_age
    from t1
),
-- Step 2: number and average age of active users, where an active user is one
-- with visit records on two consecutive days.
t3 as (
    -- one row per user and day
    select distinct dt, user_id, age
    from test5
),
t4 as (
    -- number each user's visit days in date order
    select
        dt,
        user_id,
        age,
        row_number() over (partition by user_id order by dt) as rn
    from t3
),
t5 as (
    -- date minus sequence number: consecutive days share the same date2
    select
        *,
        date_sub(dt, rn) as date2
    from t4
),
t6 as (
    -- a date2 that occurs at least twice means at least two consecutive days
    select
        user_id,
        date2,
        max(age) age,
        count(1) as cnt
    from t5
    group by user_id, date2
    having count(1) >= 2
),
t7 as (
    select distinct user_id, age
    from t6
),
t8 as (
    select
        '活跃用户'     as type,
        count(user_id) as cnt,
        avg(age)       as avg_age
    from t7
)
select * from t2 union all
select * from t8;

11.

-- (1) Create the three base tables of the library database: book, reader and
--     borrow log.

-- Book table.
CREATE TABLE test_sql.book (
    book_id   string,
    `SORT`    string,
    book_name string,
    writer    string,
    OUTPUT    string,
    price     decimal(10, 2)
);

-- Reader table.
CREATE TABLE test_sql.reader (
    reader_id string,
    company   string,
    name      string,
    sex       string,
    grade     string,
    addr      string
);

-- Borrow-log table: one row per (reader, book, borrow date).
CREATE TABLE test_sql.borrow_log (
    reader_id   string,
    book_id     string,
    borrow_date string
);
-- (2) Name and company of every reader whose family name is 李.
select name, company
from reader
where name like '李%';

-- (3) Title and price of all books published by "科学出版社",
--     sorted by price in descending order (as the task requires).
select book_name, price
from book
where OUTPUT = '科学出版社'
order by price desc;

-- (4) Category (SORT), publisher (OUTPUT) and price of books priced between
--     10 and 20 inclusive, sorted by publisher and price ascending.
select SORT, OUTPUT, price
from book
where price >= 10 and price <= 20
order by OUTPUT, price;

-- (5) Name and company of every reader who has borrowed a book.
--     LEFT SEMI JOIN returns each reader once, however many books they borrowed.
select b.name, b.company
from reader b
left semi join borrow_log a on a.reader_id = b.reader_id;

-- (6) Highest, lowest and average price of books published by "科学出版社".
select max(price), min(price), avg(price)
from book
where OUTPUT = '科学出版社';

-- (7) Readers who have borrowed at least 2 books (>= 2), with their company.
select b.reader_id, b.name, b.company
from borrow_log a
join reader b on a.reader_id = b.reader_id
group by b.reader_id, b.name, b.company
having count(*) >= 2
;
-- (8) For data safety the "borrow log" data must be backed up regularly. Using a single SQL statement,
-- create, under the backup user bak, a table BORROW_LOG_BAK with exactly the same structure as the "borrow log" table,
-- and copy all existing "borrow log" data into BORROW_LOG_BAK.
-- NOTE(review): the statement below names no schema, so the table is created wherever the session currently points,
-- not explicitly under bak - confirm whether a bak. qualifier is required.
create table BORROW_LOG_BAK as select * from borrow_log;
-- (9) The data of the original Oracle database is to be migrated to the Hive warehouse.
-- Write the Hive DDL for the "book" table (hints: column delimiter is |; the data
-- is loaded from outside; partitions are named month_part and day_part).
-- The delimiter is given as the plain character '|': this clause takes a literal
-- character, not a regex, so the escaped form '\\|' would select the backslash.
-- NOTE(review): "data is loaded from outside" may call for CREATE EXTERNAL TABLE
-- with a LOCATION - confirm against the intended load process.
CREATE TABLE book_hive (
    book_id   string,
    `SORT`    string,
    book_name string,
    writer    string,
    OUTPUT    string,
    price     DECIMAL(10, 2)
)
partitioned BY (month_part string, day_part string)
ROW format delimited FIELDS TERMINATED BY '|'
stored AS textfile;
-- (10) Hive has a table A. In its month partition 201505 the user_dinner field of
-- user_id 20000 must be changed to bonc8920, while user_dinner of all other users
-- stays unchanged. List the steps to perform this update.
-- (Hive implementation; hint: Hive has no UPDATE syntax, use another approach.)
--
-- Approach: rewrite only the affected partition from itself, replacing the value
-- for the target user on the fly. A single pass with CASE keeps every other row
-- as is, including rows whose user_id is NULL.
-- NOTE(review): assumes the partition column is named month_part and that A has
-- only the data columns user_id and user_dinner - confirm against the real schema
-- and list any further columns in the SELECT.
insert overwrite table A partition (month_part = '201505')
select
    user_id,
    case when user_id = 20000 then 'bonc8920' else user_dinner end as user_dinner
from A
where month_part = '201505';

12.

-- Top 10 IPs by number of calls to /api/user/login between 14:00 (inclusive)
-- and 15:00 (exclusive) on 2016-11-09.
-- `date` is back-quoted because DATE is a keyword.
select ip, count(*)
from test8
where `date` >= '2016-11-09 14:00:00'
  and `date` < '2016-11-09 15:00:00'
  and interface = '/api/user/login'
group by ip
order by count(*) desc
limit 10;

13.

-- For each dist_id, the row with the highest money among records created on 2019-01-02.
select *
from (
    select
        *,
        -- PARTITION BY ... ORDER BY is the portable spelling of the window spec
        row_number() over (partition by dist_id order by money desc) as rn
    from test9
    where to_date(create_time) = '2019-01-02'
) t
where rn = 1;

14.

-- For each dist_id, the 10 accounts holding the most gold.
select dist_id, account, gold
from (
    select
        *,
        row_number() over (partition by dist_id order by gold desc) rn
    from test_sql.test10
) t
where rn <= 10
;

15.行转列

-- Pivot: turn the (a, b, c) rows into one row per a, with one column per b value.
-- IF(...) without a match yields NULL, which MAX ignores.
SELECT
    a,
    MAX(IF(b = "A", c, NULL)) AS col_A,
    MAX(IF(b = "B", c, NULL)) AS col_B
FROM t1
GROUP BY a;

16.列转行

-- Unpivot: emit one (a, b, c) row per source column, tagging each row with
-- the letter of the column it came from.
WITH tmp AS (
    SELECT a, "A" AS b, col_a AS c FROM t1_2
    UNION ALL
    SELECT a, "B" AS b, col_b AS c FROM t1_2
)
SELECT
    a,
    b,
    c
FROM tmp;

17.行转列

-- Pivot with multiple values per cell: first collapse all distinct c values of
-- each (a, b) pair into one comma-separated string, then pivot b into columns.
select
    a,
    max(case when b = "A" then c end) col_A,
    max(case when b = "B" then c end) col_B
from (
    select
        a,
        b,
        concat_ws(",", collect_set(cast(c as string))) as c
    from t1
    group by a, b
) tmp
group by a;

18.按a分组取b字段最小时对应的c字段

-- Per group a: the value of c on the row where b is smallest.
select
    a,
    c as min_c
from (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b) as rn
    from t2
) a
where rn = 1;

19.按a分组取b字段排第二时对应的c字段

-- Per group a: the value of c on the row where b ranks second (ascending).
select
    a,
    c as second_c
from (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b) as rn
    from t2
) a
where rn = 2;

20.按a分组取b字段最小和最大时对应的c字段

-- Per group a: c on the row with the smallest b (min_c) and c on the row with
-- the largest b (max_c), computed in one pass with two row numberings.
select
    a,
    min(if(asc_rn = 1, c, null))  as min_c,
    max(if(desc_rn = 1, c, null)) as max_c
from (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b)      as asc_rn,
        row_number() over (partition by a order by b desc) as desc_rn
    from t2
) a
where asc_rn = 1 or desc_rn = 1
group by a;

21.按a分组取b字段第二小和第二大时对应的c字段

-- Per group a: c on the row with the second-smallest b (min_c) and c on the
-- row with the second-largest b (max_c).
select
    ret.a,
    max(case when ret.rn_min = 2 then ret.c else null end) as min_c,
    max(case when ret.rn_max = 2 then ret.c else null end) as max_c
from (
    select
        *,
        row_number() over (partition by t2.a order by t2.b)      as rn_min,
        row_number() over (partition by t2.a order by t2.b desc) as rn_max
    from t2
) as ret
where ret.rn_min = 2
   or ret.rn_max = 2
group by ret.a;

22.按a分组取b字段前两小和前两大时对应的c字段

-- Per group a: the c values of the two smallest b rows (min_c) and of the two
-- largest b rows (max_c), each as a comma-separated string.
-- NOTE(review): the order of elements inside collect_list is presumably not
-- guaranteed to follow b - confirm if the order inside the string matters.
select
    tmp1.a as a,
    min_c,
    max_c
from (
    select
        a,
        concat_ws(',', collect_list(c)) as min_c
    from (
        select
            a,
            b,
            c,
            row_number() over (partition by a order by b) as asc_rn
        from t2
    ) a
    where asc_rn <= 2
    group by a
) tmp1
join (
    select
        a,
        concat_ws(',', collect_list(c)) as max_c
    from (
        select
            a,
            b,
            c,
            row_number() over (partition by a order by b desc) as desc_rn
        from t2
    ) a
    where desc_rn <= 2
    group by a
) tmp2
on tmp1.a = tmp2.a;

23. 按a分组按b字段排序,对c累计求和

24.按a分组按b字段排序,对c取累计平均值

25.按a分组按b字段排序,对b取累计排名比例

-- Per group a, ordered by b: position of the row divided by the number of
-- non-null c values in the group, rounded to 2 decimals.
SELECT
    a,
    b,
    c,
    ROUND(
        ROW_NUMBER() OVER (PARTITION BY a ORDER BY b)
        / (COUNT(c) OVER (PARTITION BY a)),
        2
    ) AS ratio_c
FROM t3
ORDER BY
    a,
    b;

26.按a分组按b字段排序,对b取累计求和比例

-- Per group a, ordered by b: running sum of c divided by the group total of c,
-- rounded to 2 decimals.
SELECT
    a,
    b,
    c,
    ROUND(
        SUM(c) OVER (PARTITION BY a ORDER BY b)
        / (SUM(c) OVER (PARTITION BY a)),
        2
    ) AS ratio_c
FROM t3
ORDER BY
    a,
    b;

27.按a分组按b字段排序,对c取前后各一行的和

不包含当前行

-- Per group a, ordered by b: sum of c from the previous row and the next row
-- (current row excluded); a missing neighbour counts as 0.
SELECT
    a,
    b,
    LAG(c, 1, 0) OVER (PARTITION BY a ORDER BY b)
        + LEAD(c, 1, 0) OVER (PARTITION BY a ORDER BY b) AS sum_c
FROM t4;

28.按a分组按b字段排序,对c取平均值

看题。

-- Per group a, ordered by b: average of c over the current and the previous
-- row; the first row of a group has no predecessor and keeps its own c.
select
    a,
    b,
    case
        when lag_c is null then c
        else (c + lag_c) / 2
    end as avg_c
from (
    select
        a,
        b,
        c,
        lag(c, 1) over (partition by a order by b) as lag_c
    from t4
) temp;

29.产生连续数值

语法: space(int n)
返回值: string
说明:返回长度为n的空格字符串
举例:
hive> select space(10) from dual;
hive> select length(space(10)) from dual;
10
space函数与split函数结合,可以得到空格字符串数组
hive> select split(space(10), '');
[" "," "," "," "," "," "," "," "," "," ",""]
-- Generate consecutive integers: split a string of 99 spaces on ' ' into an
-- array, explode it into rows and number the rows.
select
    row_number() over () as id
from (select split(space(99), ' ') as x) t
lateral view
explode(x) ex;

那如何产生1至1000000连续数值?

-- Same technique scaled up for 1 .. 1000000: a string of 999999 spaces is
-- split on ' ', exploded into rows, and the rows are numbered.
select
    row_number() over () as id
from (select split(space(999999), ' ') as x) t
lateral view
explode(x) ex;

30.数据扩充

31.数据扩充,排除偶数

32.如何处理字符串累计拼接

33.如果a字段有重复,如何实现字符串累计拼接

-- Cumulative string concatenation when a contains duplicates: every row is
-- numbered, paired with all rows that are not after it (by value and by row
-- number), and the paired values are joined with '、'.
select
    a,
    b
from (
    select
        t.a,
        t.rn,
        concat_ws('、', collect_list(cast(t.a1 as string))) as b
    from (
        select
            a.a,
            a.rn,
            b.a1
        from (
            select a, row_number() over (order by a) as rn
            from t6
        ) a
        left join (
            select a as a1, row_number() over (order by a) as rn
            from t6
        ) b
        on 1 = 1 -- pair every row with every row, filtered below
        where a.a >= b.a1
          and a.rn >= b.rn
        order by a.a, b.a1
    ) t
    group by t.a, t.rn
    order by t.a, t.rn
) tt;

34.数据展开

如何将字符串"1-5,16,11-13,9"扩展成"1,2,3,4,5,16,11,12,13,9"?注意顺序不变。

-- Expand '1-5,16,11-13,9' into '1,2,3,4,5,16,11,12,13,9', keeping the segment order.
-- A generated number series is matched against every comma-separated segment:
-- a number is kept when it lies inside a 'lo-hi' range or equals a single value.
select
    concat_ws(',', collect_list(cast(rn as string)))
from (
    select
        a.rn,
        b.num,
        b.pos
    from (
        select
            row_number() over () as rn
        from (select split(space(20), ' ') as x) t -- space(20) can be adjusted to the largest value needed
        lateral view
        explode(x) pe
    ) a
    lateral view outer posexplode(split('1-5,16,11-13,9', ',')) b as pos, num
    where a.rn between cast(split(num, '-')[0] as int) and cast(split(num, '-')[1] as int)
       or a.rn = num
    order by pos, rn
) t;

35.合并与拆分

36.合并与拆分

37.如何将字符'1'的位置提取出来

-- For each string a, list the 1-based positions at which the character '1' occurs.
-- The string is taken apart into its 1st, 2nd, 3rd and last character.
-- NOTE(review): this looks like it expects 4-character strings - confirm.
select
    a,
    concat_ws(",", collect_list(cast(index as string))) as res
from (
    select
        a,
        index + 1 as index, -- posexplode positions start at 0
        chr
    from (
        select
            a,
            concat_ws(",", substr(a, 1, 1), substr(a, 2, 1), substr(a, 3, 1), substr(a, -1)) str
        from t8
    ) tmp1
    lateral view posexplode(split(str, ",")) t as index, chr
    where chr = "1"
) tmp2
group by a;

38.不使用distinct或group by去重

-- De-duplicate without DISTINCT or GROUP BY: number the rows inside each
-- (year, num) partition and keep only the first one.
select
    t2.year,
    t2.num
from (
    select
        *,
        row_number() over (partition by t1.year, t1.num) as rank_1
    from (
        -- stack columns a, b and c of t9 as "year", each paired with d
        select a as year, d as num from t9
        union all
        select b as year, d as num from t9
        union all
        select c as year, d as num from t9
    ) t1
) t2
where rank_1 = 1
order by num;

39.反转逗号分隔的数据:改变顺序,内容不变

-- Reverse the order of the comma-separated items while keeping each item's
-- content: reverse the whole string, split it, then reverse every piece back.
select
    a,
    concat_ws(",", collect_list(reverse(str)))
from (
    select
        a,
        str
    from t10
    lateral view explode(split(reverse(a), ",")) t as str
) tmp1
group by a;

40.反转逗号分隔的数据:改变内容,顺序不变

-- Keep the order of the comma-separated items but reverse the content of each
-- item: split the string, reverse every piece, join the pieces again.
select
    a,
    concat_ws(",", collect_list(reverse(str)))
from (
    select
        a,
        str
    from t10
    lateral view explode(split(a, ",")) t as str
) tmp1
group by a;

41.成对提取数据,字段一一对应

-- Pair up the '/'-separated items of columns a and b position by position.
-- posexplode yields (position, value): a_id / b_id hold the positions and
-- a_inx / b_inx hold the values. Each lateral view gets its own alias.
select
    a_inx,
    b_inx
from (
    select
        a,
        b,
        a_id,
        a_inx,
        b_id,
        b_inx
    from t11
    lateral view posexplode(split(a, '/')) ta as a_id, a_inx
    lateral view posexplode(split(b, '/')) tb as b_id, b_inx
) tmp
where a_id = b_id; -- keep only items that share the same position

数仓SQL面试题(持续更新中!!!)相关推荐

  1. db2dual_DB2常用SQL的写法(持续更新中...)

    DB2常用SQL的写法(持续更新中...) -- Author: lavasoft -- Date  :  2006-12-14 -- 创建一个自定义单值类型 create  distinct typ ...

  2. psid mysql_DB2常用SQL的写法(持续更新中...)

    DB2常用SQL的写法(持续更新中...) -- Author: lavasoft -- Date  :  2006-12-14 -- 创建一个自定义单值类型 create  distinct typ ...

  3. 史上最全的spark面试题——持续更新中

    1.spark中的RDD是什么,有哪些特性? 答:RDD(Resilient Distributed Dataset)叫做分布式数据集,是spark中最基本的数据抽象,它代表一个不可变,可分区,里面的 ...

  4. java史上最全面试题--持续更新中(一)

    1.面向对象的特征有哪些方面? 抽象:将同类对象的共同特征提取出来构造类. 继承:基于基类创建新类. 封装:将数据隐藏起来,对数据的访问只能通过特定接口. 多态性:不同子类型对象对相同消息作出不同响应 ...

  5. 2020年Java面试题及答案_Java面试宝典_Java笔试题(持续更新中)

    推荐面试视频教程 2019年最新Java互联网大厂面试精粹 前言 前言: 少年易老学难成,一寸光阴不可轻.未觉池塘春草梦,阶前梧叶已秋声 .-朱熹<劝学诗> 勤奋才是改变你命运的唯一捷径. ...

  6. Dubbo面试题及答案,2021年Dubbo面试题-持续更新中

    2021最新Dubbo面试题[附答案解析]Dubbo面试题及答案2021,Dubbo最新面试题及答案,Dubbo面试题新答案已经全部更新完了,有些答案是自己总结的,也有些答案是在网上搜集整理的.这些答 ...

  7. SpringBoot面试题大汇总附答案,SpringBoot面试题-持续更新中

    2021最新SpringBoot面试题[附答案解析]SpringBoot面试题及答案2021,SpringBoot2021最新面试题及答案,SpringBoot面试题新答案已经全部更新完了,有些答案是 ...

  8. mysql查询更新优化_mysql查询优化(持续更新中)

    1.索引不会包含有NULL值的列 (1)   应尽量避免在where子句中对字段进行null值判断,否则将导致引擎放弃使用索引而进行全表扫描 (2)   数据库设计时不要让字段的默认值为null,可以 ...

  9. 面试1:Java、微服务、架构常见面试题(持续更新中)

    Java.微服务.架构常见面试题(持续更新中) 文章目录 Java.微服务.架构常见面试题(持续更新中) ==**Java**== 1.Java概述 (1)JVM.JRE和JDK (2)Java特点 ...

  10. 2020年拼多多校招面试题及答案-最全最新-持续更新中(2)

    大家好我是好好学习天天编程的天天 一个整天在互联网上种菜和砍柴的程序员~ 2020年拼多多校招面试题及答案-最全最新-持续更新中(2) 2020年拼多多校招面试题一面 2020年拼多多校招面试题一面- ...

最新文章

  1. Exchange 2010与Exchange Online混合部署PART 5:配置边缘
  2. Android之Fragment使用简介
  3. oracle区号,Oracle 存儲過程
  4. 51单片机——LCD1602
  5. mongodb常用操作命令(待续)
  6. 计算机网络原理应用题/计算题
  7. 苹果Mac视频特效合成神器:Blackmagic Fusion Studio
  8. 一维和二维傅里叶变换的图片直观理解
  9. B站视频下载助手使用教程
  10. PC控制台使用-素材管理
  11. 彩色蟒蛇绘制。对 Python 蟒蛇的每个部分采用不同颜色,绘制一条彩色蟒蛇。
  12. 掌握IDEA的这两款插件,写“破解补丁”分分钟
  13. 基于FPGA的GV7600驱动
  14. 一起来“泡博”[--老沙]
  15. CSS反爬虫 大众点评
  16. 全面理解云上网络技术
  17. 阿里云轻量应用服务器基于CentOS系统镜像快速部署Apache服务
  18. 项目变更管理、项目集合管理、流程管理、知识管理、战略管理真题
  19. 联想z485在ubuntu13.04系统下发热量大的解决办法
  20. 软件学习史上最强攻略之--如何选择软件测试培训学校

热门文章

  1. Android NDK 建立cocos2dx项目
  2. 编程英文单词的标准缩写
  3. 自动避障算法c语言,基于单片机的自动避障小车设计与实现报告.doc
  4. 直方图均衡化与直方图规定化
  5. layui表单验证必填;下拉列表必填;xmselect必填;layui vue下拉列表必填
  6. 梅科尔工作室-第五次网页前端培训笔记(Javascript)
  7. Quartz-CronTrigger解读
  8. 【源码】基于部分随机PSO的光伏MPPT算法
  9. 开题报告:基于java的电子商务购物网站系统 毕业设计论文开题报告模板
  10. 软件设计师-2.程序设计语言