hive shell/sql 命令行

命令hive进入hive命令行

//列出数据库

show databases;

//创建数据库

create database myhive;

//创建数据库时检查存在与否

create database if not exists t1;

//创建数据库时带注释

create database if not exists t2 comment 'learning hive';

//创建带属性的数据库

create database if not exists t3 with dbproperties('creator'='hadoop','date'='2018-04-05');

//使用数据库

use myhive;

//显示数据库信息

desc database t2;

desc database extended t3;

//列出数据表

show tables;

show tables in t1; -- t1为数据库名

///查看student_c开头的表

show tables like 'student_c*';

//查看当前正在使用的数据库

select current_database();

//创建一张表

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name

　　[(col_name data_type [COMMENT col_comment], ...)]

　　[COMMENT table_comment]

　　[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]

　　[CLUSTERED BY (col_name, col_name, ...)

　　　　[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]

　　[ROW FORMAT row_format]

　　[STORED AS file_format]

　　[LOCATION hdfs_path]

详情请参见： https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable

•CREATE TABLE 创建一个指定名字的表。如果相同名字的表已经存在，则抛出异常；用户可以用 IF NOT EXISTS 选项来忽略这个异常

•EXTERNAL 关键字可以让用户创建一个外部表，在建表的同时指定一个指向实际数据的路径（LOCATION）

•LIKE 允许用户复制现有的表结构，但是不复制数据

•COMMENT 可以为表与字段增加描述

•PARTITIONED BY 指定分区

•ROW FORMAT

　　DELIMITED [FIELDS TERMINATED BY char] [COLLECTION ITEMS TERMINATED BY char]

　　　　[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]

　　　　| SERDE serde_name [WITH SERDEPROPERTIES

　　　　(property_name=property_value, property_name=property_value, ...)]

　　用户在建表的时候可以自定义 SerDe 或者使用自带的 SerDe。如果没有指定 ROW FORMAT 或者 ROW FORMAT DELIMITED，将会使用自带的 SerDe。在建表的时候，

用户还需要为表指定列，用户在指定表的列的同时也会指定自定义的 SerDe，Hive 通过 SerDe 确定表的具体的列的数据。

•STORED AS

　　SEQUENCEFILE //序列化文件

　　| TEXTFILE //普通的文本文件格式

　　| RCFILE　　//行列存储相结合的文件

　　| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname //自定义文件格式

　　如果文件数据是纯文本，可以使用 STORED AS TEXTFILE。如果数据需要压缩，使用 STORED AS SEQUENCEFILE 。

•LOCATION指定表在HDFS的存储路径

最佳实践：

　　如果一份数据已经存储在HDFS上，并且要被多个用户或者客户端使用，最好创建外部表

　　反之，最好创建内部表。

　　如果建表时不指定 LOCATION，就按照默认的规则存储在默认的仓库路径中。

///创建内部表

create table student(id int, name string, sex string, age int, department string) row format delimited fields terminated by ",";

///创建外部表

create external table student_ext

(id int, name string, sex string, age int,department string) row format delimited fields terminated by "," location "/user/hive/outtable/student_ext";

注意：需要先切换到 hdfs 用户为 /user/hive 目录赋权，root 用户才会拥有 HDFS 上 /user/hive 目录的访问权限

su - hdfs

hdfs dfs -chmod 777 /user/hive

hdfs dfs -ls /user

///创建分区表

create external table student_ptn

(id int, name string, sex string, age int,department string)

partitioned by (city string)

row format delimited fields terminated by ","

location "/user/hive/outtable/student_ptn";

添加分区

alter table student_ptn add partition(city="beijing");

alter table student_ptn add partition(city="tianjin");

如果某张表是分区表。那么每个分区的定义，其实就表现为了这张表的数据存储目录下的一个子目录

如果是分区表。那么数据文件一定要存储在某个分区中，而不能直接存储在表中。

创建分桶表

create external table student_bck(id int, name string, sex string, age int,department string) clustered by (id) sorted by (id asc, name desc) into 4 buckets row format delimited fields terminated by ","

location "/user/hive/outtable/student_bck";

//使用CTAS创建表（查询结果创建表）

create table student_ctas as select * from student where id < 95012;

//复制表结构

create table student_copy like student;

//创建本地数据文件（本地文件系统，非HDFS）

cat <<EOF >student.txt

95002,刘晨,女,19,IS

95017,王风娟,女,18,IS

95018,王一,女,19,IS

95013,冯伟,男,21,CS

95014,王小丽,女,19,CS

95019,邢小丽,女,19,IS

95020,赵钱,男,21,IS

95003,王敏,女,22,MA

95004,张立,男,19,IS

95012,孙花,女,20,CS

95010,孔小涛,男,19,CS

95005,刘刚,男,18,MA

95006,孙庆,男,23,CS

95007,易思玲,女,19,MA

95008,李娜,女,18,CS

95021,周二,男,17,MA

95022,郑明,男,20,MA

95001,李勇,男,20,CS

95011,包小柏,男,18,MA

95009,梦圆圆,女,18,MA

95015,王君,男,18,MA

EOF

//加载数据

load data local inpath "/home/hadoop/student.txt" into table student;

加载数据时，会直接把数据文件放到该表在 HDFS 上对应的目录中

//查询数据

select * from student;

//查看表结构

hive> desc student;

id int

name string

sex string

age int

department string

Time taken: 0.709 seconds, Fetched: 5 row(s)

hive> desc extended student;

id int

name string

sex string

age int

department string

Detailed Table Information Table(tableName:student, dbName:myhive, owner:root, createTime:1551859665, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:sex, type:string, comment:null), FieldSchema(name:age, type:int, comment:null), FieldSchema(name:department, type:string, comment:null)], location:hdfs://node2:8020/user/hive/warehouse/myhive.db/student, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{field.delim=,, serialization.format=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{totalSize=504, COLUMN_STATS_ACCURATE=true, numFiles=1, transient_lastDdlTime=1551859708}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE, ownerType:USER)

Time taken: 0.618 seconds, Fetched: 7 row(s)

///格式友好的表结构信息

hive> desc formatted student;

# col_name data_type comment

id int

name string

sex string

age int

department string

# Detailed Table Information

Database: myhive

OwnerType: USER

Owner: root

CreateTime: Wed Mar 06 16:07:45 CST 2019

LastAccessTime: UNKNOWN

Protect Mode: None

Retention: 0

Location: hdfs://node2:8020/user/hive/warehouse/myhive.db/student

Table Type: MANAGED_TABLE

Table Parameters:

COLUMN_STATS_ACCURATE true

numFiles 1

totalSize 504

transient_lastDdlTime 1551859708

# Storage Information

SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

InputFormat: org.apache.hadoop.mapred.TextInputFormat

OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Compressed: No

Num Buckets: -1

Bucket Columns: []

Sort Columns: []

Storage Desc Params:

field.delim ,

serialization.format ,

Time taken: 0.703 seconds, Fetched: 35 row(s)

//查看分区信息

show partitions student_ptn;

//查看详细建表信息

show create table student_ptn;

//删除库

drop database dbname; drop database if exists dbname;

默认情况下，hive 不允许删除包含表的数据库，有两种解决办法：

1、手动删除库下所有表，然后删除库

2、使用 cascade 关键字

drop database if exists dbname cascade;

//删除表

drop table tbname;

//修改表名

alter table student rename to new_student;

//添加字段

alter table new_student add columns (score int);

//修改字段定义

alter table new_student change name new_name string;

///删除字段

不支持（Hive 没有直接删除字段的语句，可以用 replace columns 重新定义字段列表来间接去掉字段）

//替换所有字段定义

alter table new_student replace columns (id int, name string, address string);

///添加多个表分区

alter table student_ptn add partition(city="chongqing2") partition(city="chongqing3") partition(city="chongqing4");

///动态分区

先添加数据

load data local inpath "/var/lib/hadoop-hdfs/student.txt" into table student_ptn partition(city="beijing");

现在我把这张表的内容直接插入到另一张表student_ptn_age中，并实现age为动态分区（不指定到底是哪个年龄，让系统根据查询结果自己分配决定）

首先创建student_ptn_age并指定分区为age

create table student_ptn_age(id int,name string,sex string,department string) partitioned by (age int);

从student_ptn表中查询数据并插入student_ptn_age表中

insert overwrite table student_ptn_age partition(age) select id,name,sex,department,age from student_ptn;

此语句报错：FAILED: SemanticException [Error 10096]: Dynamic partition strict mode requires at least one static partition column. To turn this off set hive.exec.dynamic.partition.mode=nonstrict

需要执行如下设置后即可

set hive.exec.dynamic.partition.mode=nonstrict;

hive.exec.dynamic.partition.mode默认是strict，必须指定一个分区进行插入数据，以避免覆盖所有的分区数据；但是如果需要动态分区插入数据就必须设置nonstrict，nonstrict表示不是严格的必须指定一个静态分区，言外之意就是动态分区插入。其他属性容易理解不解释。

参考：https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-DynamicPartitionInserts

//添加分区指定存储目录

alter table student_ptn add if not exists partition(city='beijing') location '/user/hive/outtable/student_ptn/student_ptn_beijing' partition(city='jilin') location '/user/hive/outtable/student_ptn/student_ptn_jilin';

//修改已经指定好的分区的数据存储目录

alter table student_ptn partition (city='beijing') set location '/user/hive/outtable/student_ptn/student_ptn_beijing';

此时原先的分区文件夹仍存在，但是在往分区添加数据时，只会添加到新的分区目录。

而且查询的时候只查新分区，不会查旧分区里的文件，如果需要保留数据，需要把文件也挪过来。

//删除分区

alter table student_ptn drop partition (city='beijing');

//清空表

truncate table student_ptn;

//查看函数列表

show functions;

参考文章

https://www.cnblogs.com/qingyunzong/p/8723271.html

hive shell/sql 命令行相关推荐

oracle sql命令行中上下左右使用
oracle sql命令行中上下左右使用 yum -y install readline,rlwrap 配置环境变量 alias sqlplus='rlwrap sqlplus' 测试 posted ...
解决SQL命令行回退的问题
场景在linux或者aix上安装后Oracle后,在SQL命令行下无法通过键盘的退格键回退,如下解决方法安装软件 # rpm -ivh rlwrap-0.41-1.el6.x86_64.rpm ...
Shell Scipt 命令行带参数，输出log
Shell Scipt 命令行带参数,输出log 命令行带参数,以及字符串参数放到ssh命令里可以这么放: #!/bin/bash 这行保证运行bash可以这样: ./data.sh i ...
mysql dede arctiny_如何用织梦SQL命令行工具操作数据库及常用sql语句整理
用织梦的朋友都知道,如果需要对数据库进行批量操作,可以用织梦后台[系统]-[sql命令行工具]实现.其实这只是织梦系统为了大家操作方便,提供的一个对mysql数据库操作的工具,这样大家在需要批量操作数 ...
你以为Shell只是命令行？读懂这篇文，给你的工作赋能
Shell 是什么呢?很多人以为的Shell就是命令行,一条条可以百度出来的命令行.而简历上你只会写熟悉Linux?今天用摩拜App给大家讲解下如何使用 Shell 去操作 Android 设备进行自 ...
sql+php修改mysql结构,MSSQL_修改SQL-SERVER数据库表结构的SQL命令附sql命令行修改数据库，具体不废话了，请看下文代码 - phpStudy...
修改SQL-SERVER数据库表结构的SQL命令附sql命令行修改数据库具体不废话了,请看下文代码详细说明吧. 向表中增加一个 varchar 列: ALTER TABLE distributors ...
Unity中使用调用Shell的命令行
Unity中使用调用Shell的命令行有的时候我们在做unity开发的时候需要去写一些工具编辑器之外的话通常会使用python,但有时也会使用shell命令行来完成一些操作,比如我们在写一自动化打包 ...
HIVE——常用sql命令总结
文章目录 hive常用交互命令 `-e`执行sql `-f`执行脚本中sql语句 hive cli命令行窗口操作hdfs 查看hive中输入的所有历史命令库创建库查看库使用库修改库删除库 ...
Hive Shell 常用命令
Hive 命令行常用命令加载数据 load data local inpath '/home/IVR_CSR_MENU_MAP.txt' into table ivr_csr_menu_map; ...

hive shell/sql 命令行

hive shell/sql 命令行相关推荐

最新文章

热门文章