hive解析json
-- json数据
{"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"}
{"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"}
{"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"}
-- hive get_json_objec解析json练习
CREATE TABLE json_t( lines string ) comment'json测试表';load data local inpath '/root/json.txt' overwrite into table json_t;0: jdbc:hive2://wxt01:10000> select lines from json_t;
OK
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| lines |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| {"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"} |
| {"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"} |
| {"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"} |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
3 rows selected (0.1 seconds)
0: jdbc:hive2://wxt01:10000> -- get_json_object()
SELECT
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID
FROM json_t
;
+-----------------+-----------------+---------------------------+--+
| app_no | channel_source | session_id |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393 | 微信 | YLYC99019292814246548503 |
| FZGR9373403233 | 微信 | KYTD89059389151000106477 |
| CQEL4819162613 | APP | LECY94935395143750820224 |
+-----------------+-----------------+---------------------------+--+-- json_tuple
select
APP_NO,
CHANNEL_SOURCE,
SESSION_ID
from json_t
lateral view json_tuple(lines,'APP_NO','CHANNEL_SOURCE','SESSION_ID') b as APP_NO,CHANNEL_SOURCE,SESSION_ID;
+-----------------+-----------------+---------------------------+--+
| app_no | channel_source | session_id |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393 | 微信 | YLYC99019292814246548503 |
| FZGR9373403233 | 微信 | KYTD89059389151000106477 |
| CQEL4819162613 | APP | LECY94935395143750820224 |
+-----------------+-----------------+---------------------------+--+SELECT
get_json_object(lines,'$.OTHER_INFO.MOBILE_ADDR') as MOBILE_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA2.CERT_ADDR') as CERT_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA1.MOBILE_NUM') as MOBILE_NUM_one,
get_json_object(lines,'$.OTHER_INFO.MOBILE_NUM') as MOBILE_NUM_two,
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.BUSINESS_CODE') as BUSINESS_CODE,
get_json_object(lines,'$.STEP_TYPE') as STEP_TYPE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID,
get_json_object(lines,'$.APPLY_TIME') as APPLY_TIME,
get_json_object(lines,'$.CUST_INFO.GENDER_CD') as GENDER_CD,
get_json_object(lines,'$.CUST_INFO.NAME_EN') as NAME_EN,
get_json_object(lines,'$.CUST_INFO.MAIL') as MAIL,
get_json_object(lines,'$.CUST_INFO.CERT_TYPE') as CERT_TYPE,
get_json_object(lines,'$.CUST_INFO.CERT_NO') as CERT_NO,
get_json_object(lines,'$.CUST_INFO.IDEN_VALID_DT') as IDEN_VALID_DT,
get_json_object(lines,'$.CUST_INFO.CUST_NAME') as CUST_NAME,
get_json_object(lines,'$.CUST_INFO.AGE') as AGE,
get_json_object(lines,'$.CUST_INFO.MOBILE') as MOBILE,
get_json_object(lines,'$.SCENARIO_INFO.ADDRESS') as ADDRESS,
get_json_object(lines,'$.SCENARIO_INFO.CARD_NUM') as CARD_NUM
FROM json_t
;+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
| mobile_addr | cert_addr | mobile_num_one | mobile_num_two | app_no | channel_source | business_code | step_type | session_id | apply_time | gender_cd | name_en | mail | cert_type | cert_no | iden_valid_dt | cust_name | age | mobile | address | card_num |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
| 四川省内江市潘咱陛路1233号闷羞小区19单元484室 | 宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室 | 15360425546 | 15902875723 | RVVG5252500393 | 微信 | TEST01 | 002 | YLYC99019292814246548503 | 1628067497730 | 2 | Edmund | m74e5czt2y@rvhts.pdu | 2 | 446760199707032392 | 99991231 | 独孤拘害 | 5 | 15744158190 | 云南省临沧市猫逐还路1265号兢顶朔小区3单元376室 | 6223887916967795 |
| 青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室 | 安徽省淮南市蚂鄙系路2476号时究小区10单元2465室 | 15210663115 | 15290171152 | FZGR9373403233 | 微信 | TEST01 | 002 | KYTD89059389151000106477 | 1628067579234 | 2 | Frederica | y0jogwn7vv@bnryy.4cp | 2 | 423047199210041239 | 99991231 | 尉迟蛀 | 5 | 13857964198 | 山东省济宁市迂佬藻路1489号展镁小区7单元1617室 | 6229127368018050 |
| 浙江省杭州市藻极路1103号缄奉小区5单元1080室 | 甘肃省天水市褥茨路735号废输剑小区14单元490室 | 15065179482 | 13427757308 | CQEL4819162613 | APP | TEST01 | 004 | LECY94935395143750820224 | 1628067745581 | 1 | Ursula | ximjrvukl5@43sw4.xvg | 1 | 141043198504099090 | 99991231 | 高晚 | 1 | 13658608487 | 河北省唐山市圭布懂路4597号骡昏小区8单元544室 | 6224871441029643 |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+select
get_json_object('{"name":"张三","age":"44"}','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| 张三 | 44 |
+-------+------+--+-- json格式不正确或不完整的放回null
select
get_json_object('{"name":"张三","age":""}','$.name') as name,
get_json_object('{"name":"张三","age":""}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| 张三 | |
+-------+------+--+select
get_json_object('{"name":}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+select
get_json_object('{"name":null}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+select
get_json_object('{"name":张三}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+select
get_json_object('{"name":zhangsang}','$.name') as name
;select
get_json_object('{"age":4}','$.age') as age
;
+------+--+
| age |
+------+--+
| 4 |
+------+--+-- json不完整的返回null
select
get_json_object('{"name":"张三","age":"44"','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| NULL | 44 |
+-------+------+--+-- json中有数组
{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.code') as code,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.msg') as msg,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data') as data
;
+-------+------+-------------------------------------------------------------+--+
| code | msg | data |
+-------+------+-------------------------------------------------------------+--+
| 200 | OK | ["3148120404262382","1443363211304208","4322133941384257"] |
+-------+------+-------------------------------------------------------------+--+--获取json中的数组中第一个数据
select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data[0]') as data
;
+-------------------+--+
| data |
+-------------------+--+
| 3148120404262382 |
+-------------------+--+{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }
-- 去到从json中解析出第一个weight字段
select
get_json_object('{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }','$.store.fruit[0].weight') as weight
;
+---------+--+
| weight |
+---------+--+
| 8 |
+---------+--+
hive解析json相关推荐
- 学会Hive解析Json数组
在Hive中会有很多数据是用Json格式来存储的,如开发人员对APP上的页面进行埋点时,会将多个字段存放在一个json数组中,因此数据平台调用数据时,要对埋点数据进行解析.接下来就聊聊Hive中是如何 ...
- Hive解析Json数组超全讲解
在Hive中会有很多数据是用Json格式来存储的,如开发人员对APP上的页面进行埋点时,会将多个字段存放在一个json数组中,因此数据平台调用数据时,要对埋点数据进行解析.接下来就聊聊Hive中是如何 ...
- Hive解析json字段,虽然json_tuple写法很优雅,但是效率并不一定高哦
在写Hive SQL时,需要从一个json列中解析出多个key的信息,查阅资料发现到有两种写法,一种是get_json_object,另外一种是json_tuple.两种用法的示例如下所示 get_j ...
- Hive解析Json格式用户日志
解析json格式数据 Json数据 HIVE SQL解析 第一种方式 第二种方式 第三种方式 string类型的数组形式的行转列拆分 Json数据 第一次写博客,mark一下 --20191025 下 ...
- hive - 解析 json
内置函数:get_json_object(json串,解析路径) 解析路径说明: $ :跟对象 . :子对象 [] :数组下标 * :所有 举例: 数据样例: {"movie":& ...
- hive解析json数组
前言 解析如下数组 获取app_name和 app_type 字段 [{"app_name":"Kloningponsel","app_type&qu ...
- hive 解析json字符串
例如字段 str_a {'SID':'JDC-XSZ-HHS001','SFZH':'914102257258165412'} 这种是标准的一个json. select get_json_object ...
- Hive之解析Json数组
目录 Hive自带的json解析函数 1.get_json_object函数 2.json_tuple函数 Hive解析json数组 一.嵌套子查询解析json数组 二.使用 lateral view ...
- hive处理json数据_(转)hive中解析json数组
hive中解析一般的json是很容易的,get_json_object就可以了. 但如果字段是json数组,比如 [{"bssid":"6C:59:40:21:05:C4 ...
最新文章
- 生态伙伴 | 小鹅通企学院入驻飞书,助力企业一键打造自己的企业大学
- cache + db 性能
- 作业综合练习-初始化工作
- SwiftUI之深入解析如何实现3D Scroll效果
- php 前置操作方法,前置操作-THINKPHP 5.0 手册最新版
- 你知道,java项目中是如何获取文件地址的吗?
- 【华为敏捷/DevOps实践】5. 如何避免DevOps变革的六大“焦油坑”
- Java instanceof用法
- Linux系统编程 -- stdin stdout stderr
- 暴力猴简单脚本 - 去除右键菜单、文本选择限制
- 验证码java如何实现_怎样用java实现验证码
- Unity全新的版本发布计划(2018)【转自游戏蛮牛】
- windows功能_【各种功能升级】微信windows版v2.9新体验
- 华硕和兰博基尼将发布iPhone的对手
- 在智能手机上跟踪ADS-B系统的飞机航线信息
- 物联网大数据平台功能有哪些
- Google GSON GsonBuilder().setDateFormat(yyyy-MM-dd HH:mm:ss)不能格式化Data
- vue Tesseract的 ocr 文字识别
- 计算机适应性考试题目,计算机控制考试题目整理
- origin绘图软件2021 2022-origin中文版下载安装-origin功能使用