mysql 数据割接_数据割接笔记

--export data from oracle

exp tbcy/123456@TestB1 file=OM_chetian.dmp log=20.log tables=chey indexes=n GRANTS=n CONSTRAINTS=n TRIGGERS=n

--import data to oracle(ignore=y:表已存在时忽略建表错误，继续导入数据)

imp com/123456@pdb1 file=OM_chetian.dmp log=20.log tables=chey indexes=n GRANTS=n CONSTRAINTS=n TRIGGERS=n ignore=y

------------------------------------------------------------------------------------------------------------------

删除表：drop table lc_chetest purge; -- PURGE drops the table permanently, bypassing the recycle bin

并发创建索引：

-- Build the index on loc_chetest(oid) with a parallel degree of 16 and NOLOGGING.
create index inx_che_oid
    on loc_chetest (oid)
    tablespace d_log_01
    nologging
    parallel 16;

关闭并发：alter index inx_che_oid noparallel; -- reset the index to serial once the parallel build is done

并发创建临时表：

-- Snapshot table built in parallel: rows of sducy.inf_che_all whose acctid
-- also appears in sducy.user_account, plus constant status/bookkeeping columns.
create table inf_che_all_his_1010
    tablespace d_space
    nologging
    parallel 16
as
select src.oid,
       src.name,
       src.addr,
       src.phone,
       '0'     as status,
       sysdate as his_date,
       0       as deal_flag,
       0       as limitflag
  from sducy.inf_che_all src
 where exists (select 1
                 from sducy.user_account acct
                where acct.acctid = src.acctid);

关闭并发：

-- Reset the table to serial now that the parallel CTAS has finished.
alter table inf_che_all_his_1010 noparallel;

说明：

新老库表数据割接，把老库的数据割接到新库中。

1.表字段相同：直接exp老表数据，然后imp老表数据到新库中(或者c++直接跨库搬迁数据)

2.表字段不同：

2.1).先核对新老表字段映射关系，整理数据映射关系表格。

2.2).老库需要多张表映射一张新表数据的，根据表关系，查询多个表关联查询，组合成一张临时新表(添加处理数据状态:flag)

2.3).exp临时新表，然后再imp该临时新表数据到新库

2.4).循环遍历新库临时表待处理数据,获取正式表的字段信息插入到正式表中，更新临时表处理状态(0:待处理;1:处理成功；5:数据已经存在重复；9:处理失败)

------------------------------------------------------------------------------------------------------------------

样例：

1.支付关系表 2.已经割接数据表 3.账号表 4.用户表

要求：1.割接支付关系表到新库中

2.前期有小批量割接数据

3.确保支付关系中的账户是有效的。

4.确认账号是有用户在使用的。(即:有人使用的有效账号，这样的支付关系才需要割接)

other说明：1)由于多表关联，且都是大表，即使开并发也处理不动，将多表关联拆成多次关联，创建临时表，提高效率。

步骤：

1.捞取支付关系表，先过滤已经割接数据

-- Step 1 staging table: payment-relation rows that have no counterpart in the
-- already-migrated table. acctid is carried along because the following steps
-- filter the staging tables on it.
create table 临时表1 tablespace d_space nologging parallel 16 as
select b.oid,
       b.acctid,
       b.name,
       b.addr,
       b.phone,
       '0'     as status,
       sysdate as his_date,
       0       as deal_flag,
       0       as limitflag
  from 支付关系表 b
 where not exists (select 1
                     from 已经割接表 c
                    where c.payoid = b.payid);

--关闭临时表1并发

2.捞取临时表1，过滤账号id不存在的数据,创建临时表2

-- Step 2 staging table: rows of 临时表1 whose acctid exists in the account table.
create table 临时表2
    tablespace d_space
    nologging
    parallel 16
as
select src.*
  from 临时表1 src
 where exists (select 1
                 from 账号表 acct
                where acct.acctid = src.acctid);

--关闭临时表2并发

3.捞取临时表2，过滤账号不被用户使用的数据，创建临时表3

-- Step 3 staging table: rows of 临时表2 whose acctid matches a user-table row
-- with active = 0. The target must be 临时表3; 临时表1 already exists from step 1.
-- NOTE(review): active is assumed to be a column of the user table — confirm.
create table 临时表3 tablespace d_space nologging parallel 16 as
select b.*
  from 临时表2 b
 where exists (select 1
                 from 用户表 c
                where c.acctid = b.acctid
                  and c.active = 0);

-- Afterwards switch off parallelism on 临时表3 (alter table ... noparallel).

4.为临时表3创建处理状态标识.

alter table 临时表3 add flag varchar2(1); -- processing-status flag; it can also be added directly in the CTAS that creates the table

5.将临时表3导出，再导入到新库中

6.写一个块程序，捞取临时表，入库到线网业务表。

-- Template block: walks the pending rows of the staging table (临时表), loads
-- each one into the production table (线网表) and records the outcome in
-- 临时表.flag: '0' or NULL = pending, '1' = loaded, '5' = duplicate, '9' = failed.
-- Work is committed in batches so a long run does not hold one huge transaction.
declare
    c_commit_every constant pls_integer := 500;  -- rows per commit batch
    n_commitcnt    pls_integer := 0;             -- rows handled since the last commit
begin
    -- Rows whose flag was never set are NULL rather than '0', so both count as pending.
    for cur in (select t.rowid as rid, t.*
                  from 临时表 t
                 where nvl(t.flag, '0') = '0')
    loop
        begin
            -- Lets the handler undo a half-finished row without losing the batch.
            savepoint before_row;

            -- TODO: validate the row here (NULL dates, uniqueness, ...).
            -- For a row that already exists in the target, mark it instead of inserting:
            --   update 临时表 set flag = '5' where rowid = cur.rid;

            -- TODO: insert the row into 线网表 from the cur.<column> values.

            -- Mark the staging row as processed; rowid is used because the
            -- staging table is large and has no index.
            update 临时表 set flag = '1' where rowid = cur.rid;
        exception
            when others then
                rollback to savepoint before_row;
                update 临时表 set flag = '9' where rowid = cur.rid;
        end;

        n_commitcnt := n_commitcnt + 1;
        if n_commitcnt >= c_commit_every then
            commit;
            n_commitcnt := 0;
        end if;
    end loop;

    -- Flush the last, partially filled batch.
    commit;
end;

说明：1)临时表数据量大，无索引，更新时使用rowid提高运行效率。

7.割接数据处理结果跟踪

-- Progress check: row count per processing flag. The PARALLEL hint takes its degree in parentheses.
select /*+ parallel(10) */ flag, count(*) from 临时表 group by flag;

8.当线网表数据无唯一性约束时，可能存在相同数据的重复数据，需要进行去重处理(分两步)

8.1):捞取重复数据：

-- Collects the surplus copies of duplicated rows: every row whose rowid is
-- greater than the smallest rowid sharing its (user_id, regon). regon is
-- selected too because the clean-up block reads it from this table.
-- NOTE(review): "user" is a placeholder; USER is a reserved word in Oracle,
-- so substitute the real table name before running.
create table mg_重复数据 nologging tablespace d_user parallel 16 as
select t.user_id,
       t.regon,
       0 as flag
  from user t
 where t.rowid > (select min(a.rowid)
                    from user a
                   where a.user_id = t.user_id
                     and a.regon = t.regon);

8.2):删除重复数据

注释：由于数据量较大，采取开通道0-19 ，并行处理提高效率。

-- Template block: removes duplicate rows from the production table for every
-- pending entry of mg_重复数据 and records the outcome in mg_重复数据.flag
-- (0 = pending, 1 = done, 9 = failed).
-- The work is split into 20 channels by mod(user_id, 20); run one session per
-- channel with c_channel set to 0..19.
-- NOTE(review): assumes mg_重复数据 carries user_id and regon — confirm against
-- the statement that builds it. "user" is a placeholder table name.
declare
    c_channel      constant pls_integer := 0;    -- channel handled by this session (0..19)
    c_commit_every constant pls_integer := 500;  -- rows per commit batch
    n_commitcnt    pls_integer := 0;             -- rows handled since the last commit
begin
    for cur in (select d.rowid as rid, d.*
                  from mg_重复数据 d
                 where d.flag = 0
                   and mod(d.user_id, 20) = c_channel)
    loop
        begin
            -- Lets the handler undo a half-finished row without losing the batch.
            savepoint before_row;

            -- First choice: drop the copies carrying inst_id '88888888'.
            delete from user
             where user_id = cur.user_id
               and inst_id = '88888888';

            -- No such copy existed: keep the row with the highest rowid and drop
            -- the rest. The delete is limited to this user_id/regon pair so that
            -- rows of other users are never touched.
            if sql%rowcount = 0 then
                delete from user t
                 where t.user_id = cur.user_id
                   and t.regon = cur.regon
                   and t.rowid <> (select max(a.rowid)
                                     from user a
                                    where a.user_id = cur.user_id
                                      and a.regon = cur.regon);
            end if;

            -- NOTE(review): the original notes also list an insert into the
            -- production table at this point; de-duplication only deletes, so
            -- none is done here — confirm.

            update mg_重复数据 set flag = 1 where rowid = cur.rid;
        exception
            when others then
                rollback to savepoint before_row;
                update mg_重复数据 set flag = 9 where rowid = cur.rid;
        end;

        n_commitcnt := n_commitcnt + 1;
        if n_commitcnt >= c_commit_every then
            commit;
            n_commitcnt := 0;
        end if;
    end loop;

    -- Flush the last, partially filled batch.
    commit;
end;

8.3检查结果

-- Result check: row count per processing flag. The PARALLEL hint takes its degree in parentheses.
select /*+ parallel(10) */ flag, count(*) from mg_重复数据 group by flag;

其他：

块逻辑卡死问题查询，根据执行计划，思考优化方法，优化性能sql

--登陆sql用户，查询执行sql，查看一直卡死的sql_id

-- Lists what the sessions of one OS user are executing and waiting on.
-- v$sql holds one row per child cursor, so the join also matches the child
-- number; joining on sql_id alone repeats each session once per child.
select a.sql_id,
       a.sql_text,
       t.event,
       t.*
  from v$session t
 inner join v$sql a
    on a.sql_id = t.sql_id
   and a.child_number = t.sql_child_number
 where t.osuser = 'cn_xiaocai';

-- Look up the execution plan by sql_id. The arguments are the sql_id found by
-- the session query, the child cursor number (0) and the output format.

select * from table(dbms_xplan.display_cursor('fkduybquzyq',0,'typical'));

mysql 数据割接_数据割接笔记相关推荐

数据图表可视化_数据可视化如何选择正确的图表第1部分
数据图表可视化 According to the World Economic Forum, the world produces 2.5 quintillion bytes of data ever ...
mysql 执行计划改变_数据量增加导致mysql执行计划改变解决_MySQL
bitsCN.com 数据量增加导致mysql执行计划改变解决收到运维同学电话,mysql服务器连接数满了,登录服务器查看,确实满了,好吧,首先增加连接数到2500,暂时提供对外服务.连接继续升高, ...
数据预处理工具_数据预处理
数据预处理工具 As the title states this is the last project from Udacity Nanodegree. The goal of this proje ...
数据可视化工具_数据可视化
数据可视化工具 Visualizations are a great way to show the story that data wants to tell. However, not all v ...
数据分析师入门_数据分析师入门基础指南
数据分析师入门 Back in the summer of 2018, I was just starting my first internship as a Data Analyst. 早在201 ...
大数据平台蓝图_数据科学面试蓝图
大数据平台蓝图 1.组织是关键 (1. Organisation is Key) I've interviewed at Google (and DeepMind), Uber, Facebook, ...
etl数据抽取工具_数据同步工具ETL、ELT傻傻分不清楚？3分钟看懂两者区别
什么是数据同步工具(ETL.ELT) 数据同步工具ETL或者ELT的作用是将业务系统的数据经过抽取.清洗转换之后加载到数据仓库的过程,目的是将企业中的分散.零乱.标准不统一的数据整合到一起,为企业的决 ...
数据透视表和数据交叉表_数据透视表的数据提取
数据透视表和数据交叉表 Consider the data of healthcare drugs as provided in the excel sheet. The concept of piv ...
鲜活数据数据可视化指南_数据可视化实用指南
鲜活数据数据可视化指南 Exploratory data analysis (EDA) is an essential part of the data science or the machine ...

mysql 数据割接_数据割接笔记

mysql 数据割接_数据割接笔记相关推荐

最新文章

热门文章