SQL Tuning Case Study -- AMP CPU Skew
记录一下今天做的一个SQL tuning, 来自于Jacky Shen的需求。
SQL 代码如下:
INSERT INTO dqrc.dw_lstg_item_trans_w
(
ITEM_ID
,AUCT_END_DT
,AUCT_START_DT
,SALE_SCHED_END_DT
,AUCT_TYPE_CODE
,ITEM_SITE_ID
,ITEM_CNTRY_ID
,LSTG_CURNCY_ID
,LSTG_STATUS_ID
,ICON_FLAGS
,SLR_ID
,SLR_SITE_ID
,SLR_CNTRY_ID
,HIGH_BDR_ID
,LEAF_CATEG_ID
,LEAF_CATEG_ID2
,AUCT_START_EXCHNG_RATE
,AUCT_END_EXCHNG_RATE
,START_PRICE_LSTG_CURNCY
,RSRV_PRICE_LIST_CRNCY
,BIN_PRICE_LSTG_CURNCY
,CURNT_PRICE_LSTG_CURNCY
,GMS_LSTG_CURNCY
,GMS_LSTG_CURNCY_EST
,QTY_SOLD
,QTY_AVAIL
,BIDCOUNT
,WACKO_YN
,SUCCESS_YN
,STOR_LSTG_YN_FLAG_ID
,MR_LISTER_FLAG
,SLNG_MGR_SBSCRBR_FLAG
,SLNG_MGR_PRO_SBSCRBR_FLAG
,SLNG_MGR_PRO_USED_UP_FLAG
,BIN_SOLD_UP_FLAG
,GTC_UP_FLAG
,PAYPAL_ACCEPTED_UP_FLAG
,CK_FETR_UP_FLAG
,MRCH_TOOL_UP_FLAG
,SYI_2_UP_FLAG
,CHARITY_GIVING_TOOL_UP_FLAG
,B2B_UP_FLAG
,SLR_PAYPAL_PREFD_UP_FLAG
,CATEG_SLNG_GUIDE_USED_UP_FLAG
,RELIST_UP_FLAG
,RELIST_CRD_UP_FLAG
,LISTED_WITH_AUTOPAY_UP_FLAG
,APPLICATION_ID
,PRODUCT_ID
,AUCT_END_DATE
,PROCESS_FLAG
,EBX_ELGBL_ITEM_YN_ID
,OFRD_SLNG_CHNL_ID
,CRE_USER
,CRE_DATE
)
SELECT
item.ID ITEM_ID
,cast(item.SALE_END as date) AUCT_END_DT
,cast(item.SALE_START as date) AUCT_START_DT
,cast(item.SALE_SCHED_END as date) SALE_SCHED_END_DT
,case when item.SALE_TYPE > 999999999 then -888 else item.SALE_TYPE end AUCT_TYPE_CODE
,item.SITE_ID ITEM_SITE_ID
,item.COUNTRY_ID ITEM_CNTRY_ID
,item.CURRENCY_ID LSTG_CURNCY_ID
,item.NEW_LSTG_STATUS_ID LSTG_STATUS_ID
,item.ICON_FLAGS ICON_FLAGS
,item.SELLER SLR_ID
,case when abs(coalesce(usr.user_site_id, -999)) <= 9999
then (coalesce(usr.user_site_id, -999))
else -888
end SLR_SITE_ID
,case when abs(coalesce(usr.user_cntry_id, -999)) <= 9999
then (coalesce(usr.user_cntry_id, -999) )
else -888
end SLR_CNTRY_ID
,item.HIGH_BIDDER HIGH_BDR_ID
,case when item.CATEGORY > 999999999 or item.CATEGORY < -999999999 then -888 else item.CATEGORY end LEAF_CATEG_ID
,case when item.CATEGORY2 > 999999999 or item.CATEGORY2 < -999999999 then -888 else item.CATEGORY2 end LEAF_CATEG_ID2
,coalesce(start_exchng.EXCHNG_RATE ,1.00000000000000 ) AUCT_START_EXCHNG_RATE
,coalesce(end_exchng.EXCHNG_RATE ,1.00000000000000 ) AUCT_END_EXCHNG_RATE
,item.START_PRICE START_PRICE_LSTG_CURNCY
,item.RESERVE_PRICE RSRV_PRICE_LIST_CRNCY
,item.BIN_PRICE BIN_PRICE_LSTG_CURNCY
,item.CURRENT_PRICE CURNT_PRICE_LSTG_CURNCY
,Case
When SALE_TYPE in (2,7,9,12,13,14,15) then DUTCH_GMS
When SALE_TYPE in (1,8,5 ) then
(Case When SUCCESS_YN='Y' then CURRENT_PRICE else 0.00 end )
Else (Case When SUCCESS_YN='Y' then DUTCH_GMS else 0.00 end )
End GMS_LSTG_CURNCY
,GMS_LSTG_CURNCY GMS_LSTG_CURNCY_EST
,case when item.QUANTITY_SOLD > 2147483647 then 0 else item.QUANTITY_SOLD end QTY_SOLD
,case when item.QUANTITY > 2147483647 then 1 else item.QUANTITY end QTY_AVAIL
,case when item.BIDCOUNT > 2147483647 then 0 else item.BIDCOUNT end BIDCOUNT
,item.WACKO_YN
,Case
When SALE_TYPE IN ( 2,7,9,12,13,14,15 ) THEN
( CASE WHEN (QUANTITY_SOLD > 0 AND DUTCH_GMS > 0) THEN 'Y' ELSE 'N' END )
ELSE
( CASE WHEN ( CURRENT_PRICE > 0 AND CURRENT_PRICE >= RESERVE_PRICE AND BIDCOUNT >0 AND HIGH_BIDDER
<> -1 AND QUANTITY_SOLD > 0) THEN 'Y' ELSE 'N' END )
End SUCCESS_YN
,STOR_LSTG_YN_FLAG_ID
,0 MR_LISTER_FLAG
,0 SLNG_MGR_SBSCRBR_FLAG
,0 SLNG_MGR_PRO_SBSCRBR_FLAG
,item.SLNG_MGR_PRO_USED_UP_FLAG
,item.BIN_SOLD_UP_FLAG
,item.GTC_UP_FLAG
,item.PAYPAL_ACCEPTED_UP_FLAG
,item.CK_FETR_UP_FLAG
,item.MRCH_TOOL_UP_FLAG
,item.SYI_2_UP_FLAG
,item.CHARITY_GIVING_TOOL_UP_FLAG
,item.B2B_UP_FLAG
,item.SLR_PAYPAL_PREFD_UP_FLAG
,item.CATEG_SLNG_GUIDE_USED_UP_FLAG
,item.RELIST_UP_FLAG
,item.RELIST_CRD_UP_FLAG
,item.LISTED_WITH_AUTOPAY_UP_FLAG
,item.APPLICATION_ID
,item.PRODUCT_ID
,item.SALE_END
,item.PROCESS_FLAG
,item.EBX_ELGBL_ITEM_YN_ID
,CASE WHEN item.SALE_TYPE IN (1,2,3,4,5,6,7,8,9,10,13) and item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.HALF_ON_EBAY_ID <> 1
and item.IS_ARTISAN_ID <> 1 THEN 1
WHEN item.SALE_TYPE= 14 THEN 2
WHEN item.SALE_TYPE= 15 THEN 3
WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID <>1 and item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 4
WHEN item.SALE_TYPE= 12 and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT < '2008-09-20'
and item.HALF_ON_EBAY_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 6
WHEN item.SALE_TYPE= 12 and item.EBX_ELGBL_ITEM_YN_ID = 1
and AUCT_START_DT >= '2008-09-20' and item.HALF_ON_EBAY_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 4
WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 7
-- WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID = 1 and item.IS_ARTISAN_ID <> 1 and AUCT_START_DT >= '2008-09-20' THEN 7
WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID = 1 and item.IS_ARTISAN_ID <> 1 THEN 7
WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1
and AUCT_START_DT < '2008-09-20' and item.IS_ARTISAN_ID <> 1 THEN 5
WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT >= '2008-09-20'
and item.IS_ARTISAN_ID <> 1 THEN 1
WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT < '2008-09-20'
and item.IS_ARTISAN_ID = 1 THEN 9
WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT >= '2008-09-20'
and item.IS_ARTISAN_ID = 1 THEN 8
WHEN item.IS_ARTISAN_ID = 1 THEN 8
ELSE -99 END OFRD_SLNG_CHNL_ID
,'dw_batch' CRE_USER
,current_date CRE_DATE
from
dqrc.dw_lstg_item_w_vw item
left join dqrc.dw_lstg_item_trans_w1 usr
on item.id=usr.id
left join batch_views.dw_daily_exchange_rates start_exchng
on item.CURRENCY_ID = start_exchng.curncy_id
and item.EXCHNG_START_DT=start_exchng.day_of_trans_dt
left join batch_views.dw_daily_exchange_rates end_exchng
on item.CURRENCY_ID = end_exchng.curncy_id
and item.EXCHNG_END_DT=end_exchng.day_of_trans_dt
;
INSERT Failed. 3156: Request aborted by TDWM. Exception criteria exceeded: AMP CPU Skew.
系统提示AMP CPU Skew,被强制取消执行。
拿到这个query首先检查了里面用到的几张表在PI和Join Condition上是否Skew,统计信息是否最新。
发现dqrc.dw_lstg_item_w_vw这个view 下面的table没有统计信息,在PI上分布合理并不Skew,但是在Currency_ID上的分布非常Skew。
CURRENCY_ID | Count(*) |
1 | 13,468,529 |
7 | 12,568,689 |
3 | 7,064,171 |
5 | 2,870,200 |
2 | 199,217 |
44 | 102,678 |
35 | 59,390 |
84 | 21,846 |
34 | 21,777 |
39 | 17,593 |
13 | 17,278 |
46 | 9,811 |
22 | 3,361 |
于是首先尝试Collect stats dqrc.DW_ITEMS_W_20141214 column id;
完成后执行计划并没有变化。
5) We execute the following steps in parallel.
1) We do an all-AMPs RETRIEVE step from
GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
batch_views.dw_daily_exchange_rates by way of an all-rows
scan with no residual conditions into Spool 2 (all_amps),
which is redistributed by the hash code of (
GDW_TABLES.DW_DAILY_EXCHANGE_RATES.CURNCY_ID,
GDW_TABLES.DW_DAILY_EXCHANGE_RATES.DAY_OF_TRANS_DT) to all
AMPs. The size of Spool 2 is estimated with high confidence
to be 310,092 rows (10,853,220 bytes). The estimated time
for this step is 0.02 seconds.
2) We do an all-AMPs JOIN step from dqrc.DW_ITEMS_W_20141214 in
view dw_lstg_item_w_vw by way of a RowHash match scan with no
residual conditions, which is joined to dqrc.usr by way of a
RowHash match scan with no residual conditions.
dqrc.DW_ITEMS_W_20141214 and dqrc.usr are left outer joined
using a merge join, with a join condition of (
"dqrc.DW_ITEMS_W_20141214.ID = dqrc.usr.ID"). The result
goes into Spool 3 (all_amps), which is redistributed by the
hash code of (dqrc.DW_ITEMS_W_20141214.CURRENCY_ID,
dqrc.DW_ITEMS_W_20141214.EXCHNG_START_DT) to all AMPs. The
size of Spool 3 is estimated with low confidence to be
36,424,540 rows (9,834,625,800 bytes). The estimated time
for this step is 1.43 seconds.
6) We do an all-AMPs JOIN step from Spool 2 by way of an all-rows
scan, which is joined to Spool 3 (Last Use) by way of an all-rows
scan. Spool 2 and Spool 3 are right outer joined using a single
partition hash join, with a join condition of ("(EXCHNG_START_DT =
DAY_OF_TRANS_DT) AND (CURRENCY_ID = CURNCY_ID)"). The result goes
into Spool 5 (all_amps), which is redistributed by the hash code
of (dqrc.DW_ITEMS_W_20141214.EXCHNG_END_DT,
dqrc.DW_ITEMS_W_20141214.CURRENCY_ID) to all AMPs. The size of
Spool 5 is estimated with no confidence to be 66,791,132 rows (
18,300,770,168 bytes). The estimated time for this step is 1.58
seconds.
请教了有TD DBA经验的Gus同学,建议可以把redistribute后会skew的1,3,5,7这几个值作单独处理。
然而直接在where条件中加入currency_id in (1,3,5,7)并不奏效,可能系统无法正确读取统计信息。
于是进一步测试,将1,3,5,7放入Volatile table后再collect stats
create volatile table test_20141217 as
( select * from dqrc.dw_lstg_item_w_vw
where currency_id in (1,3,5,7)
) with data primary index (id)
on commit preserve rows;
collect stats test_20141217 column id;
collect stats test_20141217 column currency_id;
collect stats test_20141217 column (currency_id,EXCHNG_START_DT) ;
collect stats test_20141217 column (currency_id,EXCHNG_END_DT) ;
把test_20141217
代入原SQL中的dqrc.dw_lstg_item_w_vw处,此时查看Explan plan,发生了明显变化:
4) We execute the following steps in parallel.
1) We do an all-AMPs JOIN step from LUCAO.item by way of a
RowHash match scan with no residual conditions, which is
joined to dqrc.usr by way of a RowHash match scan with no
residual conditions. LUCAO.item and dqrc.usr are left outer
joined using a merge join, with a join condition of (
"LUCAO.item.ID = dqrc.usr.ID"). The result goes into Spool 2
(all_amps) fanned out into 5 hash join partitions, which is
built locally on the AMPs. The size of Spool 2 is estimated
with low confidence to be 35,971,590 rows (7,985,692,980
bytes). The estimated time for this step is 0.37 seconds.
2) We do an all-AMPs RETRIEVE step from
GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
batch_views.dw_daily_exchange_rates by way of an all-rows
scan with no residual conditions into Spool 3 (all_amps)
fanned out into 5 hash join partitions, which is duplicated
on all AMPs. The size of Spool 3 is estimated with high
confidence to be 1,162,845,000 rows (31,396,815,000 bytes).
The estimated time for this step is 9.80 seconds.
5) We do an all-AMPs JOIN step from Spool 2 (Last Use) by way of an
all-rows scan, which is joined to Spool 3 (Last Use) by way of an
all-rows scan. Spool 2 and Spool 3 are left outer joined using a
hash join of 5 partitions, with a join condition of (
"(EXCHNG_START_DT = DAY_OF_TRANS_DT) AND (CURRENCY_ID = CURNCY_ID)").
The result goes into Spool 4 (all_amps) fanned out into 6 hash
join partitions, which is built locally on the AMPs. The size of
Spool 4 is estimated with low confidence to be 49,325,868 rows (
11,147,646,168 bytes). The estimated time for this step is 0.41
seconds.
6) We do an all-AMPs RETRIEVE step from
GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
batch_views.dw_daily_exchange_rates by way of an all-rows scan
with no residual conditions into Spool 5 (all_amps) fanned out
into 6 hash join partitions, which is duplicated on all AMPs. The
size of Spool 5 is estimated with high confidence to be
1,162,845,000 rows (31,396,815,000 bytes). The estimated time for
this step is 9.80 seconds.
我们可以看到TD已经把小表join的结果Spool duplicate到all amps而不是像原来那样把驱动表redistribute.再次试跑SQL,成功输出结果!
经验总结:
1. 尝试给Join columns做collect stats
2. 如果一定要redistribute的话,可以把大量skew的数据作单独处理