记录一下今天做的一个SQL tuning, 来自于Jacky Shen的需求。

SQL 代码如下:

INSERT INTO dqrc.dw_lstg_item_trans_w 
( 
 ITEM_ID 
,AUCT_END_DT 
,AUCT_START_DT 
,SALE_SCHED_END_DT 
,AUCT_TYPE_CODE 
,ITEM_SITE_ID 
,ITEM_CNTRY_ID 
,LSTG_CURNCY_ID 
,LSTG_STATUS_ID 
,ICON_FLAGS 
,SLR_ID 
,SLR_SITE_ID 
,SLR_CNTRY_ID 
,HIGH_BDR_ID 
,LEAF_CATEG_ID 
,LEAF_CATEG_ID2 
,AUCT_START_EXCHNG_RATE 
,AUCT_END_EXCHNG_RATE 
,START_PRICE_LSTG_CURNCY 
,RSRV_PRICE_LIST_CRNCY 
,BIN_PRICE_LSTG_CURNCY 
,CURNT_PRICE_LSTG_CURNCY 
,GMS_LSTG_CURNCY 
,GMS_LSTG_CURNCY_EST 
,QTY_SOLD 
,QTY_AVAIL 
,BIDCOUNT 
,WACKO_YN 
,SUCCESS_YN 
,STOR_LSTG_YN_FLAG_ID 
,MR_LISTER_FLAG 
,SLNG_MGR_SBSCRBR_FLAG 
,SLNG_MGR_PRO_SBSCRBR_FLAG 
,SLNG_MGR_PRO_USED_UP_FLAG 
,BIN_SOLD_UP_FLAG 
,GTC_UP_FLAG 
,PAYPAL_ACCEPTED_UP_FLAG 
,CK_FETR_UP_FLAG 
,MRCH_TOOL_UP_FLAG 
,SYI_2_UP_FLAG 
,CHARITY_GIVING_TOOL_UP_FLAG 
,B2B_UP_FLAG 
,SLR_PAYPAL_PREFD_UP_FLAG 
,CATEG_SLNG_GUIDE_USED_UP_FLAG 
,RELIST_UP_FLAG 
,RELIST_CRD_UP_FLAG 
,LISTED_WITH_AUTOPAY_UP_FLAG 
,APPLICATION_ID 
,PRODUCT_ID 
,AUCT_END_DATE 
,PROCESS_FLAG 
,EBX_ELGBL_ITEM_YN_ID 
,OFRD_SLNG_CHNL_ID 
,CRE_USER 
,CRE_DATE 
) 
SELECT 
 item.ID ITEM_ID 
,cast(item.SALE_END as date) AUCT_END_DT 
,cast(item.SALE_START as date) AUCT_START_DT 
,cast(item.SALE_SCHED_END as date) SALE_SCHED_END_DT 
,case when item.SALE_TYPE > 999999999 then -888 else item.SALE_TYPE end AUCT_TYPE_CODE 
,item.SITE_ID ITEM_SITE_ID 
,item.COUNTRY_ID ITEM_CNTRY_ID 
,item.CURRENCY_ID LSTG_CURNCY_ID 
,item.NEW_LSTG_STATUS_ID LSTG_STATUS_ID 
,item.ICON_FLAGS ICON_FLAGS 
,item.SELLER SLR_ID 
,case when abs(coalesce(usr.user_site_id, -999)) <= 9999 
        then (coalesce(usr.user_site_id, -999)) 
        else -888 
end SLR_SITE_ID 
,case when abs(coalesce(usr.user_cntry_id, -999)) <= 9999 
        then (coalesce(usr.user_cntry_id, -999) ) 
        else  -888 
end SLR_CNTRY_ID 
,item.HIGH_BIDDER HIGH_BDR_ID 
,case when item.CATEGORY  > 999999999 or item.CATEGORY < -999999999 then -888 else item.CATEGORY  end LEAF_CATEG_ID 
,case when item.CATEGORY2 > 999999999 or item.CATEGORY2 < -999999999 then -888 else item.CATEGORY2 end LEAF_CATEG_ID2 
,coalesce(start_exchng.EXCHNG_RATE ,1.00000000000000 ) AUCT_START_EXCHNG_RATE 
,coalesce(end_exchng.EXCHNG_RATE   ,1.00000000000000 ) AUCT_END_EXCHNG_RATE 
,item.START_PRICE START_PRICE_LSTG_CURNCY 
,item.RESERVE_PRICE RSRV_PRICE_LIST_CRNCY 
,item.BIN_PRICE BIN_PRICE_LSTG_CURNCY 
,item.CURRENT_PRICE CURNT_PRICE_LSTG_CURNCY 
,Case 
   When SALE_TYPE in (2,7,9,12,13,14,15) then DUTCH_GMS 
   When SALE_TYPE in (1,8,5 ) then 
        (Case When SUCCESS_YN='Y' then CURRENT_PRICE else 0.00 end ) 
   Else (Case When SUCCESS_YN='Y' then DUTCH_GMS     else 0.00 end ) 
   End GMS_LSTG_CURNCY 
,GMS_LSTG_CURNCY GMS_LSTG_CURNCY_EST 
,case when item.QUANTITY_SOLD > 2147483647 then 0 else item.QUANTITY_SOLD end QTY_SOLD 
,case when item.QUANTITY      > 2147483647 then 1 else item.QUANTITY      end QTY_AVAIL 
,case when item.BIDCOUNT      > 2147483647 then 0 else item.BIDCOUNT      end BIDCOUNT 
,item.WACKO_YN 
,Case 
   When SALE_TYPE IN ( 2,7,9,12,13,14,15 )  THEN 
                       ( CASE   WHEN    (QUANTITY_SOLD > 0 AND DUTCH_GMS > 0) THEN  'Y' ELSE 'N' END    ) 
   ELSE 
               
 ( CASE WHEN ( CURRENT_PRICE > 0 AND CURRENT_PRICE >= RESERVE_PRICE AND BIDCOUNT >0 AND HIGH_BIDDER 
 <> -1 AND QUANTITY_SOLD > 0) THEN 'Y' ELSE 'N' END ) 
  End SUCCESS_YN 
,STOR_LSTG_YN_FLAG_ID 
,0 MR_LISTER_FLAG 
,0 SLNG_MGR_SBSCRBR_FLAG 
,0 SLNG_MGR_PRO_SBSCRBR_FLAG 
,item.SLNG_MGR_PRO_USED_UP_FLAG 
,item.BIN_SOLD_UP_FLAG 
,item.GTC_UP_FLAG 
,item.PAYPAL_ACCEPTED_UP_FLAG 
,item.CK_FETR_UP_FLAG 
,item.MRCH_TOOL_UP_FLAG 
,item.SYI_2_UP_FLAG 
,item.CHARITY_GIVING_TOOL_UP_FLAG 
,item.B2B_UP_FLAG 
,item.SLR_PAYPAL_PREFD_UP_FLAG 
,item.CATEG_SLNG_GUIDE_USED_UP_FLAG 
,item.RELIST_UP_FLAG 
,item.RELIST_CRD_UP_FLAG 
,item.LISTED_WITH_AUTOPAY_UP_FLAG 
,item.APPLICATION_ID 
,item.PRODUCT_ID 
,item.SALE_END 
,item.PROCESS_FLAG 
,item.EBX_ELGBL_ITEM_YN_ID 
,CASE WHEN item.SALE_TYPE IN (1,2,3,4,5,6,7,8,9,10,13) and  item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.HALF_ON_EBAY_ID <> 1  
                                                       and  item.IS_ARTISAN_ID <> 1  THEN 1                                                                                             
                   WHEN item.SALE_TYPE= 14  THEN 2                                                                                                      
                   WHEN item.SALE_TYPE= 15 THEN 3                                                                                                       
                   WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID <>1  and item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 4                                                       
                                                
                   WHEN item.SALE_TYPE= 12 and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT < '2008-09-20'  
                                           and item.HALF_ON_EBAY_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 6                                                                                          
                
                   WHEN item.SALE_TYPE= 12 and item.EBX_ELGBL_ITEM_YN_ID = 1 
                                         and AUCT_START_DT >= '2008-09-20' and  item.HALF_ON_EBAY_ID <> 1  and item.IS_ARTISAN_ID <> 1 THEN 4                                                        
                                                                                                        
                   WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID <> 1 and item.IS_ARTISAN_ID <> 1 THEN 7                                                        
                                                                                                        
                -- WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID = 1 and item.IS_ARTISAN_ID <> 1 and AUCT_START_DT >= '2008-09-20' THEN 7 
             WHEN item.SALE_TYPE= 12 and item.HALF_ON_EBAY_ID = 1 and item.EBX_ELGBL_ITEM_YN_ID = 1 and item.IS_ARTISAN_ID <> 1 THEN 7 
             WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 
                                                    and AUCT_START_DT < '2008-09-20'   and  item.IS_ARTISAN_ID  <> 1  THEN 5                                                                         
                                                                                        
                   WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT >= '2008-09-20'  
                                                    and item.IS_ARTISAN_ID <> 1  THEN 1                                                                                                              
                                        
                   WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT < '2008-09-20'  
                                                    and item.IS_ARTISAN_ID = 1  THEN 9                                                                                                               
                                                
                   WHEN item.SALE_TYPE IN (1,2,7,9) and item.EBX_ELGBL_ITEM_YN_ID = 1 and AUCT_START_DT >= '2008-09-20' 
                                                    and item.IS_ARTISAN_ID = 1   THEN 8                                                                                                              
                                                
                   WHEN item.IS_ARTISAN_ID = 1 THEN 8                                                                                                                                                
                        
                          ELSE -99  END OFRD_SLNG_CHNL_ID                                                                                                                               
,'dw_batch' CRE_USER 
,current_date CRE_DATE
from 
 dqrc.dw_lstg_item_w_vw  item 
 left join dqrc.dw_lstg_item_trans_w1 usr 
 on item.id=usr.id 
 left join batch_views.dw_daily_exchange_rates start_exchng 
 on item.CURRENCY_ID = start_exchng.curncy_id 
 and item.EXCHNG_START_DT=start_exchng.day_of_trans_dt 
 left join batch_views.dw_daily_exchange_rates end_exchng 
 on item.CURRENCY_ID = end_exchng.curncy_id 
 and item.EXCHNG_END_DT=end_exchng.day_of_trans_dt 
;

INSERT Failed. 3156: Request aborted by TDWM. Exception criteria exceeded: AMP CPU Skew.

系统提示AMP CPU Skew,被强制取消执行。

拿到这个query首先检查了里面用到的几张表在PI和Join Condition上是否Skew,统计信息是否最新。

发现dqrc.dw_lstg_item_w_vw这个view 下面的table没有统计信息,在PI上分布合理并不Skew,但是在Currency_ID上的分布非常Skew。

CURRENCY_ID Count(*)
1 13,468,529
7 12,568,689
3 7,064,171
5 2,870,200
2 199,217
44 102,678
35 59,390
84 21,846
34 21,777
39 17,593
13 17,278
46 9,811
22 3,361

于是首先尝试Collect stats dqrc.DW_ITEMS_W_20141214 column id; 完成后执行计划并没有变化。

 5) We execute the following steps in parallel.
   1) We do an all-AMPs RETRIEVE step from
      GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
      batch_views.dw_daily_exchange_rates by way of an all-rows
      scan with no residual conditions into Spool 2 (all_amps),
      which is redistributed by the hash code of (
      GDW_TABLES.DW_DAILY_EXCHANGE_RATES.CURNCY_ID,
      GDW_TABLES.DW_DAILY_EXCHANGE_RATES.DAY_OF_TRANS_DT) to all
      AMPs.  The size of Spool 2 is estimated with high confidence
      to be 310,092 rows (10,853,220 bytes).  The estimated time
      for this step is 0.02 seconds.
   2) We do an all-AMPs JOIN step from dqrc.DW_ITEMS_W_20141214 in
      view dw_lstg_item_w_vw by way of a RowHash match scan with no
      residual conditions, which is joined to dqrc.usr by way of a
      RowHash match scan with no residual conditions.
      dqrc.DW_ITEMS_W_20141214 and dqrc.usr are left outer joined
      using a merge join, with a join condition of (
      "dqrc.DW_ITEMS_W_20141214.ID = dqrc.usr.ID").  The result
      goes into Spool 3 (all_amps), which is redistributed by the
      hash code of (dqrc.DW_ITEMS_W_20141214.CURRENCY_ID,
      dqrc.DW_ITEMS_W_20141214.EXCHNG_START_DT) to all AMPs.  The
      size of Spool 3 is estimated with low confidence to be
      36,424,540 rows (9,834,625,800 bytes).  The estimated time
      for this step is 1.43 seconds.
 6) We do an all-AMPs JOIN step from Spool 2 by way of an all-rows
 scan, which is joined to Spool 3 (Last Use) by way of an all-rows
 scan.  Spool 2 and Spool 3 are right outer joined using a single
 partition hash join, with a join condition of ("(EXCHNG_START_DT =
 DAY_OF_TRANS_DT) AND (CURRENCY_ID = CURNCY_ID)").  The result goes
 into Spool 5 (all_amps), which is redistributed by the hash code
 of (dqrc.DW_ITEMS_W_20141214.EXCHNG_END_DT,
 dqrc.DW_ITEMS_W_20141214.CURRENCY_ID) to all AMPs.  The size of
 Spool 5 is estimated with no confidence to be 66,791,132 rows (
 18,300,770,168 bytes).  The estimated time for this step is 1.58
 seconds.

请教了有TD DBA经验的Gus同学,建议可以把redistribute后会skew的1,3,5,7这几个值作单独处理。

然而直接在where条件中加入currency_id in (1,3,5,7)并不奏效,可能系统无法正确读取统计信息。

于是进一步测试,将1,3,5,7放入Volatile table后再collect stats

create volatile table test_20141217 as
( select * from dqrc.dw_lstg_item_w_vw
where currency_id in (1,3,5,7)
) with data primary index (id)
on commit preserve rows;

collect stats test_20141217 column id;
collect stats test_20141217 column currency_id;
collect stats test_20141217 column (currency_id,EXCHNG_START_DT) ;
collect stats test_20141217 column (currency_id,EXCHNG_END_DT) ;

test_20141217代入原SQL中的dqrc.dw_lstg_item_w_vw处,此时查看Explan plan,发生了明显变化:

4) We execute the following steps in parallel.
   1) We do an all-AMPs JOIN step from LUCAO.item by way of a
      RowHash match scan with no residual conditions, which is
      joined to dqrc.usr by way of a RowHash match scan with no
      residual conditions.  LUCAO.item and dqrc.usr are left outer
      joined using a merge join, with a join condition of (
      "LUCAO.item.ID = dqrc.usr.ID").  The result goes into Spool 2
      (all_amps) fanned out into 5 hash join partitions, which is
      built locally on the AMPs.  The size of Spool 2 is estimated
      with low confidence to be 35,971,590 rows (7,985,692,980
      bytes).  The estimated time for this step is 0.37 seconds.
   2) We do an all-AMPs RETRIEVE step from
      GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
      batch_views.dw_daily_exchange_rates by way of an all-rows
      scan with no residual conditions into Spool 3 (all_amps)
      fanned out into 5 hash join partitions, which is duplicated
      on all AMPs.  The size of Spool 3 is estimated with high
      confidence to be 1,162,845,000 rows (31,396,815,000 bytes).
      The estimated time for this step is 9.80 seconds.
 5) We do an all-AMPs JOIN step from Spool 2 (Last Use) by way of an
 all-rows scan, which is joined to Spool 3 (Last Use) by way of an
 all-rows scan.  Spool 2 and Spool 3 are left outer joined using a
 hash join of 5 partitions, with a join condition of (
 "(EXCHNG_START_DT = DAY_OF_TRANS_DT) AND (CURRENCY_ID = CURNCY_ID)").
 The result goes into Spool 4 (all_amps) fanned out into 6 hash
 join partitions, which is built locally on the AMPs.  The size of
 Spool 4 is estimated with low confidence to be 49,325,868 rows (
 11,147,646,168 bytes).  The estimated time for this step is 0.41
 seconds.
 6) We do an all-AMPs RETRIEVE step from
 GDW_TABLES.DW_DAILY_EXCHANGE_RATES in view
 batch_views.dw_daily_exchange_rates by way of an all-rows scan
 with no residual conditions into Spool 5 (all_amps) fanned out
 into 6 hash join partitions, which is duplicated on all AMPs.  The
 size of Spool 5 is estimated with high confidence to be
 1,162,845,000 rows (31,396,815,000 bytes).  The estimated time for
 this step is 9.80 seconds.

我们可以看到TD已经把小表join的结果Spool duplicate到all amps而不是像原来那样把驱动表redistribute.再次试跑SQL,成功输出结果!

经验总结:

      1. 尝试给Join columns做collect stats
      2. 如果一定要redistribute的话,可以把大量skew的数据作单独处理


Published

16 December 2014

Category

SQL

Tags