lundi 22 décembre 2014

Other findings, ASH & DB_TIME



1] ASH - Investigate issues in the Past

1.1 [10G] Find blocking session, ex: SID 1610

Select sample_time , session_id , sql_id , session_state , blocking_session , event , wait_class , program , time_waited
from dba_hist_active_sess_history
where --     session_state= 'WAITING'  and
             SAMPLE_TIME  between to_date('09/05/2012 13:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('09/05/2012 13:30:00','DD/MM/YYYY HH24:MI:SS')
order by sample_time ;

Select sample_time , session_id , username, sql_id , sql_plan_hash_value, session_state , blocking_session , event , wait_class , program , machine ,
'SELECT * FROM TABLE (DBMS_XPLAN.display_awr ('''||sql_id||''','''||sql_plan_hash_value||''', NULL,''ADVANCED''))' "ASH"
from dba_hist_active_sess_history a , dba_users b
where --     session_state= 'WAITING'  and
             SAMPLE_TIME  between to_date('07/01/2014 20:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('08/01/2014 05:00:00','DD/MM/YYYY HH24:MI:SS')
and a.user_id = b.user_id
and session_id = 501
--and machine like '%TPV%'
order by sample_time ;


SAMPLE_TIME
SESSION_ID
USERNAME
SQL_ID
SQL_PLAN_HASH_VALUE
SESSION_STATE
BLOCKING_SESSION
3/2/14 3:48 PM
1569
USER
cxypu5167nmrx
3372690345
WAITING
1095

EVENT
WAIT_CLASS
PROGRAM
MACHINE
enq: TM - contention
Application
SQL Developer
PC-09003043-ID

Find more information about the blocking session with blocked session sid as argument :

Select sample_time , session_id , username, sql_id , sql_plan_hash_value, session_state , blocking_session , event , wait_class , program , machine ,
'SELECT * FROM TABLE (DBMS_XPLAN.display_awr ('''||sql_id||''','''||sql_plan_hash_value||''', NULL,''ADVANCED''))' "ASH"
from dba_hist_active_sess_history a , dba_users b
where SAMPLE_TIME  between to_date('19/08/2014 08:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('19/08/2014 23:00:00','DD/MM/YYYY HH24:MI:SS')
and session_id = (
select blocking_session from dba_hist_active_sess_history a
where SAMPLE_TIME  between to_date('19/08/2014 18:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('19/08/2014 23:00:00','DD/MM/YYYY HH24:MI:SS')
and session_id = 1172 -- blocked
and blocking_session is not null
and rownum < 2
)
and session_serial# = (
select blocking_session_serial# from dba_hist_active_sess_history a
where SAMPLE_TIME  between to_date('19/08/2014 18:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('19/08/2014 23:00:00','DD/MM/YYYY HH24:MI:SS')
and session_id = 1172 -- blocked
and blocking_session is not null
and rownum < 2
)
and a.user_id = b.user_id
order by sample_time ;

How the lock spreads to block other sessions

select MI , count(SI) from (
select distinct trunc(sample_time,'MI') MI , session_id SI
from dba_hist_active_sess_history
where blocking_session is not null and event like 'enq%'  and
             SAMPLE_TIME  between to_date('28/05/2015 08:00:00','DD/MM/YYYY HH24:MI:SS') and to_date('28/05/2015 10:30:00','DD/MM/YYYY HH24:MI:SS')
)
group by MI
order by 1 ;

MI
Number of Blocked Sessions
28/05/2015 08:49
3
28/05/2015 08:50
5
28/05/2015 08:54
6
28/05/2015 09:09
7
28/05/2015 09:23
8
28/05/2015 09:33
9
28/05/2015 09:44
10
28/05/2015 09:45
7

[11G] which sql_id is responsible for the TEMP overfilling

Select sample_time , session_id , username, sql_id , sql_plan_hash_value, session_state , blocking_session , event , wait_class , program , machine , temp_space_allocated/1024/1024 TEMP_MB,
'SELECT * FROM TABLE (DBMS_XPLAN.display_awr ('''||sql_id||''','''||sql_plan_hash_value||''', NULL,''ADVANCED''))' "ASH"
from dba_hist_active_sess_history a , dba_users b
where --     session_state= 'WAITING'  and
           SAMPLE_TIME  between TO_DATE('21/09/2013 07:00:00','DD/MM/YYYY HH24:MI:SS') AND TO_DATE('21/09/2013 08:00:00','DD/MM/YYYY HH24:MI:SS')
and a.user_id = b.user_id
and username like '%HHR%'
--and machine like '%TPV%'
order by sample_time ;

SAMPLE_TIME
SESSION_ID
USERNAME
SQL_ID
SQL_PLAN_HASH_VALUE
SESSION_STATE
BLOCKING_SESSION
EVENT
WAIT_CLASS









3/2/14 3:27 PM
187
USR1
cmqyqs4s4bah3
1505238203
ON CPU



3/2/14 3:27 PM
62
SYS
8szmwam7fysa3
1947143079
WAITING

db file sequential read
User I/O

PROGRAM
MACHINE
TEMP_MB
ASH




sqlplus@host-13 (TNS V1-V3)
host-13
7825
SELECT * FROM TABLE (DBMS_XPLAN.display_awr ('cmqyqs4s4bah3','1505238203', NULL,'ADVANCED'))
oracle@host-5 (J000)
host-5
3
SELECT * FROM TABLE (DBMS_XPLAN.display_awr ('8szmwam7fysa3','1947143079', NULL,'ADVANCED'))


1.2 Look for Events

-- search for idle events in DBA_HIST_ACTIVE_SESS_HISTORY
select session_state , event , wait_class , count(1) from (
select sample_time , session_id , username, sql_id , sql_plan_hash_value, session_state , blocking_session , event , wait_class , program , machine
from dba_hist_active_sess_history a , dba_users b
where --     session_state= 'WAITING'  and
--     SAMPLE_TIME  between TO_DATE('25/09/2014 02:00:00','DD/MM/YYYY HH24:MI:SS') and TO_DATE('25/09/2014 04:30:00','DD/MM/YYYY HH24:MI:SS')
       SAMPLE_TIME  between TO_DATE('30/10/2014 02:00:00','DD/MM/YYYY HH24:MI:SS') AND TO_DATE('30/10/2014 07:00:00','DD/MM/YYYY HH24:MI:SS')
and a.user_id = b.user_id and username = 'HR' and upper(program) like 'GEN%'
-- and upper(EVENT) like '%CLIENT%'
order by sample_time
) group by session_state , event , wait_class
order by 3 desc

SESSION_STATE
EVENT
WAIT_CLASS
COUNT(1)
ON CPU


240
WAITING
db file sequential read
User I/O
302
WAITING
db file scattered read
User I/O
12
WAITING
SQL*Net more data from client
Network
3
WAITING
log file switch completion
Configuration
1
WAITING
log file sync
Commit
2


2] DB_Time Top Service consumption on the 5.1 the last 30 days

2.1 Every Hour 24/24

insert into ADB.STAT_MM_APP_PRO_CONSO_DBTIME select trunc(sysdate,'MONTH') , name , APP , DB_TIME from (
select service_name APP , stat_name , sum(DB_Time_S_preceding) DB_TIME from (
select begin_interval_time , service_name , stat_name , DB_Time_Snap , DB_Time_S_preceding , db_time_delta from (
select begin_interval_time , service_name , stat_name , DB_Time_Snap , DB_Time_S_preceding , DB_Time_Snap-DB_Time_S_preceding db_time_delta from (
select begin_interval_time , service_name , stat_name , DB_Time_Snap , ceil(lag(DB_Time_Snap, 1, -1) over(partition by service_name order by begin_interval_time)) DB_Time_S_preceding 
from ( select begin_interval_time , service_name , stat_name , ceil(value/3600) DB_Time_Snap from DBA_HIST_SERVICE_STAT a , dba_hist_snapshot b
where a.snap_id=b.snap_id AND begin_interval_time > sysdate-30 and stat_name like 'DB%t%'
order by 1 desc ) order by 1 desc ) order by 1 desc ) where db_time_delta < 0 order by 1 desc )
group by service_name , stat_name order by 3 desc
 ) , v$database

-- du 27/02/2017 au 28/02/2017 Somme en ?????????????
select service_name APP , stat_name , sum(db_time_delta) DB_TIME from (
select begin_interval_time , service_name , stat_name , DB_Time_Snap , DB_Time_S_preceding , DB_Time_Snap-DB_Time_S_preceding db_time_delta from (
select begin_interval_time , service_name , stat_name , DB_Time_Snap , ceil(lag(DB_Time_Snap, 1, -1) over(partition by service_name order by begin_interval_time)) DB_Time_S_preceding
from (
select begin_interval_time , service_name , stat_name , ceil(value/3600) DB_Time_Snap from DBA_HIST_SERVICE_STAT a , dba_hist_snapshot b
where a.snap_id=b.snap_id
AND begin_interval_time between to_date('02/03/2017 09:00:00','DD/MM/YYYY HH24:MI:SS') AND to_date('02/03/2017 13:00:00','DD/MM/YYYY HH24:MI:SS')
and stat_name like 'DB%t%' -- and service_name = '<USER>_BO'
order by 1 desc
) order by 1 desc
) order by 1 desc
)
group by service_name , stat_name order by 3 desc

APP           STAT_NAME    DB_TIME
SYS$USERS    DB time      22956940
SRV1          DB time      3001787
SRV2          DB time      2974216
SRV3   DB time      1240705

2.2 Business Hour only 0730 - 1900

select trunc(sysdate,'MONTH') , name , APP , DB_TIME from (
-- sum those Daily comp.
select service_name APP , stat_name , sum(db_time_delta) DB_TIME from (
-- each day at 1900 : db_time_delta is the Daily consumption
select begin_interval_time , service_name , stat_name , DB_Time_Snap , DB_Time_S_preceding , db_time_delta from (
-- get DB Time Delta between 2 snaps
select begin_interval_time , service_name , stat_name , DB_Time_Snap , DB_Time_S_preceding , DB_Time_Snap-DB_Time_S_preceding db_time_delta from (
-- get Preceding
select begin_interval_time , service_name , stat_name , DB_Time_Snap , ceil(lag(DB_Time_Snap, 1, -1) over(partition by service_name order by begin_interval_time)) DB_Time_S_preceding 
from (
-- get 0730 and 1900 only
select begin_interval_time , service_name , stat_name , ceil(value/3600) DB_Time_Snap from DBA_HIST_SERVICE_STAT a , dba_hist_snapshot b
where a.snap_id=b.snap_id AND begin_interval_time > sysdate-30 and stat_name like 'DB%t%' -- and service_name = 'SYS$USERS'
and ( to_char(begin_interval_time, 'YYYYMMDD HH24:MI:SS') like '%07:30%' or to_char(begin_interval_time, 'YYYYMMDD HH24:MI:SS') like '%19:00%' )
order by 1 desc
) order by 1 desc
) order by 1 desc
) where to_char(begin_interval_time, 'YYYYMMDD HH24:MI:SS') like '%19:00%' order by 1 desc
) group by service_name , stat_name order by 3 desc
) , v$database


3] Miscellanous

3.1 Slow performance of Datapump export hanging on [ expdp "semtimedop" EAGAIN (Resource temporarily unavailable) ]

Datapump Nightly exports became suddenly very slow, ending at 01:40AM on Monday, then 06:40AM on Wednesday and never ending – killed - at 08:30AM on Thursday !
I used “strace” on Linux to see what Dedicated Export Process was doing : lots of SEMAPHORE calls that timed out ( after one second ) :

semtimedop(1179649, 0x7fffcd2e7850, 1, {1, 0}) = -1 EAGAIN (Resource temporarily unavailable)

The solution according to Metallica Doc ID 760968.1 was to do a Linux server restart ; my test expdp went from 5 minutes to 35 seconds after the reboot, problem solved.

3.2 Total CPU Consumption from every sessions since instance startup


SELECT i.instance_name , i.startup_time , A.CPU_TOT FROM
( select sum(s.value) CPU_TOT from v$sesstat s,v$statname t WHERE s.statistic# = t.statistic# AND t.name='CPU used by this session' ) A , v$instance i
-- DB1 05/03/2017 09:56:32 = 1 829 311
-- DB2 05/03/2017 10:16:09 =   834 207


3.3 Cloud Control – User Defined Metrics

-- oracle sysman user defined mgmt metric table
select * from sysman.mgmt$metric_current WHERE metric_label = 'User-Defined SQL Metrics' order by 1

Metric  Blocking Session Count

select case CNTS when 0 then CNTS else CNTW end
FROM ( SELECT count(1) CNTS FROM v$session WHERE blocking_session IS NOT NULL ) ,
( SELECT ceil(max(seconds_in_wait/60)) CNTW FROM v$session WHERE blocking_session IS NOT NULL )

3.4 Oracle V$BH physical reads even if blocks are in cache

select * from app.dossier a  -- doesn't go into cache
select /*+ INDEX (a , DOS_PK) */ * from app.dossier a  -- go into cache

SELECT o.OBJECT_NAME, o.OBJECT_TYPE, o.OWNER, COUNT(*) NUMBER_OF_BLOCKS
     FROM DBA_OBJECTS o, V$BH bh
    WHERE o.DATA_OBJECT_ID = bh.OBJD
      AND o.OWNER = 'APP' -- NOT IN             ('SYS', 'SYSTEM', 'SYSMAN', 'XDB', 'IX', 'WMSYS', 'CTXSYS')
    GROUP BY o.OBJECT_NAME, o.OWNER, o.OBJECT_TYPE
    ORDER BY COUNT(*);

alter table xxx cache -> MRU instead of LRU