Thursday, December 29, 2011

Duplicated and Covering indexes


In almost every database I have been looking there are some tables which have Covering or duplicate indexes.

What is duplicate index?

This is when table has multiple indexes defined on the same columns. Sometimes it is indexes with different names, sometimes it is different keywords used to define the index.

For example it is quite common to see something like this:

PRIMARY KEY(Log_id), UNIQUE INDEX Log_id(Log_id), INDEX  id(Log_id)


This usual happens when someone creates a primary key constraint on a table. This also creates an unique clustered index by default, when we use Management Studio to create this constraint. As a bonus a developer also creates an unique index on that table, because he didn't see the constraint. And an other developer creates just an index on that table, because he knows he always is going to query the table with that field. The performance of SQL Server is going to be hurt when we do this. It is enough to have the primary  key on this table.

The reason why we don't want to have duplicate keys in our database:

It takes more diskspace.
More I/Os during inserts, updates and deletes.
More maintenance from fragmented indexes.

We also have Covering indexes.

Covering Indexes are indexes which are prefixed of other indexes. For example INDEX(LastName), INDEX(LastName, FisrtName). In almost every case one of the indexes is not going to be used. Or you have to measure what the impact is in having two separated indexes.

I have made a query which determines Duplicated and Covering indexes. It also shows the usage of those indexes from the moment our SQL Server instance was started.


SET NOCOUNT ON
DECLARE @objname   nvarchar(776)
      , @objid     int
-- the object id of the table
      , @indid     smallint
-- the index id of an index
      , @groupid   smallint
-- the filegroup id of an index
      , @indname   sysname
      , @groupname sysname
      , @status    int
      , @keys      nvarchar(2126)
--Length (16*max_identifierLength)+(15*2)+(16*3)
      , @dbname    sysname
      , @usrname   sysname
      , @i         int
      , @thiskey   nvarchar(131)
-- 128+3

-- Check to see that the object names are local to the current database.
SELECT @dbname = parsename(@objname,3)

IF  @dbname IS NOT NULL
AND @dbname <> db_name()
BEGIN
  raiserror(15250,-1,-1)
END

-- create temp table

create table #indextable
(usr_name   sysname
,table_name sysname
,objid      int
,index_name sysname        collate database_default
,indid      int
,stats      int
,groupname  sysname        collate database_default
,index_keys nvarchar(2126) collate database_default
-- see @keys above for length descr
)

-- OPEN CURSOR OVER TABLES
DECLARE cur_tables CURSOR LOCAL STATIC
FOR
  SELECT t1.id
       , t1.name
       , t2.name
    FROM sysobjects t1
   INNER JOIN sysusers t2 on t1.uid = t2.uid
   WHERE type = 'U'

OPEN cur_tables

FETCH cur_tables
 INTO @objid
    , @objname
    , @usrname

WHILE @@fetch_status >= 0
BEGIN
-- OPEN CURSOR OVER INDEXES
  DECLARE cur_indexes CURSOR LOCAL STATIC
  FOR
    SELECT indid
         , groupid
         , name
         , status
      FROM sysindexes
     WHERE id = @objid
       AND indid > 0
       AND indid < 255
       AND (status & 64) = 0
     ORDER BY indid

  OPEN cur_indexes

  FETCH cur_indexes
   INTO @indid
      , @groupid
      , @indname
      , @status

  WHILE @@fetch_status >= 0
  BEGIN
-- First we'll figure out what the keys are.
    SELECT @keys = index_col(@usrname + '.' + @objname, @indid, 1)
         , @i    = 2

    IF (indexkey_property(@objid, @indid, 1, 'isdescending') = 1)
      SELECT @keys = @keys
                   + '(-)'

    SELECT @thiskey = index_col(@usrname + '.' + @objname, @indid, @i)

    IF ((@thiskey IS NOT NULL)
    AND (indexkey_property(@objid, @indid, @i, 'isdescending') = 1))
      SELECT @thiskey = @thiskey
                      + '(-)'

    WHILE (@thiskey IS NOT NULL)
    BEGIN
      SELECT @keys = @keys
                   + ', '
                   + @thiskey
           , @i = @i + 1

      SELECT @thiskey = index_col(@usrname + '.' + @objname, @indid, @i)

      IF ((@thiskey IS NOT NULL)
      AND (indexkey_property(@objid, @indid, @i, 'isdescending') = 1))
        SELECT @thiskey = @thiskey + '(-)'

    END

    SELECT @groupname = groupname
      FROM sysfilegroups
     WHERE groupid = @groupid

-- INSERT ROW FOR INDEX

    INSERT INTO #indextable
         VALUES (@usrname, @objname, @objid, @indname, @indid, @status, @groupname, @keys)

-- Next index

    FETCH cur_indexes
     INTO @indid
        , @groupid
        , @indname
        , @status

  END

  CLOSE cur_indexes

  DEALLOCATE cur_indexes

  FETCH cur_tables
   INTO @objid
      , @objname
      , @usrname

END

DEALLOCATE cur_tables

-- DISPLAY THE RESULTS DUPLICATED

SELECT @@SERVERNAME  as Instance
     , DB_NAME()     as DatabaseName
     , t5.create_date as Instance_started
     , t1.usr_name   as usr_name
     , t1.table_name as table_name
     , t1.index_name as index_name
     , ISNULL(t3.user_seeks, 0) as user_seeks
     , ISNULL(t3.user_scans, 0) as user_scans
     , convert(varchar(210),
--bits 16 off, 1, 2, 16777216 on, located on group
              case
                when (t1.stats & 16)<>0
                  then 'clustered'
                  else 'nonclustered'
              end
            + case
                when (t1.stats & 1)<>0
                  then ', ignore duplicate keys'
                  else ''
              end
            + case
                when (t1.stats & 2)<>0
                  then ', unique'
                  else ''
              end
            + case
                when (t1.stats & 4)<>0
                  then ', ignore duplicate rows'
                  else ''
              end
            + case
                when (t1.stats & 64)<>0
                  then ', statistics'
                  else
                    case
                      when (t1.stats & 32)<>0
                        then ', hypothetical'
                        else ''
                    end
              end
            + case
                when (t1.stats & 2048)<>0
                  then ', primary_key'
                  else ''
              end
            + case
                when (t1.stats & 4096)<>0
                  then ', unique_key'
                  else ''
              end
           + case
               when (t1.stats & 8388608)<>0
                 then ', auto_create'
                 else ''
             end
           + case
               when (t1.stats & 16777216)<>0
                 then ', stats_no_recompute'
                 else ''
               end
           + ' located on '
           + t1.groupname) as index_description
     , t2.index_keys as index_keys
     , t2.index_name as index_name
     , ISNULL(t4.user_seeks, 0) as user_seeks
     , ISNULL(t4.user_scans, 0) as user_scans
     , convert(varchar(210),
--bits 16 off, 1, 2, 16777216 on, located on group
             case
               when (t2.stats & 16)<>0
                 then 'clustered'
                 else 'nonclustered'
             end
           + case
               when (t2.stats & 1)<>0
                 then ', ignore duplicate keys'
                 else ''
             end
           + case
               when (t2.stats & 2)<>0
                 then ', unique'
                 else ''
             end
           + case
               when (t2.stats & 4)<>0
                 then ', ignore duplicate rows'
                 else ''
             end
           + case
               when (t2.stats & 64)<>0
                 then ', statistics'
                 else
                   case
                     when (t2.stats & 32)<>0
                       then ', hypothetical'
                       else ''
                   end
             end
           + case
               when (t2.stats & 2048)<>0
                 then ', primary_key'
                 else ''
             end
           + case
               when (t2.stats & 4096)<>0
                 then ', unique key'
                 else ''
             end
           + case
               when (t2.stats & 8388608)<>0
                 then ', auto create'
                 else ''
               end
           + case
               when (t2.stats & 16777216)<>0
                 then ', stats no recompute'
                 else ''
             end
           + ' located on '
           + t2.groupname) as index_description
     , t2.index_keys as index_keys
  FROM #indextable t1
 INNER JOIN #indextable t2 ON t1.table_name = t2.table_name
                          AND t1.index_name != t2.index_name
                          AND t1.index_keys = t2.index_keys
                          AND t1.indid      < t2.indid
  LEFT JOIN sys.dm_db_index_usage_stats t3 ON t3.database_id = DB_ID()
                                          AND t1.objid = t3.object_id
                                          AND t1.indid = t3.index_id
  LEFT JOIN sys.dm_db_index_usage_stats t4 ON t4.database_id = DB_ID()
                                          AND t2.objid = t4.object_id
                                          AND t2.indid = t4.index_id
  INNER JOIN sys.databases t5 ON t5.name = 'tempdb'
 ORDER BY t1.table_name
        , t1.index_name

-- DISPLAY THE RESULTS OVERLAPPING

SELECT @@SERVERNAME as Instance
     , DB_NAME() as DatabaseName
     , t5.create_date as Instance_started
     , t1.usr_name as usr_name
     , t1.table_name as table_name
     , t1.index_name as index_name
     , ISNULL(t3.user_seeks, 0) as user_seeks
     , ISNULL(t3.user_scans, 0) as user_scans
     , convert(varchar(210),
--bits 16 off, 1, 2, 16777216 on, located on group
             case
               when (t1.stats & 16)<>0
                 then 'clustered'
                 else 'nonclustered'
             end
           + case
               when (t1.stats & 1)<>0
                 then ', ignore duplicate keys'
                 else ''
             end
           + case
               when (t1.stats & 2)<>0
                 then ', unique'
                 else ''
             end
           + case
               when (t1.stats & 4)<>0
                 then ', ignore duplicate rows'
                 else ''
             end
           + case
               when (t1.stats & 64)<>0
                 then ', statistics'
                 else
                   case
                     when (t1.stats & 32)<>0
                       then ', hypothetical'
                       else ''
                   end
             end
           + case
               when (t1.stats & 2048)<>0
                 then ', primary_key'
                 else ''
             end
           + case
               when (t1.stats & 4096)<>0
                 then ', unique key'
                 else ''
             end
          + case
              when (t1.stats & 8388608)<>0
                then ', auto create'
                else ''
            end
          + case
              when (t1.stats & 16777216)<>0
                then 'stats no recompute'
                else ''
              end
          + ' located on '
          + t1.groupname) as index_description
     , t1.index_keys as index_keys
     , t2.index_name as index_name
     , ISNULL(t4.user_seeks, 0) as user_seeks
     , ISNULL(t4.user_scans, 0) as user_scans
     , convert(varchar(210),
--bits 16 off, 1, 2, 16777216 on, located on group
             case
               when (t2.stats & 16)<>0
                 then 'clustered'
                 else 'nonclustered'
             end
           + case
               when (t2.stats & 1)<>0
                 then ', ignore duplicate keys'
                 else ''
             end
           + case
               when (t2.stats & 2)<>0
                 then ', unique'
                 else ''
             end
           + case
               when (t2.stats & 4)<>0
                 then ', ignore duplicate rows'
                 else ''
             end
           + case
               when (t2.stats & 64)<>0
                 then ', statistics'
                 else
                   case
                     when (t2.stats & 32)<>0
                       then ', hypothetical'
                       else ''
                   end
             end
           + case
               when (t2.stats & 2048)<>0
                 then ', primary_key'
                 else ''
             end
           + case
               when (t2.stats & 4096)<>0
                 then ', unique key'
                 else ''
             end
           + case
               when (t2.stats & 8388608)<>0
                 then ', auto create'
                 else ''
               end
           + case
               when (t2.stats & 16777216)<>0
                 then ', stats no recompute'
                 else ''
             end
           + ' located on '
           + t2.groupname) as index_description
       , t2.index_keys as index_keys
  FROM #indextable t1
 INNER JOIN #indextable t2 ON t1.table_name = t2.table_name
                          AND t1.index_name != t2.index_name
                          AND t1.index_keys like t2.index_keys + ',' + '%'
                          AND LTRIM(RTRIM(t1.index_keys)) != LTRIM(RTRIM(t2.index_keys))
  LEFT JOIN sys.dm_db_index_usage_stats t3 ON t3.database_id = DB_ID()
                                          AND t1.objid = t3.object_id
                                          AND t1.indid = t3.index_id
  LEFT JOIN sys.dm_db_index_usage_stats t4 ON t4.database_id = DB_ID()
                                          AND t2.objid = t4.object_id
                                          AND t2.indid = t4.index_id
  INNER JOIN sys.databases t5 ON t5.name = 'tempdb'
 ORDER BY t1.table_name
        , t1.index_name

DROP TABLE #indextable





2 comments:

  1. Nice work, did you also read Kimberly Trip Blog about duplicated indexes.

    http://www.sqlskills.com/blogs/kimberly/

    Regards

    Michel Bruggeman

    ReplyDelete
    Replies
    1. Michel, Thank you for pointing out his Blog. Very interesting.

      Regards,

      Jacob

      Delete