Wednesday, October 19, 2011

Set UPDATE With TOP START AT and COMMIT

Monday's article about Set UPDATE Versus WHERE CURRENT OF Cursor showed that a set-oriented UPDATE statement runs a lot faster than a row-oriented FOR loop, but it also asked the question

What about the commits?


This article shows how the TOP and START AT clauses can be used to break one giant set-oriented UPDATE statement into smaller subsets with COMMIT statements in between.

This technique
  • still runs a lot faster than the FOR loop (19 minutes instead of 75 minutes in one test),

  • doesn't run too much slower than a single UPDATE (19 minutes versus 15 minutes for the single UPDATE), and

  • can be used for large numbers of rows without blowing up the rollback log or gathering up endless row locks.

The following example looks a lot like the earlier article Using LockCount To Display UPDATE Progress; in fact, it uses the same funky technique to display progress messages rather than the simpler method shown in Set UPDATE Versus WHERE CURRENT OF Cursor.

Here's the code, with an explanation afterwards:
DECLARE @rroad_group_2_property_pivot_row_count   BIGINT;
DECLARE @start_at_row_count                       BIGINT; 
DECLARE @continue_updating                        VARCHAR ( 1 );
DECLARE @remaining_row_count                      BIGINT; 
DECLARE @top_limit_row_count                      BIGINT;
DECLARE @batch_count                              BIGINT;
DECLARE @batch_counter                            BIGINT;
DECLARE @batch_description                        LONG VARCHAR;

CHECKPOINT;

SELECT SYSTAB.count
  INTO @rroad_group_2_property_pivot_row_count
  FROM SYSTAB
 WHERE SYSTAB.table_name = 'rroad_group_2_property_pivot';

SET @start_at_row_count = 1;

IF @start_at_row_count <= @rroad_group_2_property_pivot_row_count THEN

   SET @continue_updating   = 'Y';
   SET @remaining_row_count = @rroad_group_2_property_pivot_row_count;
   SET @top_limit_row_count = LESSER ( @remaining_row_count, 1000000 );
   SET @batch_count         = CAST ( CEILING ( CAST ( @rroad_group_2_property_pivot_row_count AS DOUBLE ) / CAST ( 1000000 AS DOUBLE ) ) AS BIGINT );
   SET @batch_counter       = 1;

ELSE

   SET @continue_updating = 'N';

END IF;

WHILE @continue_updating = 'Y' LOOP

   INSERT rroad_progress_messages_requested ( requested_by_connection_id ) VALUES ( @@SPID );

   COMMIT;

   IF @batch_count > 1 THEN

      MESSAGE STRING (
         CURRENT TIMESTAMP,
         ' DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch ',
         rroad_f_number_with_commas ( @batch_counter ),
         ' of ',
         rroad_f_number_with_commas ( @batch_count ),
         ' of ',
         rroad_f_number_with_commas ( @rroad_group_2_property_pivot_row_count ),
         ' total rows.' ) TO CONSOLE;

      SET @batch_description = STRING (
         ' in batch ',
         rroad_f_number_with_commas ( @batch_counter ),
         ' of ',
         rroad_f_number_with_commas ( @batch_count ),
         ' of ',
         rroad_f_number_with_commas ( @rroad_group_2_property_pivot_row_count ),
         ' total rows' );

   ELSE

      SET @batch_description = '';

   END IF;

   TRIGGER EVENT rroad_display_progress_messages (
      @row_count_string  = STRING ( @top_limit_row_count ),
      @batch_description = @batch_description );

   UPDATE TOP @top_limit_row_count
          START AT @start_at_row_count
          rroad_group_2_property_pivot
             INNER JOIN rroad_sample_set
                ON rroad_sample_set.sample_set_number = rroad_group_2_property_pivot.sample_set_number
             LEFT OUTER JOIN ( SELECT rroad_group_2_property_pivot.sample_set_number AS sample_set_number,
                                      rroad_group_2_property_pivot.BlockedOn         AS blocking_connection_number,
                                      COUNT(*)                                       AS blocked_connection_count
                                 FROM rroad_group_2_property_pivot
                                WHERE rroad_group_2_property_pivot.BlockedOn > 0
                                GROUP BY rroad_group_2_property_pivot.sample_set_number,
                                         rroad_group_2_property_pivot.BlockedOn          ) AS blocking_connection
                ON blocking_connection.sample_set_number          = rroad_group_2_property_pivot.sample_set_number
               AND blocking_connection.blocking_connection_number = rroad_group_2_property_pivot.connection_number

      SET rroad_group_2_property_pivot.blocked_connection_count
                = COALESCE ( blocking_connection.blocked_connection_count, 0 ),

          rroad_group_2_property_pivot.current_req_status
                = IF TRIM ( COALESCE ( rroad_group_2_property_pivot.ReqStatus, '' ) ) = ''
                     THEN '-'
                     ELSE CASE TRIM ( rroad_group_2_property_pivot.ReqStatus )
                             WHEN 'Idle'              THEN 'Idle'
                             WHEN 'Unscheduled'       THEN 'Waiting for thread'
                             WHEN 'BlockedIO'         THEN 'Waiting for I/O'
                             WHEN 'BlockedContention' THEN 'Waiting for shared resource'
                             WHEN 'BlockedLock'       THEN 'Blocked by lock'
                             WHEN 'Executing'         THEN 'Executing'
                             ELSE TRIM ( rroad_group_2_property_pivot.ReqStatus )
                          END
                  ENDIF,

          rroad_group_2_property_pivot.cache_satisfaction
                = LESSER ( GREATER (
                     COALESCE (
                        CAST ( ROUND ( CAST ( rroad_group_2_property_pivot.CacheHits AS DECIMAL ( 30, 6 ) )
                                          / CAST ( GREATER ( 1, rroad_group_2_property_pivot.CacheRead ) AS DECIMAL ( 30, 6 ) )
                                          * 100.0,
                                       0 )
                               AS DECIMAL ( 30, 0 ) ),
                        0.0 ),
                     0.0 ), 100.0 ), -- limit the cache_satisfaction to between 0 and 100

          rroad_group_2_property_pivot.time_connected
                = IF TRIM ( COALESCE ( rroad_group_2_property_pivot.LoginTime, '' ) ) = ''
                     THEN 0
                     ELSE DATEDIFF ( MILLISECOND,
                             rroad_f_dateadd_msec ( 3,
                                                    rroad_group_2_property_pivot.LoginTime ),
                             rroad_sample_set.sample_finished_at ) -- also called "sample_recorded_at"
                  ENDIF,

          rroad_group_2_property_pivot.total_waits
                = COALESCE ( rroad_group_2_property_pivot.ReqCountBlockContention, 0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqCountBlockIO, 0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqCountBlockLock, 0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqCountUnscheduled, 0 ),

          rroad_group_2_property_pivot.waiting_time
                = COALESCE ( rroad_group_2_property_pivot.ReqTimeBlockContention, 0.0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqTimeBlockIO, 0.0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqTimeBlockLock, 0.0 )
                     + COALESCE ( rroad_group_2_property_pivot.ReqTimeUnscheduled, 0.0 ),

          rroad_group_2_property_pivot.transaction_running_time
                = IF TRIM ( COALESCE ( rroad_group_2_property_pivot.TransactionStartTime, '' ) ) = ''
                     THEN 0
                     ELSE DATEDIFF ( MILLISECOND,
                             rroad_f_dateadd_msec ( rroad_sample_set.datediff_msec_between_target_and_local,
                                                    rroad_group_2_property_pivot.TransactionStartTime ),
                             rroad_sample_set.sample_finished_at ) -- also called "sample_recorded_at"
                  ENDIF,

          rroad_group_2_property_pivot.time_since_last_request
                = IF TRIM ( COALESCE ( rroad_group_2_property_pivot.LastReqTime, '' ) ) = ''
                     THEN 0
                     ELSE DATEDIFF ( MILLISECOND,
                             rroad_f_dateadd_msec ( rroad_sample_set.datediff_msec_between_target_and_local,
                                                    rroad_group_2_property_pivot.LastReqTime ),
                             rroad_sample_set.sample_finished_at ) -- also called "sample_recorded_at"
                  ENDIF,

          rroad_group_2_property_pivot.index_satisfaction
                = LESSER ( GREATER (
                     COALESCE (
                        CAST ( ROUND ( CAST ( rroad_group_2_property_pivot.IndLookup
                                                 - LESSER ( rroad_group_2_property_pivot.IndLookup, rroad_group_2_property_pivot.FullCompare )
                                              AS DECIMAL ( 30, 6 ) )
                                          / CAST ( GREATER ( 1, rroad_group_2_property_pivot.IndLookup )
                                                   AS DECIMAL ( 30, 6 ) )
                                          * 100.0,
                                       0 )
                               AS DECIMAL ( 30, 0 ) ),
                        0.0 ),
                     0.0 ), 100.0 )  -- limit the index_satisfaction to between 0 and 100

    ORDER BY rroad_group_2_property_pivot.sample_set_number,
          rroad_group_2_property_pivot.connection_number;

   COMMIT;

   DELETE rroad_progress_messages_requested
    WHERE requested_by_connection_id = @@spid;

   COMMIT;

   SET @start_at_row_count = @start_at_row_count + 1000000;

   IF @start_at_row_count <= @rroad_group_2_property_pivot_row_count THEN

      SET @continue_updating   = 'Y';
      SET @remaining_row_count = @remaining_row_count - @top_limit_row_count;
      SET @top_limit_row_count = LESSER ( @remaining_row_count, 1000000 );
      SET @batch_counter       = @batch_counter + 1;

   ELSE

      SET @continue_updating = 'N';

   END IF;

END LOOP; -- WHILE @continue_updating = 'Y' LOOP
  • The initialization logic on lines 17 through 31 fills in all the new variables used to how the rows are divided into batches (subsets) of one million rows each.

  • The WHILE loop starting on line 33 makes one pass for each batch of rows.

  • The INSERT on line 35 comes from the technique shown in Using LockCount To Display UPDATE Progress.

  • The SET @batch_description statement on lines 51 through 58 fills a new string that will be passed to the event which produces the progress messages; more on this later.

  • The TRIGGER EVENT on lines 66 through 68 fires up the event that produces the progress messages; code for this event is shown later.

  • The UPDATE on lines 70 through 167 looks just like the original UPDATE except for three new clauses: TOP, START AT and ORDER BY. Sadly, it may be the addition of the ORDER BY which makes this technique somewhat slower than the big-bang UPDATE, but an ORDER BY really is necessary if you're going to use TOP and START AT in this fashion (the Help says that without an ORDER BY the result is "non-deterministic" which is ANSI for "here be dragons!")

  • The COMMIT on line 169 is why we're going to all this trouble: using set-oriented UPDATE statements for speed, COMMIT for safety.

  • The DELETE starting on line 171 is more progress message stuff; see Using LockCount To Display UPDATE Progress.

  • The code on lines 176 through 189 determines what the next batch will be, if there is one. Yes, this is ugly code... yes, it is error prone... yes, it did have errors... OK, can we move on now?
Here are some excerpts from the database console log; note that the "Done:" messages from the event sometimes appear out-of-order with the "DATA UPGRADE:" messages from the caller; this is not surprising since they're running on separate connections:
I. 10/11 06:47:58. 2011-10-11 06:47:58.181 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 1 of 7 of 6,133,762 total rows.
I. 10/11 06:48:54. 1% updated (17,304 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:48:55. 2% updated (25,727 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:48:57. 3% updated (33,621 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:48:58. 4% updated (41,298 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:49:00. 5% updated (56,670 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
...
I. 10/11 06:51:38. 95% updated (953,906 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:39. 96% updated (961,683 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:40. 97% updated (970,114 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:42. 98% updated (980,956 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:45. 99% updated (993,439 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:47. 100% updated (1,000,000 rows of 1,000,000 in batch 1 of 7 of 6,133,762 total rows)
I. 10/11 06:51:48. 2011-10-11 06:51:48.010 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 2 of 7 of 6,133,762 total rows.
I. 10/11 06:51:48. Done: 1,000,000 rows
I. 10/11 06:52:13. 1% updated (10,397 rows of 1,000,000 in batch 2 of 7 of 6,133,762 total rows)
...
I. 10/11 06:54:32. 100% updated (1,000,000 rows of 1,000,000 in batch 2 of 7 of 6,133,762 total rows)
I. 10/11 06:54:33. 2011-10-11 06:54:33.559 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 3 of 7 of 6,133,762 total rows.
I. 10/11 06:54:33. Done: 1,000,000 rows
I. 10/11 06:54:52. 1% updated (13,873 rows of 1,000,000 in batch 3 of 7 of 6,133,762 total rows)
...
I. 10/11 06:58:42. 100% updated (1,000,000 rows of 1,000,000 in batch 3 of 7 of 6,133,762 total rows)
I. 10/11 06:58:44. 76% committed (762,137 rows of 1,000,000)
I. 10/11 06:58:44. 2011-10-11 06:58:44.277 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 4 of 7 of 6,133,762 total rows.
I. 10/11 06:58:45. Done: 1,000,000 rows
I. 10/11 06:58:59. 1% updated (12,331 rows of 1,000,000 in batch 4 of 7 of 6,133,762 total rows)
...
I. 10/11 07:01:07. 100% updated (1,000,000 rows of 1,000,000 in batch 4 of 7 of 6,133,762 total rows)
I. 10/11 07:01:08. Done: 1,000,000 rows
I. 10/11 07:01:08. 2011-10-11 07:01:08.647 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 5 of 7 of 6,133,762 total rows.
I. 10/11 07:01:23. 1% updated (13,758 rows of 1,000,000 in batch 5 of 7 of 6,133,762 total rows)
...
I. 10/11 07:04:06. 100% updated (1,000,000 rows of 1,000,000 in batch 5 of 7 of 6,133,762 total rows)
I. 10/11 07:04:07. Done: 1,000,000 rows
I. 10/11 07:04:07. 2011-10-11 07:04:07.801 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 6 of 7 of 6,133,762 total rows.
I. 10/11 07:04:22. 1% updated (10,361 rows of 1,000,000 in batch 6 of 7 of 6,133,762 total rows)
...
I. 10/11 07:06:28. 100% updated (1,000,000 rows of 1,000,000 in batch 6 of 7 of 6,133,762 total rows)
I. 10/11 07:06:29. 2011-10-11 07:06:29.496 DATA UPGRADE: Updating Foxhound Monitor connection-level sample rows, batch 7 of 7 of 6,133,762 total rows.
I. 10/11 07:06:29. Done: 1,000,000 rows
I. 10/11 07:06:41. 3% updated (4,496 rows of 133,762 in batch 7 of 7 of 6,133,762 total rows)
I. 10/11 07:06:42. 10% updated (13,546 rows of 133,762 in batch 7 of 7 of 6,133,762 total rows)
...
I. 10/11 07:06:55. 93% updated (124,676 rows of 133,762 in batch 7 of 7 of 6,133,762 total rows)
I. 10/11 07:06:56. 100% updated (133,762 rows of 133,762 in batch 7 of 7 of 6,133,762 total rows)

Here's the code for the event from Using LockCount To Display UPDATE Progress after being modified to use the new @batch_description parameter.

CREATE EVENT rroad_display_progress_messages
HANDLER BEGIN

DECLARE @calling_connection_id                                INTEGER;
DECLARE @commit_percent                                       BIGINT;
DECLARE @continuing                                           VARCHAR ( 1 );
DECLARE @previous_commit_message_displayed_at_loop_counter    BIGINT;
DECLARE @previous_update_message_displayed_at_loop_counter    BIGINT;
DECLARE @lock_count                                           BIGINT;
DECLARE @lock_count_has_increased_at_least_once               VARCHAR ( 1 );
DECLARE @loop_counter                                         BIGINT;
DECLARE @previous_commit_percent                              BIGINT;
DECLARE @previous_lock_count                                  BIGINT;
DECLARE @previous_update_percent                              BIGINT;
DECLARE @row_count                                            BIGINT;
DECLARE @update_percent                                       BIGINT;
DECLARE @batch_description                                    LONG VARCHAR;

SET @continuing = 'Y'; -- until proven otherwise

SET @calling_connection_id = CAST ( COALESCE ( EVENT_PARAMETER ( 'ConnectionID' ), '0' ) AS INTEGER );

IF @calling_connection_id <= 0 THEN
   SET @continuing = 'N'; -- the calling connection id is out of range
END IF;

IF @continuing = 'Y' THEN
   SET @row_count = CAST ( COALESCE ( EVENT_PARAMETER ( '@row_count_string' ), '0' ) AS BIGINT );
   IF @row_count <= 0 THEN
      SET @continuing = 'N'; -- the row count is out of range
   END IF;
END IF;

IF @continuing = 'Y' THEN
   SET @batch_description = COALESCE ( EVENT_PARAMETER ( '@batch_description' ), '' ); -- optional, may be empty or omitted
END IF;

SET @lock_count_has_increased_at_least_once = 'N';
SET @previous_update_message_displayed_at_loop_counter = 0;
SET @previous_commit_message_displayed_at_loop_counter = 0;
SET @previous_lock_count     = 0;
SET @previous_update_percent = 0;
SET @previous_commit_percent = 0;
SET @loop_counter            = 0;

WHILE @continuing = 'Y' LOOP

   SET @loop_counter = @loop_counter + 1;

   SET @lock_count = CONNECTION_PROPERTY ( 'LockCount', @calling_connection_id );

   IF @lock_count IS NULL THEN
      SET @continuing = 'N'; -- the calling connection no longer exist
   END IF;

   IF @continuing = 'Y' THEN
      IF NOT EXISTS ( SELECT * 
                        FROM rroad_progress_messages_requested
                       WHERE requested_by_connection_id = @calling_connection_id ) THEN
         SET @continuing = 'N'; -- the caller no longer wants progress messages displayed
      END IF;
   END IF;

   IF @continuing = 'Y' THEN

      CASE

         WHEN @lock_count > 0
          AND @lock_count > @previous_lock_count THEN

            SET @lock_count_has_increased_at_least_once = 'Y';

            SET @update_percent = CAST (
               (   CAST ( @lock_count AS DECIMAL ( 11, 2 ) ) 
                 / CAST ( @row_count  AS DECIMAL ( 11, 2 ) ) ) * 100.00
               AS BIGINT );

            IF @update_percent > @previous_update_percent
            OR (     @update_percent > 0
                 AND @update_percent = @previous_update_percent
                 AND @loop_counter >= @previous_update_message_displayed_at_loop_counter + 9
                 AND MOD ( @loop_counter, 10 ) = 0  ) THEN

               MESSAGE STRING (  
                  @update_percent,  
                  '% updated (', 
                  rroad_f_number_with_commas ( @lock_count ),  
                  ' rows of ',  
                  rroad_f_number_with_commas ( @row_count ), 
                  @batch_description, 
                  ')' ) TO CONSOLE;

               SET @previous_update_message_displayed_at_loop_counter = @loop_counter;

            END IF;
               
            SET @previous_update_percent = @update_percent;

         WHEN @lock_count > 0
          AND @lock_count < @previous_lock_count
          AND @lock_count_has_increased_at_least_once = 'Y' THEN

            SET @commit_percent = CAST (
               (   ( CAST ( @row_count  AS DECIMAL ( 11, 2 ) ) - CAST ( @lock_count AS DECIMAL ( 11, 2 ) ) ) 
                 / CAST ( @row_count  AS DECIMAL ( 11, 2 ) ) ) * 100.00
               AS BIGINT );

            IF @commit_percent > @previous_commit_percent
            OR (     @commit_percent > 0
                 AND @commit_percent = @previous_commit_percent
                 AND @loop_counter >= @previous_commit_message_displayed_at_loop_counter + 9
                 AND MOD ( @loop_counter, 10 ) = 0  ) THEN

               MESSAGE STRING ( 
                  @commit_percent, 
                  '% committed (', 
                  rroad_f_number_with_commas ( @row_count - @lock_count ), 
                  ' rows of ', 
                  rroad_f_number_with_commas ( @row_count ), 
                  ')' ) TO CONSOLE;

               SET @previous_commit_message_displayed_at_loop_counter = @loop_counter;

            END IF;

            SET @previous_commit_percent = @commit_percent;

         WHEN @lock_count = 0
          AND @lock_count_has_increased_at_least_once = 'Y' THEN

            MESSAGE STRING ( 'Done: ', rroad_f_number_with_commas ( @row_count ), ' rows' ) TO CONSOLE;

            SET @continuing = 'N'; 

         ELSE
            -- No action.

      END CASE;

   END IF;

   IF @continuing = 'Y' THEN
      SET @previous_lock_count = @lock_count;
      WAITFOR DELAY '00:00:01';
   END IF;

END LOOP;

END;

Tip: With a batch size of five million rows the test ran in 18 minutes, down from 19 minutes for a one-million-row batch size, so there is room for experimentation.

Dilbert.com


No comments: