/******************************************************************************* ******************************************************************************* ** ** ** ** ** Copyright 2003-2006 Tom Bascom, Greenfield Technologies ** ** http://www.greenfieldtech.com ** ** ** ** dbStat.p is free software; you can redistribute it and/or modify it ** ** under the terms of the GNU General Public License (GPL) as published ** ** by the Free Software Foundation; either version 2 of the License, or ** ** at your option) any later version. ** ** ** ** dbStat.p is distributed in the hope that it will be useful, but WITHOUT ** ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ** ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ** ** for more details. ** ** ** ** ** ** See http://www.fsf.org for more information about the GPL. ** ** ** ** ** ******************************************************************************* ******************************************************************************* * * dbstat.p * * sample and gather statistics about record count, size and standard deviation * * to run: $ export DB=sports2000 # the database being analyzed $ export LOGDIR=/tmp # where to write the log file $ export SHRTNM=s2k # a short name for the logs $ export DBANAL=s2k.dbanalys.out # optional previously obtained db analysis output $ export TBLIST=customer # optional comma delimited list of tables to report on $ $DLC/bin/_progres -b $DB -p dbstat.p -param "$LOGDIR|$SHRTNM|$DBANAL|$TBLIST" $* >> $LOGDIR/$SHRTNM.err 2>&1 & * */ &global-define SAMPLE 500 /* an integer sample size */ &global-define MAXLOOK 25000 /* the maximum number of attempts */ &global-define GWIDTH 40 /* ASCII-art graph width */ &global-define GHEIGHT 20 /* ASCII-art graph height */ define variable i as integer no-undo. /* number of successful samples */ define variable j as integer no-undo. /* number of sampling attempts */ define variable k as integer no-undo. /* sum of sampled values */ define variable v as integer no-undo extent {&SAMPLE}. /* sampled values used to calc std dev */ define variable s as integer no-undo. /* record length */ define variable x as integer no-undo. /* intermediate value used in calc */ define variable y as decimal no-undo. /* intermediate value used in calc */ define variable z as decimal no-undo format ">>,>>>,>>>,>>9". /* total number of sample attempts */ define variable stime as integer no-undo. /* start time */ define variable max-rid as integer no-undo. /* maximum recid for this storage area */ define variable min-rec as integer no-undo label "Min" format ">>,>>9". /* minimum sampled record size */ define variable max-rec as integer no-undo label "Max" format ">>,>>9". /* maximum sampled record size */ define variable avg-rec as integer no-undo label "Mean" format ">>,>>9". /* average sampled record size */ define variable std-dev as integer no-undo label "Std Dev" format ">>,>>9". /* standard deviation of samples */ define variable rec-cnt as integer no-undo label "Records" format ">>>>>>>>>9". /* estimated number of records in table */ define variable xrec as integer no-undo label "xRecs". /* expected number of records */ define variable xavg as integer no-undo label "xAvg". /* expected average record size */ define variable g as integer no-undo extent {&GWIDTH} format ">>9". define variable graph as character no-undo label "Graph" format "x({&GWIDTH})". define variable bh as handle no-undo. /* a handle for a dynamic buffer */ define variable qh as handle no-undo. /* a handle for a dynamic query */ define variable db_analys as character no-undo. /* name of dbanalys file */ define variable log_dir as character no-undo. define variable log_id as character no-undo. define variable log_name as character no-undo. define variable log_file as character no-undo. define variable tbl_list as character no-undo. assign log_dir = "." log_id = "dbstat." + ldbname( 1 ) . if num-entries( session:parameter, "|" ) >= 1 then log_dir = entry( 1, session:parameter, "|" ). if num-entries( session:parameter, "|" ) >= 2 then log_id = entry( 2, session:parameter, "|" ). if num-entries( session:parameter, "|" ) >= 3 then db_analys = entry( 3, session:parameter, "|" ). if num-entries( session:parameter, "|" ) >= 4 then tbl_list = entry( 4, session:parameter, "|" ). if log_dir = "" then log_dir = ".". if log_id = "" then log_id = "dbstat." + ldbname( 1 ). log_name = log_id + "." + string( month( today ), "99" ) + "." + string( day( today ), "99" ). log_file = log_dir + "/" + log_name. stime = time. /* when did we start running? */ create query qh. output to value( log_file ) unbuffered. for each _file no-lock where not _file._hidden: /* loop through the tables in DICTDB */ if tbl_list <> "" and lookup( _file._file-name, tbl_list ) = 0 then next. /* that's *really* ugly! */ find _storageobject no-lock /* we need to find the *current* */ where _storageobject._db-recid = _file._db-recid /* storage area -- not the *initial* */ and _storageobject._object-type = 1 /* storage area that holds the table */ and _storageobject._object-number = _file._file-num. /* ( _file._ianum = initial area) */ find _area no-lock where _area._area-number = _storageobject._area-number. find _areastatus no-lock where _areastatus._areastatus-areanum = _storageobject._area-number. /* narrow the sample space down by noting that there cannot be a recid * past the area's hiwater mark. it would be nice if we could establish * a hwm on a per table basis but, sadly, that isn't possible :-( */ max-rid = ( _areastatus-hiwater * exp( 2, _area-recbits )) + exp( 2, _area-recbits ) - 1. create buffer bh for table _file._file-name. /* we need a buffer for the table */ qh:set-buffers( bh ). /* set up a dynamic query... */ qh:query-prepare( "for each " + _file._file-name + " no-lock" ). qh:query-open(). assign /* initialize stats and counters */ i = 0 j = 0 k = 0 s = 0 /* reset the successful sample counter */ v = 0 min-rec = 999999 max-rec = 0 . do while s < {&SAMPLE}: /* make sure that there are at least */ /* {&SAMPLE} records in the table */ qh:get-next(). if qh:query-off-end then leave. else s = s + 1. /* count a record */ end. if s = 0 then next. /* skip table if there are no records */ display _file._file-name label "Table Name" format "x(15)" with width 132 . qh:query-close(). /* set up the dynamic query again... */ qh:set-buffers( bh ). /* we will use this if we're in "grab */ qh:query-prepare( "for each " + _file._file-name + " no-lock" ). /* them all" rather than "sample" mode. */ qh:query-open(). stat_loop: do while i < min( s, {&SAMPLE} ): /* don't try to find more records than */ /* exist... */ assign j = j + 1 /* how many times have we looked? */ z = z + 1 /* (in total across all tables...) */ . /* if j > ( {&MAXLOOK} * {&SAMPLE} ) then leave stat_loop. */ /* don't try forever */ if j > ( {&MAXLOOK} ) then leave stat_loop. /* don't try forever */ if s >= {&SAMPLE} then /* if we expect more than {&SAMPLE} */ do: /* records then randomly probe for one */ /* by recid... */ qh:query-close(). qh:set-buffers( bh ). qh:query-prepare( "for each " + _file._file-name + /* look for records in _file-name */ " no-lock where recid( " + /* no-lock, where the RECID is a random */ _file._file-name + " ) = " + /* recid between 1 and the recid of the */ string( random( 1, max-rid )) /* area hi-water mark */ ). qh:query-open(). end. /* otherwise just get them all ;-) */ qh:get-next(). if not qh:query-off-end then /* did we find a record? */ assign i = i + 1 /* count it! */ x = bh:record-length + 2 /* dbanalysis is 2 bytes longer... */ min-rec = min( x, min-rec ) /* is it the smallest so far? */ max-rec = max( x, max-rec ) /* is it the biggest so far? */ k = k + x /* add it to the summation */ v[i] = x /* track the individual values */ . end. qh:query-close(). /* clean up... */ delete object bh. if min-rec = 999999 then next. /* too sparsely populated -- give up */ assign avg-rec = k / i /* compute the mean record size */ g = 0 y = max( 1, (( max-rec - min-rec ) + 1 ) / ( {&GWIDTH} - 1 )) x = 0 . do s = 1 to i: /* calculate the sum of the squares of */ x = x + exp(( avg-rec - v[s] ), 2 ). /* the differences from the average */ g[integer(( v[s] - min-rec ) / y ) + 1] = g[integer(( v[s] - min-rec ) / y ) + 1] + 1. end. std-dev = exp(( x / ( i - 1 )), 0.5 ). /* the standard deviation */ rec-cnt = integer(( if i < {&SAMPLE} then i else (( max-rid / j ) * i ))). /* estimate the record count based on */ /* density of sample... */ display rec-cnt min-rec max-rec avg-rec std-dev . if db_analys > "" then /* compare to known dbanalys output */ do: assign /* we might not find anything (the grep */ xrec = -1 /* command below won't, for instance, */ xavg = -1 /* return lines that are split due to */ . /* long table names). */ input through value( 'grep -i "PUB.' + _file._file-name + ' " ' + db_analys ). /* the trailing " " after the file name */ import ^ xrec ^ ^ ^ xavg no-error. /* prevents substring problems! */ input close. end. x = 0. do i = 1 to {&GWIDTH}: /* find the max value for the y-axis */ x = max( x, g[i] ). /* perhaps the variable ought to be "y" */ end. do j = {&GHEIGHT} to 1 by -1: /* create the chart line by line... */ if rec-cnt >= {&SAMPLE} then /* if there is anything to chart ;-) */ do: display (( j ) * ( x / {&GHEIGHT} )) @ g[1] format ">,>>>,>>9" with no-label. /* label the y-axis */ graph = "". do i = 1 to min( {&GWIDTH}, (( max-rec - min-rec ) + 1 )): /* if columns are less than 1 unit */ /* apart then the width is max - min */ graph = graph + ( if ( g[i] > (( j - 1 ) * ( x / {&GHEIGHT} ))) then /* if we found samples in this range */ "*" /* mark the spot! */ else ( if ( g[i] = 0 ) and ( j = 1 ) then /* if it is the baseline we might want */ " " /* to output "_" */ else /* the bar isn't this high but we need */ " " ) /* to align the next column */ ). end. display graph. /* spit out the line */ end. if j = ( {&GHEIGHT} - 1 ) then /* display expected values obtained */ display /* from dbanalys */ xrec @ rec-cnt xavg @ avg-rec . if j = ( {&GHEIGHT} - 2 ) then /* display variance */ display exp( exp(( xrec - rec-cnt ) / xrec, 2 ), 0.5 ) * 100 format ">,>>9.99%" @ rec-cnt exp( exp(( xavg - avg-rec ) / xavg, 2 ), 0.5 ) * 100 format ">,>>9.99%" @ avg-rec . if rec-cnt >= {&SAMPLE} or ( j >= ( {&GHEIGHT} - 2 )) then down 1. end. if rec-cnt >= {&SAMPLE} then do: /* show how wide each column is */ graph = fill( "-", min( {&GWIDTH}, (( max-rec - min-rec ) + 1 ))). display graph y. down 1. /* show min & max record sizes */ graph = string( min-rec ). graph = graph + fill( " ", min( {&GWIDTH}, (( max-rec - min-rec ) + 1 )) - length( graph ) - 4 ) + string( max-rec, ">>>9" ). display graph. down 1. /* show standard deviations & mean */ graph = fill( "_", min( {&GWIDTH}, (( max-rec - min-rec ) + 1 ))). do k = 1 to 9: x = avg-rec + ( std-dev * k ). if x <= max-rec then substr( graph, integer(( x - min-rec ) / y ) + 1, 1 ) = string( k ). x = avg-rec - ( std-dev * k ). if x >= min-rec then substr( graph, integer(( x - min-rec ) / y ) + 1, 1 ) = string( k ). end. /* insert the mean last so that it overwrites std-dev if values are closely packed */ substr( graph, integer(( avg-rec - min-rec ) / y ) + 1, 1 ) = "^". display graph. down 1. /* show suggested RPB break points */ graph = fill( "-", min( {&GWIDTH}, (( max-rec - min-rec ) + 1 ))). /* standard v9 RPB break points -- for OE10 this should look at the area create & toss limits * */ if min-rec <= 27 and max-rec >= 27 then substr( graph, integer(( 27 - min-rec ) / y ) + 1, 1 ) = string( "8" ). else if min-rec <= 57 and max-rec >= 57 then substr( graph, integer(( 57 - min-rec ) / y ) + 1, 1 ) = string( "7" ). else if min-rec <= 116 and max-rec >= 116 then substr( graph, integer(( 116 - min-rec ) / y ) + 1, 1 ) = string( "6" ). else if min-rec <= 234 and max-rec >= 234 then substr( graph, integer(( 234 - min-rec ) / y ) + 1, 1 ) = string( "5" ). else if min-rec <= 473 and max-rec >= 473 then substr( graph, integer(( 473 - min-rec ) / y ) + 1, 1 ) = string( "4" ). else if min-rec <= 946 and max-rec >= 946 then substr( graph, integer(( 946 - min-rec ) / y ) + 1, 1 ) = string( "3" ). else if min-rec <= 1893 and max-rec >= 1893 then substr( graph, integer(( 1893 - min-rec ) / y ) + 1, 1 ) = string( "2" ). else if min-rec <= 3786 and max-rec >= 3786 then substr( graph, integer(( 3786 - min-rec ) / y ) + 1, 1 ) = string( "1" ). else if min-rec <= 7572 and max-rec >= 7572 then substr( graph, integer(( 7572 - min-rec ) / y ) + 1, 1 ) = string( "0" ). display graph. down 1. down 1. /* skip an extra line */ end. end. delete object qh. /* finish cleaning up */ display {&SAMPLE} z string( time - stime, "hh:mm:ss" ). /* how long did it take? */ output close. if os-getenv( "MAILLOG" ) > "" then os-command value( "mailx -s " + log_name + " " + os-getenv( "MAILLOG" ) + " < " + log_file ). return.