Skip to content

File statistics_cl.c

File List > cubrid > src > storage > statistics_cl.c

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * statistics_cl.c - statistics manager (client)
 */

#ident "$Id$"

#include "config.h"

#include <stdio.h>

#include "object_representation.h"
#include "statistics.h"
#include "object_primitive.h"
#include "memory_alloc.h"
#include "work_space.h"
#include "schema_manager.h"
#include "network_interface_cl.h"
#include "tz_support.h"
#include "db_date.h"
#include "db.h"
#include "dbtype_function.h"

static CLASS_STATS *stats_client_unpack_statistics (char *buffer);

/*
 * stats_get_statistics () - Fetch and unpack the statistics of a class
 *   return: NO_ERROR, or the error code reported by the server request
 *   class_oid_p(in): OID of the class whose statistics are requested
 *   time_stamp(in): forwarded unchanged to the server request
 *                   (NOTE(review): presumably lets the server skip statistics
 *                   that have not changed since then - confirm)
 *   stats_p(out): receives the unpacked statistics, or NULL
 *
 * Note: Hides the communication buffer from the upper levels: the packed
 *       buffer obtained from the server is unpacked into a CLASS_STATS and
 *       released here. *stats_p is reset to NULL first and stays NULL when
 *       the request fails, when the server sends no buffer, or when
 *       unpacking fails; in the last two cases NO_ERROR is still returned.
 */
int
stats_get_statistics (OID * class_oid_p, unsigned int time_stamp, CLASS_STATS ** stats_p)
{
  char *packed_stats = NULL;
  int packed_length = -1;
  int status;

  *stats_p = NULL;

  status = stats_get_statistics_from_server (class_oid_p, time_stamp, &packed_length, &packed_stats);
  if (status != NO_ERROR || packed_stats == NULL)
    {
      /* nothing to unpack */
      return status;
    }

  assert (packed_length > 0);

  *stats_p = stats_client_unpack_statistics (packed_stats);
  free_and_init (packed_stats);

  return status;
}

/*
 * stats_client_unpack_statistics () - Unpack the buffer containing statistics
 *   return: CLASS_STATS or NULL in case of error
 *   buf_p(in): buffer containing the packed class statistics
 *
 * Note: This function unpacks the statistics on the buffer received from the
 *       server side, and builds a CLASS_STATS structure on the work space
 *       area. This structure is returned to the caller, who releases it with
 *       stats_free_statistics ().
 *
 *       The buffer is read in this order:
 *         time_stamp, heap_num_objects, heap_num_pages, n_attrs (ints), then
 *         for each attribute: id, type, n_btstats (ints), ndv (int64), then
 *         for each B+tree of the attribute: btid, leafs, pages, height,
 *         has_function, keys, dedup_idx (ints), the packed key domain and
 *         pkeys_size partial-key counters (ints).
 *
 *       NULL is also returned when the buffer describes no attributes.
 */
static CLASS_STATS *
stats_client_unpack_statistics (char *buf_p)
{
  CLASS_STATS *class_stats_p;
  ATTR_STATS *attr_stats_p;
  BTREE_STATS *btree_stats_p;
  int i, j, k;

  if (buf_p == NULL)
    {
      return NULL;
    }

  class_stats_p = (CLASS_STATS *) db_ws_alloc (sizeof (CLASS_STATS));
  if (class_stats_p == NULL)
    {
      return NULL;
    }

  class_stats_p->time_stamp = (unsigned int) OR_GET_INT (buf_p);
  buf_p += OR_INT_SIZE;

  class_stats_p->heap_num_objects = OR_GET_INT (buf_p);
  buf_p += OR_INT_SIZE;
  if (class_stats_p->heap_num_objects < 0)
    {
      assert (false);
      class_stats_p->heap_num_objects = 0;
    }

  class_stats_p->heap_num_pages = OR_GET_INT (buf_p);
  buf_p += OR_INT_SIZE;
  if (class_stats_p->heap_num_pages < 0)
    {
      assert (false);
      class_stats_p->heap_num_pages = 0;
    }

  /* to get the doubtful statistics to be updated, need to clear timestamp */
  if (class_stats_p->heap_num_objects == 0 || class_stats_p->heap_num_pages == 0)
    {
      class_stats_p->time_stamp = 0;
    }

  class_stats_p->n_attrs = OR_GET_INT (buf_p);
  buf_p += OR_INT_SIZE;
  /* a negative count would turn into a huge allocation size below; treat it like "no attributes" */
  if (class_stats_p->n_attrs <= 0)
    {
      db_ws_free (class_stats_p);
      return NULL;
    }

  class_stats_p->attr_stats = (ATTR_STATS *) db_ws_alloc (class_stats_p->n_attrs * sizeof (ATTR_STATS));
  if (class_stats_p->attr_stats == NULL)
    {
      db_ws_free (class_stats_p);
      return NULL;
    }
  /* The error paths below hand the partially built structure to stats_free_statistics (), which visits every
   * attribute entry. Zero the array so entries that were not decoded yet hold bt_stats == NULL / n_btstats == 0
   * instead of uninitialized garbage. */
  memset (class_stats_p->attr_stats, 0, class_stats_p->n_attrs * sizeof (ATTR_STATS));

  for (i = 0, attr_stats_p = class_stats_p->attr_stats; i < class_stats_p->n_attrs; i++, attr_stats_p++)
    {
      attr_stats_p->id = OR_GET_INT (buf_p);
      buf_p += OR_INT_SIZE;

      attr_stats_p->type = (DB_TYPE) OR_GET_INT (buf_p);
      buf_p += OR_INT_SIZE;

      attr_stats_p->n_btstats = OR_GET_INT (buf_p);
      buf_p += OR_INT_SIZE;

      OR_GET_INT64 (buf_p, &attr_stats_p->ndv);
      buf_p += OR_INT64_SIZE;

      if (attr_stats_p->n_btstats <= 0)
        {
          /* no B+tree statistics for this attribute */
          attr_stats_p->bt_stats = NULL;
          continue;
        }

      attr_stats_p->bt_stats = (BTREE_STATS *) db_ws_alloc (attr_stats_p->n_btstats * sizeof (BTREE_STATS));
      if (attr_stats_p->bt_stats == NULL)
        {
          stats_free_statistics (class_stats_p);
          return NULL;
        }
      /* keeps pkeys == NULL for entries not reached yet, so a later failure can be cleaned up safely */
      memset (attr_stats_p->bt_stats, 0, attr_stats_p->n_btstats * sizeof (BTREE_STATS));

      for (j = 0, btree_stats_p = attr_stats_p->bt_stats; j < attr_stats_p->n_btstats; j++, btree_stats_p++)
        {
          OR_GET_BTID (buf_p, &btree_stats_p->btid);
          buf_p += OR_BTID_ALIGNED_SIZE;

          btree_stats_p->leafs = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          btree_stats_p->pages = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          btree_stats_p->height = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          btree_stats_p->has_function = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          btree_stats_p->keys = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          btree_stats_p->dedup_idx = OR_GET_INT (buf_p);
          buf_p += OR_INT_SIZE;

          buf_p = or_unpack_domain (buf_p, &btree_stats_p->key_type, 0);

          /* one partial-key counter per column of a multi-column key, a single one otherwise */
          if (TP_DOMAIN_TYPE (btree_stats_p->key_type) == DB_TYPE_MIDXKEY)
            {
              btree_stats_p->pkeys_size = tp_domain_size (btree_stats_p->key_type->setdomain);
            }
          else
            {
              btree_stats_p->pkeys_size = 1;
            }

          /* cut-off to stats */
          if (btree_stats_p->pkeys_size > BTREE_STATS_PKEYS_NUM)
            {
              btree_stats_p->pkeys_size = BTREE_STATS_PKEYS_NUM;
            }

          btree_stats_p->pkeys = (int *) db_ws_alloc (btree_stats_p->pkeys_size * sizeof (int));
          if (btree_stats_p->pkeys == NULL)
            {
              stats_free_statistics (class_stats_p);
              return NULL;
            }

          assert (btree_stats_p->pkeys_size <= BTREE_STATS_PKEYS_NUM);
          for (k = 0; k < btree_stats_p->pkeys_size; k++)
            {
              btree_stats_p->pkeys[k] = OR_GET_INT (buf_p);
              buf_p += OR_INT_SIZE;
            }
        }
    }

  /* validate key stats info: a key count never exceeds the object count, a partial-key count never exceeds the
   * key count */
  assert (class_stats_p->heap_num_objects >= 0);
  for (i = 0, attr_stats_p = class_stats_p->attr_stats; i < class_stats_p->n_attrs; i++, attr_stats_p++)
    {
      for (j = 0, btree_stats_p = attr_stats_p->bt_stats; j < attr_stats_p->n_btstats; j++, btree_stats_p++)
        {
          assert (btree_stats_p->keys >= 0);
          btree_stats_p->keys = MIN (btree_stats_p->keys, class_stats_p->heap_num_objects);
          for (k = 0; k < btree_stats_p->pkeys_size; k++)
            {
              assert (btree_stats_p->pkeys[k] >= 0);
              btree_stats_p->pkeys[k] = MIN (btree_stats_p->pkeys[k], btree_stats_p->keys);
            }
        }
    }

  return class_stats_p;
}

/*
 * stats_free_statistics () - Release a CLASS_STATS structure and all it owns
 *   return: void
 *   class_statsp(in): class statistics to be freed; NULL is accepted
 *
 * Note: Frees, innermost first, every partial-key array, every B+tree
 *       statistics array, the attribute statistics array and finally the
 *       structure itself, all through db_ws_free ().
 */
void
stats_free_statistics (CLASS_STATS * class_statsp)
{
  int attr_idx, bt_idx;

  if (class_statsp == NULL)
    {
      return;
    }

  if (class_statsp->attr_stats != NULL)
    {
      for (attr_idx = 0; attr_idx < class_statsp->n_attrs; attr_idx++)
        {
          ATTR_STATS *attr = &class_statsp->attr_stats[attr_idx];

          if (attr->bt_stats == NULL)
            {
              /* attribute without B+tree statistics */
              continue;
            }

          for (bt_idx = 0; bt_idx < attr->n_btstats; bt_idx++)
            {
              BTREE_STATS *bt = &attr->bt_stats[bt_idx];

              if (bt->pkeys != NULL)
                {
                  db_ws_free (bt->pkeys);
                  bt->pkeys = NULL;
                }
            }

          db_ws_free (attr->bt_stats);
          attr->bt_stats = NULL;
        }

      db_ws_free (class_statsp->attr_stats);
      class_statsp->attr_stats = NULL;
    }

  db_ws_free (class_statsp);
}

/*
 * stats_dump () - Dumps the stored statistics of a class
 *   return: void
 *   class_name_p(in): name of the class to be printed
 *   file_p(in): stream the report is written to
 *
 * Note: Prints an error line when the class cannot be found. Returns without
 *       printing anything when the class or its statistics cannot be
 *       obtained. For every attribute the number of distinct values and, if
 *       present, the statistics of each B+tree are printed.
 */
void
stats_dump (const char *class_name_p, FILE * file_p)
{
  MOP class_mop;
  CLASS_STATS *class_stats_p;
  ATTR_STATS *attr_stats_p;
  BTREE_STATS *bt_stats_p;
  SM_CLASS *smclass_p;
  int i, j, k;
  int pkeys_to_print;
  const char *name_p;
  const char *prefix_p = "";
  time_t tloc;

  class_mop = sm_find_class (class_name_p);
  if (class_mop == NULL)
    {
      fprintf (file_p, "\nERROR: %s\n", er_msg ());
      return;
    }

  smclass_p = sm_get_class_with_statistics (class_mop);
  if (smclass_p == NULL)
    {
      return;
    }

  class_stats_p = smclass_p->stats;
  if (class_stats_p == NULL)
    {
      return;
    }

  tloc = (time_t) class_stats_p->time_stamp;

  fprintf (file_p, "\nCLASS STATISTICS\n");
  fprintf (file_p, "****************\n");
  fprintf (file_p, " Class name: %s", class_name_p);
  if (tloc == 0)
    {
      /* a zero time stamp marks statistics that have not been updated */
      fprintf (file_p, " (The 'stats' is not updated)\n");
    }
  else
    {
      /* ctime () output already ends with a newline */
      fprintf (file_p, " Timestamp: %s", ctime (&tloc));
    }
  fprintf (file_p, " Total pages in class heap: %d\n", class_stats_p->heap_num_pages);
  fprintf (file_p, " Total objects: %d\n", class_stats_p->heap_num_objects);
  fprintf (file_p, " Number of attributes: %d\n", class_stats_p->n_attrs);

  for (i = 0; i < class_stats_p->n_attrs; i++)
    {
      attr_stats_p = &(class_stats_p->attr_stats[i]);

      name_p = sm_get_att_name (class_mop, attr_stats_p->id);
      fprintf (file_p, " Attribute: %s (", (name_p ? name_p : "not found"));
      fprintf (file_p, "%s)\n", pr_type_name (attr_stats_p->type));
      /* ndv is a 64-bit value; "%ld" is only correct where long is 64 bits wide */
      fprintf (file_p, "    Number of Distinct Values: %lld\n", (long long) attr_stats_p->ndv);

      if (attr_stats_p->n_btstats > 0)
        {
          fprintf (file_p, "    B+tree statistics:\n");

          for (j = 0; j < attr_stats_p->n_btstats; j++)
            {
              bt_stats_p = &(attr_stats_p->bt_stats[j]);

              fprintf (file_p, "        BTID: { %d , %d }\n", bt_stats_p->btid.vfid.volid,
                       bt_stats_p->btid.vfid.fileid);
              fprintf (file_p, "        Cardinality: %d (", bt_stats_p->keys);

              prefix_p = "";
              assert (bt_stats_p->pkeys_size <= BTREE_STATS_PKEYS_NUM);
              assert (bt_stats_p->dedup_idx != 0);

              /* A non-negative dedup_idx limits the number of partial keys shown (NOTE(review): presumably the
               * position of the hidden deduplication column - confirm). Only pkeys_size counters are stored,
               * so never read past them. */
              pkeys_to_print = (bt_stats_p->dedup_idx >= 0) ? bt_stats_p->dedup_idx : bt_stats_p->pkeys_size;
              pkeys_to_print = MIN (pkeys_to_print, bt_stats_p->pkeys_size);

              for (k = 0; k < pkeys_to_print; k++)
                {
                  fprintf (file_p, "%s%d", prefix_p, bt_stats_p->pkeys[k]);
                  prefix_p = ",";
                }
              fprintf (file_p, ") ,");
              fprintf (file_p, " Total pages: %d , Leaf pages: %d , Height: %d\n", bt_stats_p->pages,
                       bt_stats_p->leafs, bt_stats_p->height);
            }
        }
      fprintf (file_p, "\n");
    }

  fprintf (file_p, "\n\n");
}

/*
 * stats_ndv_dump () - Dumps the number of distinct values (NDV) of a class
 *   return: void
 *   class_name_p(in): name of the class to be printed
 *   file_p(in): stream the report is written to
 *
 * Note: The values are computed on demand by stats_get_ndv_by_query () with
 *       STATS_WITH_SAMPLING. One line is printed per attribute, followed by
 *       the total row count, which is stored in the slot after the last
 *       attribute. Nothing but the query text is printed when the query
 *       fails.
 */
void
stats_ndv_dump (const char *class_name_p, FILE * file_p)
{
  MOP class_mop;
  CLASS_ATTR_NDV class_attr_ndv = CLASS_ATTR_NDV_INITIALIZER;
  const char *att_name_p;
  int i;

  class_mop = sm_find_class (class_name_p);
  if (class_mop == NULL)
    {
      fprintf (file_p, "\nERROR: %s\n", er_msg ());
      return;
    }

  /* get NDV by query */
  if (stats_get_ndv_by_query (class_mop, &class_attr_ndv, file_p, STATS_WITH_SAMPLING) != NO_ERROR
      || class_attr_ndv.attr_ndv == NULL)
    {
      goto end;
    }

  /* print NDV info */
  fprintf (file_p, "\nNumber of Distinct Values\n");
  fprintf (file_p, "****************\n");
  fprintf (file_p, " Class name: %s\n", sm_get_ch_name (class_mop));
  for (i = 0; i < class_attr_ndv.attr_cnt; i++)
    {
      /* same fallback as stats_dump (): never hand a NULL name to "%s" */
      att_name_p = sm_get_att_name (class_mop, class_attr_ndv.attr_ndv[i].id);
      /* ndv is a 64-bit value; "%ld" is only correct where long is 64 bits wide */
      fprintf (file_p, "  %s (%lld)\n", (att_name_p ? att_name_p : "not found"),
               (long long) class_attr_ndv.attr_ndv[i].ndv);
    }
  /* i == attr_cnt here: the extra trailing slot carries count(*) */
  fprintf (file_p, "total count : %lld\n", (long long) class_attr_ndv.attr_ndv[i].ndv);
  fprintf (file_p, "\n");

end:
  /* the array is allocated by stats_get_ndv_by_query () but owned by this function, also on failure */
  if (class_attr_ndv.attr_ndv != NULL)
    {
      free_and_init (class_attr_ndv.attr_ndv);
    }
  return;
}

/*
 * stats_get_ndv_by_query () - Compute the NDV of every attribute with a query
 *   return: NO_ERROR, or an error code
 *   class_mop(in): class to be examined
 *   class_attr_ndv(out): attr_cnt is set to the number of attributes and
 *                        attr_ndv to a malloc'ed array of attr_cnt + 1
 *                        entries; the last entry holds count(*)
 *   file_p(in): when not NULL, the generated query text is written to it
 *   with_fullscan(in): STATS_WITH_FULLSCAN runs the query without a hint, any
 *                      other value adds the SAMPLING_SCAN hint
 *
 * Note: The attr_ndv array is never released here, not even on failure; the
 *       caller owns it and must free it.
 *       A distinct count of 0 is stored as 1 (the column holds only NULLs).
 */
int
stats_get_ndv_by_query (const MOP class_mop, CLASS_ATTR_NDV * class_attr_ndv, FILE * file_p, int with_fullscan)
{
  DB_ATTRIBUTE *att;
  int error = NO_ERROR;
  char *select_list = NULL;
  const char *query = NULL;
  char *query_buf = NULL;
  size_t buf_size = 0;
  DB_QUERY_RESULT *query_result = NULL;
  DB_QUERY_ERROR query_error;
  DB_VALUE value;
  INT64 ndv;
  int i;
  const char *class_name_p = NULL;

  /* if class is not normal class */
  if (!db_is_class (class_mop))
    {
      /* The class does not have a heap file (i.e. it has no instances); so no statistics can be obtained for this
       * class; just set to 0 and return. */
      class_attr_ndv->attr_cnt = 0;
      class_attr_ndv->attr_ndv = (ATTR_NDV *) malloc (sizeof (ATTR_NDV));
      if (class_attr_ndv->attr_ndv == NULL)
        {
          return ER_FAILED;
        }
      class_attr_ndv->attr_ndv[0].ndv = 0;
      class_attr_ndv->attr_ndv[0].id = 0;
      return NO_ERROR;
    }

  /* get class_name; it is passed to strlen () below, so it must not be NULL */
  class_name_p = db_get_class_name (class_mop);
  if (class_name_p == NULL)
    {
      return ER_FAILED;
    }

  /* count number of the columns */
  class_attr_ndv->attr_cnt = 0;
  att = (DB_ATTRIBUTE *) db_get_attributes_force (class_mop);
  while (att != NULL)
    {
      class_attr_ndv->attr_cnt++;
      att = db_attribute_next (att);
    }

  /* one extra slot for count(*) */
  class_attr_ndv->attr_ndv = (ATTR_NDV *) malloc (sizeof (ATTR_NDV) * (class_attr_ndv->attr_cnt + 1));
  if (class_attr_ndv->attr_ndv == NULL)
    {
      return ER_FAILED;
    }

  select_list = stats_make_select_list_for_ndv (class_mop, &class_attr_ndv->attr_ndv);
  if (select_list == NULL)
    {
      return ER_FAILED;
    }

  /* create sampling SQL statement */
  if (with_fullscan == STATS_WITH_FULLSCAN)
    {
      query = "SELECT %s FROM [%s]";
    }
  else
    {
      query = "SELECT /*+ SAMPLING_SCAN */ %s FROM [%s]";
    }

  /* the template length over-counts the two "%s" placeholders, which more than covers the terminating NUL */
  buf_size = strlen (query) + strlen (select_list) + strlen (class_name_p) + 1;
  query_buf = (char *) malloc (sizeof (char) * buf_size);
  if (query_buf == NULL)
    {
      error = ER_FAILED;
      goto end;
    }
  snprintf (query_buf, buf_size, query, select_list, class_name_p);

  if (file_p != NULL)
    {
      fprintf (file_p, "Query : %s\n", query_buf);
    }

  /* execute sampling SQL */
  error = db_compile_and_execute_local (query_buf, &query_result, &query_error);
  if (error < NO_ERROR)
    {
      goto end;
    }

  /* save result to NDV info */
  error = db_query_first_tuple (query_result);
  if (error != DB_CURSOR_SUCCESS)
    {
      /* A positive value is a cursor state (no tuple available), not an error code. Report it as a failure so
       * that callers testing "error < 0" do not go on to read NDV slots that were never filled. */
      if (error > NO_ERROR)
        {
          error = ER_FAILED;
        }
      goto end;
    }

  /* get NDV from tuple */
  for (i = 0; i < class_attr_ndv->attr_cnt; i++)
    {
      error = db_query_get_tuple_value (query_result, i, &value);
      if (error != NO_ERROR)
        {
          goto end;
        }
      /* if NDV is 0, it is all null values. */
      ndv = DB_GET_BIGINT (&value);
      class_attr_ndv->attr_ndv[i].ndv = (ndv == 0) ? 1 : ndv;
    }

  /* get count(*), the column after the last attribute */
  error = db_query_get_tuple_value (query_result, i, &value);
  if (error != NO_ERROR)
    {
      goto end;
    }
  class_attr_ndv->attr_ndv[i].ndv = DB_GET_BIGINT (&value);

end:
  if (select_list)
    {
      free_and_init (select_list);
    }
  if (query_buf)
    {
      free_and_init (query_buf);
    }
  if (query_result)
    {
      db_query_end (query_result);
      query_result = NULL;
    }

  return error;
}

/*
 * stats_make_select_list_for_ndv () - make select-list for ndv
 *   return:
 *   class_mop(in): ....
 *   select_list(in/out):
 */
char *
stats_make_select_list_for_ndv (const MOP class_mop, ATTR_NDV ** attr_ndv)
{
  DB_ATTRIBUTE *att;
  char column[DB_MAX_IDENTIFIER_LENGTH + 20] = { '\0' };
  DB_DOMAIN *dom;
  size_t buf_size = 1024;
  ATTR_NDV *att_ndv = *attr_ndv;
  int i = 0;
  char *select_list = NULL, *select_buf = NULL;

  select_list = (char *) malloc (sizeof (char) * buf_size);
  if (select_list == NULL)
    {
      return NULL;
    }
  /* init select_list */
  *select_list = '\0';

  /* make select_list */
  att = (DB_ATTRIBUTE *) db_get_attributes_force (class_mop);
  while (att != NULL)
    {
      /* check if type is varchar or lob or json. */
      dom = db_attribute_domain (att);
      if (TP_IS_LOB_TYPE (TP_DOMAIN_TYPE (dom)) || TP_DOMAIN_TYPE (dom) == DB_TYPE_JSON ||
      (TP_IS_CHAR_TYPE (TP_DOMAIN_TYPE (dom)) && dom->precision > STATS_MAX_PRECISION))
    {
      /* These types are not gathered for statistics. */
      snprintf (column, 23, "cast (-1 as BIGINT), ");
    }
      else
    {
      /* make column */
      snprintf (column, DB_MAX_IDENTIFIER_LENGTH + 20, "count(distinct [%s]), ", db_attribute_name (att));
    }

      /* alloc memory */
      if (strlen (select_list) + strlen (column) > buf_size)
    {
      buf_size += 1024;
      select_buf = (char *) realloc (select_list, sizeof (char) * buf_size);
      if (select_buf == NULL)
        {
          goto end;
        }
      else
        {
          select_list = select_buf;
        }
    }

      /* concat column */
      strcat (select_list, column);

      /* set column id */
      att_ndv[i++].id = db_attribute_id (att);

      /* advance to next attribute */
      att = db_attribute_next (att);
    }

  /* add "count(*)" */
  if (strlen (select_list) + strlen ("count(*)") > buf_size)
    {
      buf_size += 10;
      select_list = (char *) realloc (select_list, sizeof (char) * buf_size);
      if (select_list == NULL)
    {
      goto end;
    }
    }
  att_ndv[i].id = -1;
  strcat (select_list, "count(*)");

  return select_list;

end:
  if (select_list)
    {
      free_and_init (select_list);
    }
  return NULL;
}