Skip to content

File histogram_builder.cpp

File List > cubrid > src > optimizer > histogram > histogram_builder.cpp

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * histogram_builder.cpp - Histogram builder implementation
 */

#include "histogram_builder.hpp"
#include "histogram_reader.hpp"
#include <cstring>
#include "object_domain.h"

namespace hist
{
  // ---------- endian writers for buffer (explicit specializations before use) ----------
  template<>
  void HistogramBuilder::write<std::int32_t> (char *&dest, std::int32_t v)
  {
    OR_PUT_INT (dest, v);
    dest += OR_INT64_SIZE; // keep 8-byte slot for data_hi (4B value + 4B padding)
  }

  template<>
  void HistogramBuilder::write<std::int64_t> (char *&dest, std::int64_t v)
  {
    OR_PUT_INT64 (dest, &v);
    dest += OR_INT64_SIZE;
  }

  template<>
  void HistogramBuilder::write<std::uint64_t> (char *&dest, std::uint64_t v)
  {
    OR_PUT_INT64 (dest, reinterpret_cast<const std::int64_t *> (&v));
    dest += OR_INT64_SIZE;
  }

  template<>
  void HistogramBuilder::write<double> (char *&dest, double v)
  {
    OR_PUT_DOUBLE (dest, v);
    dest += OR_DOUBLE_SIZE;
  }

  template<>
  void HistogramBuilder::write<std::string> (char *&dest, std::string v)
  {
    // write length and offset or inline data
    OR_PUT_INT (dest, v.length());
    dest += OR_INT_SIZE;
    if (v.length() <= 4)
      {
    // inline data
    memcpy (dest, v.data(), v.length());
      }
    else
      {
    // str blob data (length + pointer)
    OR_PUT_INT (dest, cur_str_off_);
    cur_str_off_ += v.length();
      }
    dest += OR_INT_SIZE;
  }

  void HistogramBuilder::add (HistogramTypes data_hi, std::int64_t cumulative, std::int64_t approx_ndv)
  {
    assert (cumulative >= 0);
    buckets_.push_back (Bucket{data_hi, cumulative, approx_ndv});
  }

  char *HistogramBuilder::build (THREAD_ENTRY *thread_p, DB_TYPE type, int *histogram_total_length)
  {
    /* ---- precompute record sizes  ---- */
    const std::uint32_t bucket_area_size = hist::BUCKET_RECORD_SIZE * buckets_.size();

    /* ---- header ---- */
    HeaderV1 H{};
    std::memcpy (H.magic, "HST1", 4);
    H.version  = htonl (1);
    H.nbuckets = htonl (static_cast<std::uint32_t> (buckets_.size()));
    H.type = htonl (static_cast<std::uint32_t> (type));
    H.str_size = 0;
    H.total_size = 0;

    /* ---- records ---- */
    char *buffer = static_cast<char *> (db_private_alloc (thread_p, sizeof (HeaderV1) + bucket_area_size));
    if (buffer == NULL)
      {
    return NULL;
      }
    std::memset (buffer, 0, sizeof (HeaderV1) + bucket_area_size); // must be initialized to zero
    char *end_buffer = buffer + sizeof (HeaderV1) + bucket_area_size;
    char *buffer_ptr = buffer + sizeof (HeaderV1);
    char *str_blob_ptr;

    /* ---- index-based loop for safer access ---- */
    for (size_t i = 0; i < buckets_.size(); ++i)
      {
    const Bucket b = buckets_[i];
    switch (type)
      {
      /* ---- int64_t value ---- */
      case DB_TYPE_INTEGER:
      case DB_TYPE_SHORT:
      case DB_TYPE_BIGINT:
      {
        if (std::holds_alternative<std::int64_t> (b.data_hi))
          {
        // ---- int64_t value to int32_t value ----
        std::int64_t val = std::get<std::int64_t> (b.data_hi);
        write<std::int64_t> (buffer_ptr, static_cast<std::int64_t> (val));
          }
        else
          {
        db_private_free (thread_p, buffer);
        assert (false);
        return NULL;
          }
      }
      break;
      /* ---- double value ---- */
      case DB_TYPE_DOUBLE:
      case DB_TYPE_FLOAT:
      case DB_TYPE_NUMERIC:
      {
        if (std::holds_alternative<double> (b.data_hi))
          {
        write<double> (buffer_ptr, std::get<double> (b.data_hi));
          }
        else
          {
        db_private_free (thread_p, buffer);
        assert (false);
        return NULL;
          }
      }
      break;
      /* ---- string value ---- */
      case DB_TYPE_STRING:
      case DB_TYPE_BIT:
      case DB_TYPE_VARBIT:
      case DB_TYPE_CHAR:
      {
        if (std::holds_alternative<std::string> (b.data_hi))
          {
        write<std::string> (buffer_ptr, std::get<std::string> (b.data_hi));
          }
        else if (std::holds_alternative<std::string_view> (b.data_hi))
          {
        std::string_view sv = std::get<std::string_view> (b.data_hi);
        write<std::string> (buffer_ptr, std::string (sv));
          }
        else
          {
        db_private_free (thread_p, buffer);
        assert (false);
        return NULL;
          }
      }
      break;
      /* ---- uint64_t value ---- */
      case DB_TYPE_TIME:
      case DB_TYPE_TIMESTAMP:
      case DB_TYPE_TIMESTAMPLTZ:
      case DB_TYPE_DATE:
      case DB_TYPE_TIMESTAMPTZ:
      case DB_TYPE_DATETIME:
      {
        if (std::holds_alternative<std::uint64_t> (b.data_hi))
          {
        write<std::uint64_t> (buffer_ptr, std::get<std::uint64_t> (b.data_hi));
          }
        else
          {
        db_private_free (thread_p, buffer);
        assert (false);
        return NULL;
          }
      }
      break;
      default:
        /* never reach here */
        db_private_free (thread_p, buffer);
        assert (false);
        return NULL;
      }
    write<std::int64_t> (buffer_ptr, b.cumulative);
    write<std::int64_t> (buffer_ptr, b.approx_ndv);
      }

    assert (buffer_ptr == end_buffer);

    /* ---- build string blob ---- */
    if (cur_str_off_ > 0)
      {
    str_blob_ptr = static_cast<char *> (db_private_alloc (thread_p, cur_str_off_));
    if (str_blob_ptr == NULL)
      {
        db_private_free (thread_p, buffer);
        return NULL;
      }
    char *cur_str_blob_ptr = str_blob_ptr;
    std::memset (str_blob_ptr, 0, cur_str_off_); // must be initialized to zero
    char *str_blob_ptr_end = str_blob_ptr + cur_str_off_;
    for (const auto &b : buckets_)
      {
        std::string str_val;
        if (std::holds_alternative<std::string> (b.data_hi))
          {
        str_val = std::get<std::string> (b.data_hi);
          }
        else if (std::holds_alternative<std::string_view> (b.data_hi))
          {
        str_val = std::string (std::get<std::string_view> (b.data_hi));
          }
        else
          {
        db_private_free (thread_p, buffer);
        db_private_free (thread_p, str_blob_ptr);
        assert (false);
        return NULL;
          }

        if (str_val.length() > 4)
          {
        memcpy (cur_str_blob_ptr, str_val.data(), str_val.length());
        cur_str_blob_ptr += str_val.length();
          }
      }
    /* ---- write string ---- */
    assert (cur_str_blob_ptr == str_blob_ptr_end);
    buffer = static_cast<char *> (db_private_realloc (thread_p, buffer,
                      sizeof (HeaderV1) + bucket_area_size + cur_str_off_));
    if (buffer == NULL)
      {
        db_private_free (thread_p, str_blob_ptr);
        return NULL;
      }
    memcpy (buffer + sizeof (HeaderV1) + bucket_area_size, str_blob_ptr, cur_str_off_);
    db_private_free (thread_p, str_blob_ptr);
      }

    /* ---- write header ---- */
    H.str_size = htonl (static_cast<std::uint32_t> (cur_str_off_));
    H.total_size = htonl (static_cast<std::uint32_t> (sizeof (HeaderV1) + bucket_area_size + cur_str_off_));
    memcpy (buffer, &H, sizeof (HeaderV1));
    *histogram_total_length = sizeof (HeaderV1) + bucket_area_size + cur_str_off_;

    return buffer;
  }
} // namespace hist