Skip to content

File px_scan_index_leaf_slot_walker.cpp

File List > cubrid > src > query > parallel > px_scan > index > px_scan_index_leaf_slot_walker.cpp

Go to the documentation of this file

/*
 *
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/* px_scan_index_leaf_slot_walker.cpp — leaf-page slot loop + range/filter + OID gather + process_oid. */

#include "px_scan_index_leaf_slot_walker.hpp"

#include "px_scan_index_overflow_drain_fsm.hpp"
#include "px_scan_input_handler_index.hpp"

#include "btree.h"
#include "btree_load.h"
#include "dbtype.h"
#include "error_code.h"
#include "error_manager.h"
#include "fetch.h"
#include "heap_file.h"
#include "memory_alloc.h"
#include "object_primitive.h"
#include "page_buffer.h"
#include "query_evaluator.h"
#include "scan_manager.h"
#include "slotted_page.h"
#include "storage_common.h"

#include <vector>

// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"

namespace parallel_index_scan
{
  namespace
  {
    struct leaf_collect_helper
    {
      std::vector<OID> *oid_vec;
      MVCC_SNAPSHOT *snapshot;
    };
  }

  leaf_slot_walker::leaf_slot_walker ()
    : m_scan_id (nullptr),
      m_vd (nullptr),
      m_btid_int (nullptr),
      m_input_handler (nullptr),
      m_drain_fsm (nullptr),
      m_page (nullptr),
      m_num_keys (0),
      m_current_slot (1),
      m_current_range_idx (0),
      m_is_covering (false),
      m_use_desc_index (false),
      m_slot_key_valid (false),
      m_slot_clear_key (false)
  {
    db_make_null (&m_slot_key);
  }

  void
  leaf_slot_walker::wire (SCAN_ID *scan_id, val_descr *vd, parallel_scan::input_handler_index *handler,
              overflow_drain_fsm *fsm)
  {
    m_scan_id = scan_id;
    m_vd = vd;
    m_input_handler = handler;
    m_drain_fsm = fsm;

    INDX_INFO *indx_info = (scan_id != nullptr) ? scan_id->s.isid.indx_info : nullptr;
    m_is_covering = (indx_info != nullptr && indx_info->coverage != 0);
    m_use_desc_index = (indx_info != nullptr && indx_info->use_desc_index != 0);

    /* m_btid_int set lazily on first set_page / from FSM late-joiner entry. */
    m_btid_int = nullptr;
  }

  void
  leaf_slot_walker::unwire ()
  {
    m_scan_id = nullptr;
    m_vd = nullptr;
    m_btid_int = nullptr;
    m_input_handler = nullptr;
    m_drain_fsm = nullptr;
  }

  void
  leaf_slot_walker::cleanup_on_reset (THREAD_ENTRY *thread_p)
  {
    if (m_page != nullptr)
      {
    pgbuf_unfix (thread_p, m_page);
    m_page = nullptr;
      }
    if (m_slot_key_valid && m_slot_clear_key)
      {
    pr_clear_value (&m_slot_key);
      }
    m_slot_key_valid = false;
    m_slot_clear_key = false;
  }

  int
  leaf_slot_walker::set_page (THREAD_ENTRY *thread_p, PAGE_PTR page, INT16 slot_hint)
  {
    assert (page != nullptr);

    if (m_input_handler != nullptr && m_btid_int == nullptr)
      {
    m_btid_int = m_input_handler->get_btid_int ();
      }

    if (m_page != nullptr)
      {
    pgbuf_unfix (thread_p, m_page);
    m_page = nullptr;
      }

    if (m_slot_key_valid && m_slot_clear_key)
      {
    pr_clear_value (&m_slot_key);
      }
    m_slot_key_valid = false;
    m_slot_clear_key = false;

    m_page = page;
    m_num_keys = btree_node_number_of_keys (thread_p, m_page);

    /* slot_hint > m_num_keys (BTREE_KEY_BIGGER) keeps loop entry-guard false → immediate leaf-chain advance. */
    if (slot_hint != NULL_SLOTID && slot_hint >= 1)
      {
    m_current_slot = slot_hint;
      }
    else
      {
    m_current_slot = m_use_desc_index ? m_num_keys : 1;
      }

    /* m_current_range_idx: only set_range_idx may reset it, and only on fetch's descent branch (range_idx >= 0). */
    return NO_ERROR;
  }

  /* mirrors btree_apply_key_range_and_filter on storage-order keys (part_key_desc swap done in convert_all_key_ranges). */
  int
  leaf_slot_walker::check_key_in_range (DB_VALUE *key, bool *in_range, bool *past_upper, int *matched_range_idx)
  {
    *in_range = false;
    *past_upper = false;
    if (matched_range_idx)
      {
    *matched_range_idx = -1;
      }

    TP_DOMAIN *key_domain = m_btid_int->key_type;
    key_val_range *ranges = m_input_handler->get_key_val_ranges ();
    int num_ranges = m_input_handler->get_num_key_ranges ();
    const bool use_desc_index = m_use_desc_index;
    /* gap-eager trigger compares post-advance m_current_range_idx against this entry snapshot — strictly thread-local, no mutex needed. */
    int entry_range_idx = m_current_range_idx;

    /* index→walk order: btree_compare_key already gives index order (per-col flip via dom_is_desc[0]); negate when walking reverse. mirrors btree.c:16515-16518. */
    auto post_flip = [use_desc_index] (DB_VALUE_COMPARE_RESULT c)
    {
      if (!use_desc_index || c == DB_EQ || c == DB_UNK)
    {
      return c;
    }
      return (c == DB_GT) ? DB_LT : (c == DB_LT ? DB_GT : c);
    };

    /* Keys arrive in B-tree storage order; iterate ranges forward (post-flip handles per-column DESC). */
    for (int i = m_current_range_idx; i < num_ranges; i++)
      {
    key_val_range *kvr = &ranges[i];

    if (kvr->range == NA_NA)
      {
        continue;
      }

    if (kvr->range == INF_INF)
      {
        *in_range = true;
        return NO_ERROR;
      }

    DB_VALUE_COMPARE_RESULT c;
    int start_col = 0;

    bool lower_ok = true;
    switch (kvr->range)
      {
      case GE_LE:
      case GE_LT:
      case GE_INF:
        if (!DB_IS_NULL (&kvr->key1))
          {
        start_col = 0;
        c = btree_compare_key (key, &kvr->key1, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c == DB_LT)
          {
            lower_ok = false;
          }
          }
        break;
      case GT_LE:
      case GT_LT:
      case GT_INF:
        if (!DB_IS_NULL (&kvr->key1))
          {
        start_col = 0;
        c = btree_compare_key (key, &kvr->key1, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c == DB_LT || c == DB_EQ)
          {
            lower_ok = false;
          }
          }
        break;
      case INF_LE:
      case INF_LT:
      case INF_INF:
        break;
      case EQ_NA:
        if (!DB_IS_NULL (&kvr->key1))
          {
        start_col = 0;
        c = btree_compare_key (key, &kvr->key1, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c == DB_LT)
          {
            lower_ok = false;
          }
          }
        break;
      default:
        break;
      }

    if (!lower_ok)
      {
        return NO_ERROR;
      }

    bool upper_ok = true;
    switch (kvr->range)
      {
      case GE_LE:
      case GT_LE:
      case INF_LE:
        if (!DB_IS_NULL (&kvr->key2))
          {
        start_col = 0;
        c = btree_compare_key (&kvr->key2, key, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c == DB_LT)
          {
            upper_ok = false;
            m_current_range_idx = i + 1;
          }
          }
        break;
      case GE_LT:
      case GT_LT:
      case INF_LT:
        if (!DB_IS_NULL (&kvr->key2))
          {
        start_col = 0;
        c = btree_compare_key (&kvr->key2, key, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c == DB_LT || c == DB_EQ)
          {
            upper_ok = false;
            m_current_range_idx = i + 1;
          }
          }
        break;
      case GE_INF:
      case GT_INF:
      case INF_INF:
        break;
      case EQ_NA:
        if (!DB_IS_NULL (&kvr->key1))
          {
        start_col = 0;
        c = btree_compare_key (key, &kvr->key1, key_domain, 1, 1, &start_col);
        if (c == DB_UNK)
          {
            return ER_FAILED;
          }
        c = post_flip (c);
        if (c != DB_EQ)
          {
            upper_ok = false;
            if (c == DB_GT)
              {
            m_current_range_idx = i + 1;
              }
          }
          }
        break;
      default:
        break;
      }

    if (upper_ok)
      {
        *in_range = true;
        if (matched_range_idx)
          {
        *matched_range_idx = i;
          }
        return NO_ERROR;
      }

    /* (vpid, range_idx) unit: stop+signal on mid-leaf advance; this worker handles only entry_range_idx slots, next range belongs to the descent worker. */
    if (m_current_range_idx > entry_range_idx)
      {
        *past_upper = true;
        return NO_ERROR;
      }
      }

    *past_upper = true;
    return NO_ERROR;
  }

  /* MVCC pre-filter; without it filtered-index updated versions leak into heap_get_visible_version. */
  int
  leaf_slot_walker::collect_oid_callback (THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record,
                      char *object_ptr, OID *oid, OID *class_oid,
                      BTREE_MVCC_INFO *mvcc_info, bool *stop, void *args)
  {
    auto *helper = static_cast<leaf_collect_helper *> (args);

    if (helper->snapshot != nullptr)
      {
    MVCC_REC_HEADER mvcc_header;
    btree_mvcc_info_to_heap_mvcc_header (mvcc_info, &mvcc_header);
    if (helper->snapshot->snapshot_fnc (thread_p, &mvcc_header, helper->snapshot) != SNAPSHOT_SATISFIED)
      {
        return NO_ERROR;
      }
      }

    helper->oid_vec->push_back (*oid);
    return NO_ERROR;
  }

  SCAN_CODE
  leaf_slot_walker::process_oid (THREAD_ENTRY *thread_p, OID *oid)
  {
    INDX_SCAN_ID *isidp = &m_scan_id->s.isid;

    if (m_is_covering)
      {
    /* MVCC pre-filtered in collect_oid_callback. */
    HEAP_CACHE_ATTRINFO *attr_info = nullptr;
    REGU_VARIABLE_LIST regu_list = nullptr;

    if (isidp->rest_attrs.num_attrs > 0)
      {
        attr_info = isidp->rest_attrs.attr_cache;
        regu_list = isidp->rest_regu_list;
      }
    else if (isidp->pred_attrs.num_attrs > 0)
      {
        attr_info = isidp->pred_attrs.attr_cache;
        regu_list = isidp->scan_pred.regu_list;
      }

    if (attr_info != nullptr)
      {
        int read_err = btree_attrinfo_read_dbvalues (thread_p, &m_slot_key, nullptr,
               isidp->bt_attr_ids, isidp->bt_num_attrs, attr_info,
               isidp->indx_cov.func_index_col_id, nullptr);
        if (read_err != NO_ERROR)
          {
        return S_ERROR;
          }
      }

    if (isidp->scan_pred.pr_eval_fnc != nullptr && isidp->scan_pred.pred_expr != nullptr)
      {
        DB_LOGICAL ev_res = (*isidp->scan_pred.pr_eval_fnc) (thread_p, isidp->scan_pred.pred_expr,
                m_vd, oid);
        ev_res = update_logical_result (thread_p, ev_res, (int *) &m_scan_id->qualification);
        if (ev_res != V_TRUE)
          {
        if (ev_res == V_ERROR)
          {
            return S_ERROR;
          }
        return S_END;  /* skip */
          }
      }

    m_scan_id->scan_stats.data_qualified_rows++;
    m_scan_id->scan_stats.qualified_rows++;

    if (regu_list != nullptr && m_scan_id->val_list != nullptr)
      {
        if (fetch_val_list (thread_p, regu_list, m_vd, &isidp->cls_oid, oid, nullptr, PEEK) != NO_ERROR)
          {
        return S_ERROR;
          }
      }

    return S_SUCCESS;
      }

    /* Non-covering index path: read values from heap record */
    RECDES heap_recdes = RECDES_INITIALIZER;
    if (m_scan_id->fixed == false)
      {
    heap_recdes.data = nullptr;
      }

    SCAN_CODE sp_scan = heap_get_visible_version (thread_p, oid, nullptr, &heap_recdes,
            &isidp->scan_cache, m_scan_id->fixed, NULL_CHN);
    if (sp_scan == S_SNAPSHOT_NOT_SATISFIED || sp_scan == S_DOESNT_EXIST)
      {
    return S_END;  /* skip this OID */
      }
    if (sp_scan == S_ERROR)
      {
    if (er_errid () == ER_HEAP_UNKNOWN_OBJECT)
      {
        er_clear ();
        return S_END;  /* skip */
      }
    return S_ERROR;
      }

    if (isidp->scan_pred.pr_eval_fnc != nullptr && isidp->scan_pred.pred_expr != nullptr)
      {
    FILTER_INFO data_filter;
    memset (&data_filter, 0, sizeof (data_filter));
    data_filter.scan_pred = &isidp->scan_pred;
    data_filter.scan_attrs = &isidp->pred_attrs;
    data_filter.val_list = m_scan_id->val_list;
    data_filter.val_descr = m_vd;
    data_filter.class_oid = &isidp->cls_oid;

    DB_LOGICAL ev_res = eval_data_filter (thread_p, oid, &heap_recdes, &isidp->scan_cache, &data_filter);
    ev_res = update_logical_result (thread_p, ev_res, (int *) &m_scan_id->qualification);
    if (ev_res != V_TRUE)
      {
        if (ev_res == V_ERROR)
          {
        return S_ERROR;
          }
        return S_END;  /* skip */
      }
      }

    m_scan_id->scan_stats.data_qualified_rows++;
    m_scan_id->scan_stats.qualified_rows++;

    if (isidp->rest_regu_list != nullptr)
      {
    if (heap_attrinfo_read_dbvalues (thread_p, oid, &heap_recdes, isidp->rest_attrs.attr_cache) != NO_ERROR)
      {
        return S_ERROR;
      }

    if (m_scan_id->val_list != nullptr)
      {
        if (fetch_val_list (thread_p, isidp->rest_regu_list, m_vd, &isidp->cls_oid, oid,
                nullptr, PEEK) != NO_ERROR)
          {
        return S_ERROR;
          }
      }
      }

    return S_SUCCESS;
  }

  /* Walk remaining slots on m_page until a row qualifies (delegates OID drain to FSM). */
  SCAN_CODE
  leaf_slot_walker::scan_next_slot (THREAD_ENTRY *thread_p)
  {
    INDX_SCAN_ID *isidp = &m_scan_id->s.isid;

    /* Clear previous slot's key if any. */
    if (m_slot_key_valid && m_slot_clear_key)
      {
    pr_clear_value (&m_slot_key);
      }
    m_slot_key_valid = false;
    m_slot_clear_key = false;

    while (m_use_desc_index ? (m_current_slot >= 1) : (m_current_slot <= m_num_keys))
      {
    RECDES rec;
    rec.data = nullptr;
    rec.area_size = -1;

    if (spage_get_record (thread_p, m_page, m_current_slot, &rec, PEEK) != S_SUCCESS)
      {
        m_use_desc_index ? m_current_slot-- : m_current_slot++;
        continue;
      }

    /* fence keys duplicate adjacent-leaf keys; counting them double-inflates aggregate / group-by. */
    if (btree_leaf_record_is_fence (&rec))
      {
        m_use_desc_index ? m_current_slot-- : m_current_slot++;
        continue;
      }

    DB_VALUE key;
    db_make_null (&key);
    LEAF_REC leaf_rec_info;
    bool clear_key = false;
    int offset = 0;

    int rerr = btree_read_record (thread_p, m_btid_int, m_page, &rec, &key,
                      &leaf_rec_info, BTREE_LEAF_NODE,
                      &clear_key, &offset, COPY, nullptr);
    m_use_desc_index ? m_current_slot-- : m_current_slot++;

    if (rerr != NO_ERROR)
      {
        if (clear_key)
          {
        pr_clear_value (&key);
          }
        if (m_page != nullptr)
          {
        pgbuf_unfix (thread_p, m_page);
        m_page = nullptr;
          }
        m_input_handler->signal_chain_ended (m_current_range_idx);
        return S_ERROR;
      }

    m_scan_id->scan_stats.read_keys++;

    bool in_range = false;
    bool past_upper = false;
    int matched_range_idx = -1;
    int kr_err = check_key_in_range (&key, &in_range, &past_upper, &matched_range_idx);
    if (kr_err != NO_ERROR)
      {
        if (clear_key)
          {
        pr_clear_value (&key);
          }
        if (m_page != nullptr)
          {
        pgbuf_unfix (thread_p, m_page);
        m_page = nullptr;
          }
        m_input_handler->signal_chain_ended (m_current_range_idx);
        return S_ERROR;
      }

    if (!in_range)
      {
        if (clear_key)
          {
        pr_clear_value (&key);
          }
        if (past_upper)
          {
        if (m_page != nullptr)
          {
            pgbuf_unfix (thread_p, m_page);
            m_page = nullptr;
          }
        m_input_handler->signal_chain_ended (m_current_range_idx);
        return S_END;
          }
        continue;
      }

    /* mirrors btree_apply_key_range_and_filter need_to_check_null (btree.c:16549–16614); ISS/ILS gated upstream. */
    if (matched_range_idx >= 0 && DB_VALUE_DOMAIN_TYPE (&key) == DB_TYPE_MIDXKEY)
      {
        key_val_range *kvr = &m_input_handler->get_key_val_ranges ()[matched_range_idx];
        if (kvr->num_index_term > 0)
          {
        DB_MIDXKEY *mkey = db_get_midxkey (&key);
        DB_VALUE ep;
        if (mkey != nullptr
            && pr_midxkey_get_element_nocopy (mkey, kvr->num_index_term - 1, &ep, NULL, NULL) == NO_ERROR)
          {
            if (DB_IS_NULL (&ep))
              {
            bool allow_null = false;
            if (prm_get_bool_value (PRM_ID_ORACLE_STYLE_EMPTY_STRING) && ep.need_clear)
              {
                DB_TYPE etype = DB_VALUE_DOMAIN_TYPE (&ep);
                if (QSTR_IS_ANY_CHAR_OR_BIT (etype) && ep.data.ch.medium.buf != nullptr)
                  {
                allow_null = true;  /* Oracle-style empty string */
                  }
              }
            if (!allow_null)
              {
                if (clear_key)
                  {
                pr_clear_value (&key);
                  }
                continue;
              }
              }
            if (!DB_IS_NULL (&ep) && ep.need_clear)
              {
            pr_clear_value (&ep);
              }
          }
          }
      }

    if (isidp->key_pred.pr_eval_fnc != nullptr && isidp->key_pred.pred_expr != nullptr)
      {
        FILTER_INFO key_filter;
        memset (&key_filter, 0, sizeof (key_filter));
        key_filter.scan_pred = &isidp->key_pred;
        key_filter.scan_attrs = &isidp->key_attrs;
        key_filter.val_list = m_scan_id->val_list;
        key_filter.val_descr = m_vd;
        key_filter.class_oid = &isidp->cls_oid;
        key_filter.btree_attr_ids = isidp->bt_attr_ids;
        key_filter.num_vstr_ptr = &isidp->num_vstr;
        key_filter.vstr_ids = isidp->vstr_ids;
        key_filter.btree_num_attrs = isidp->bt_num_attrs;
        key_filter.func_idx_col_id = isidp->indx_info->func_idx_col_id;

        DB_LOGICAL ev_res = eval_key_filter (thread_p, &key, 0, nullptr, &key_filter);
        if (ev_res != V_TRUE)
          {
        if (clear_key)
          {
            pr_clear_value (&key);
          }
        if (ev_res == V_ERROR)
          {
            if (m_page != nullptr)
              {
            pgbuf_unfix (thread_p, m_page);
            m_page = nullptr;
              }
            m_input_handler->signal_chain_ended (m_current_range_idx);
            return S_ERROR;
          }
        continue;
          }
      }

    /* mirrors btree.c:25414 — increment only after key filter passes. */
    m_scan_id->scan_stats.qualified_keys++;

    /* Gather leaf-resident OIDs only; overflow pulled lazily after leaf-OID buffer drains. */
    std::vector<OID> leaf_oids;
    leaf_collect_helper oid_helper;
    oid_helper.oid_vec = &leaf_oids;
    oid_helper.snapshot = isidp->scan_cache.mvcc_snapshot;

    bool record_stop = false;
    int proc_err = btree_record_process_objects (thread_p, m_btid_int, BTREE_LEAF_NODE,
               &rec, offset, &record_stop,
               collect_oid_callback, &oid_helper);
    if (proc_err != NO_ERROR)
      {
        if (clear_key)
          {
        pr_clear_value (&key);
          }
        if (m_page != nullptr)
          {
        pgbuf_unfix (thread_p, m_page);
        m_page = nullptr;
          }
        m_input_handler->signal_chain_ended (m_current_range_idx);
        return S_ERROR;
      }

    m_scan_id->scan_stats.key_qualified_rows += leaf_oids.size ();
    m_scan_id->scan_stats.read_rows += leaf_oids.size ();

    /* Save the key for process_oid (covering-index PEEK reads via btree_attrinfo_read_dbvalues). */
    m_slot_key = key;
    m_slot_key_valid = true;
    m_slot_clear_key = clear_key;

    /* Hand the OID buffer + pending overflow chain head to the FSM; try_publish happens after leaf-OID drain. */
    m_drain_fsm->begin_leaf_drain (std::move (leaf_oids), leaf_rec_info.ovfl);

    SCAN_CODE drain_sc = m_drain_fsm->drain_next_oid (thread_p);
    if (drain_sc != S_END)
      {
        return drain_sc;
      }
    /* S_END from drain — advance to next slot. */
      }

    /* Page exhausted; chain-walk naturally — do NOT signal chain_ended (leaf_ended stays false so next fetch fixes m_current_leaf_vpid). */
    if (m_page != nullptr)
      {
    pgbuf_unfix (thread_p, m_page);
    m_page = nullptr;
      }
    return S_END;
  }
}