CUBRID Engine  latest
btree.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * btree.c - B+-Tree manager
21  */
22 
23 #ident "$Id$"
24 
25 #include "btree.h"
26 
27 #include "btree_load.h"
28 #include "config.h"
29 #include "db_value_printer.hpp"
30 #include "file_manager.h"
31 #include "slotted_page.h"
32 #include "log_append.hpp"
33 #include "log_manager.h"
34 #include "overflow_file.h"
35 #include "xserver_interface.h"
36 #include "scan_manager.h"
37 #include "fetch.h"
38 #include "locator_sr.h"
39 #include "network_interface_sr.h" /* TODO: remove; used for xcallback_console_print */
40 #include "utility.h"
41 #include "transform.h"
42 #include "partition_sr.h"
43 #include "porting_inline.hpp"
44 #include "query_executor.h"
45 #include "query_opfunc.h"
46 #include "object_primitive.h"
47 #include "object_representation.h"
48 #include "perf_monitor.h"
49 #include "regu_var.hpp"
50 #include "fault_injection.h"
51 #include "dbtype.h"
52 #include "thread_manager.hpp"
53 
54 #include <assert.h>
55 #include <algorithm>
56 #include <cinttypes>
57 #include <stdlib.h>
58 #include <string.h>
59 
60 #define BTREE_HEALTH_CHECK
61 
62 #define BTREE_DEBUG_DUMP_SIMPLE 0x0001 /* simple message in SMO */
63 #define BTREE_DEBUG_DUMP_FULL 0x0002 /* full dump in insert or delete */
64 
65 #define BTREE_DEBUG_HEALTH_SIMPLE 0x0010 /* simple health check in SMO */
66 #define BTREE_DEBUG_HEALTH_FULL 0x0020 /* full health check (traverse all slot in page) */
67 
68 #define BTREE_DEBUG_TEST_SPLIT 0x0100 /* full split test */
69 
70 #define BTREE_SPLIT_LOWER_BOUND 0.20f
71 #define BTREE_SPLIT_UPPER_BOUND (1.0f - BTREE_SPLIT_LOWER_BOUND)
72 
73 #define BTREE_SPLIT_MIN_PIVOT 0.05f
74 #define BTREE_SPLIT_MAX_PIVOT (1.0f - BTREE_SPLIT_MIN_PIVOT)
75 
76 #define BTREE_SPLIT_DEFAULT_PIVOT 0.5f
77 #define DISK_PAGE_BITS (DB_PAGESIZE * CHAR_BIT) /* Num of bits per page */
78 
79 #define BTREE_NODE_MAX_SPLIT_SIZE(thread_p, page_ptr) \
80  (db_page_size() - spage_header_size() - spage_get_space_for_record(thread_p, (page_ptr), HEADER))
81 
82 #define OID_MSG_BUF_SIZE 64
83 
84 #define MIN_KEY_SIZE DB_ALIGN (1, BTREE_MAX_ALIGN)
85 #define MIN_LEAF_REC_SIZE (OR_OID_SIZE + MIN_KEY_SIZE)
86 #define MAX_LEAF_REC_NUM (IO_MAX_PAGE_SIZE / MIN_LEAF_REC_SIZE)
87 
88 #define MAX_MERGE_ALIGN_WASTE \
89  ((DB_PAGESIZE/MIN_LEAF_REC_SIZE) * (BTREE_MAX_ALIGN - 1))
90 
91 /* Merge two nodes when a page containing both is this empty. */
92 #define CAN_MERGE_WHEN_EMPTY \
93  (MAX (DB_PAGESIZE * 0.33, MAX_MERGE_ALIGN_WASTE * 1.3))
94 /* Force merging two nodes when a page containing both is this empty. */
95 #define FORCE_MERGE_WHEN_EMPTY \
96  (MAX (DB_PAGESIZE * 0.66, MAX_MERGE_ALIGN_WASTE * 1.3))
97 
98 /*
99  * Page header information related defines
100  */
101 #define NOT_FOUND -1
102 
103 /* B'0001 0000 0000 0000' */
104 #define BTREE_LEAF_RECORD_FENCE ((short) 0x1000)
105 /* B'0010 0000 0000 0000' */
106 #define BTREE_LEAF_RECORD_OVERFLOW_OIDS ((short) 0x2000)
107 /* B'0100 0000 0000 0000' */
108 #define BTREE_LEAF_RECORD_OVERFLOW_KEY ((short) 0x4000)
109 /* B'1000 0000 0000 0000' */
110 #define BTREE_LEAF_RECORD_CLASS_OID ((short) 0x8000)
111 /* B'1111 0000 0000 0000' */
112 #define BTREE_LEAF_RECORD_MASK ((short) 0xF000)
113 
114 /* B'0100 0000 0000 0000' */
115 #define BTREE_OID_HAS_MVCC_INSID ((short) 0x4000)
116 /* B'1000 0000 0000 0000' */
117 #define BTREE_OID_HAS_MVCC_DELID ((short) 0x8000)
118 /* B'1100 0000 0000 0000' */
119 #define BTREE_OID_MVCC_FLAGS_MASK ((short) 0xC000)
120 
121 #define BTREE_OID_HAS_MVCC_INSID_AND_DELID \
122  (BTREE_OID_HAS_MVCC_INSID | BTREE_OID_HAS_MVCC_DELID)
123 
/* The maximum number of OID's in a page.
 * The replacement list is parenthesized so that the macro behaves as a single value when it is used inside a
 * larger expression (e.g. x / BTREE_MAX_OID_COUNT or BTREE_MAX_OID_COUNT % y).
 */
#define BTREE_MAX_OID_COUNT (IO_MAX_PAGE_SIZE / OR_OID_SIZE)
126 
127 /* Clear MVCC flags from object OID */
128 #define BTREE_OID_CLEAR_MVCC_FLAGS(oid_ptr) \
129  ((oid_ptr)->volid &= ~BTREE_OID_MVCC_FLAGS_MASK)
130 /* Clear record flags from object OID */
131 #define BTREE_OID_CLEAR_RECORD_FLAGS(oid_ptr) \
132  ((oid_ptr)->slotid &= ~BTREE_LEAF_RECORD_MASK)
133 /* Clear all flags (mvcc & record) from object OID. */
134 #define BTREE_OID_CLEAR_ALL_FLAGS(oid_ptr) \
135  do \
136  { \
137  BTREE_OID_CLEAR_MVCC_FLAGS (oid_ptr); \
138  BTREE_OID_CLEAR_RECORD_FLAGS (oid_ptr); \
139  } \
140  while (0)
141 /* Check if MVCC flags are set into b-tree object OID. */
142 #define BTREE_OID_IS_MVCC_FLAG_SET(oid_ptr, mvcc_flag) \
143  (((oid_ptr)->volid & (mvcc_flag)) == (mvcc_flag))
144 /* Check if record flags are set into b-tree object OID. */
145 #define BTREE_OID_IS_RECORD_FLAG_SET(oid_ptr, mvcc_flag) \
146  (((oid_ptr)->slotid & (mvcc_flag)) == (mvcc_flag))
147 
148 /* Set b-tree flags into an OID. */
149 #define BTREE_OID_SET_MVCC_FLAG(oid_ptr, mvcc_flag) \
150  ((oid_ptr)->volid |= (mvcc_flag))
151 #define BTREE_OID_SET_RECORD_FLAG(oid_ptr, mvcc_flag) \
152  ((oid_ptr)->slotid |= (mvcc_flag))
153 
154 /* Get b-tree flags from an OID. */
155 #define BTREE_OID_GET_MVCC_FLAGS(oid_ptr) \
156  ((oid_ptr)->volid & BTREE_OID_MVCC_FLAGS_MASK)
157 #define BTREE_OID_GET_RECORD_FLAGS(oid_ptr) \
158  ((oid_ptr)->slotid & BTREE_LEAF_RECORD_MASK)
159 
160 /* Check MVCC flags in a b-tree MVCC info. */
161 #define BTREE_MVCC_INFO_HAS_INSID(mvcc_info) \
162  (((mvcc_info)->flags & BTREE_OID_HAS_MVCC_INSID) != 0)
163 #define BTREE_MVCC_INFO_HAS_DELID(mvcc_info) \
164  (((mvcc_info)->flags & BTREE_OID_HAS_MVCC_DELID) != 0)
165 
166 /* Clear MVCC flags in a b-tree MVCC info. */
167 #define BTREE_MVCC_INFO_CLEAR_INSID(mvcc_info) \
168  ((mvcc_info)->flags &= ~BTREE_OID_HAS_MVCC_INSID)
169 #define BTREE_MVCC_INFO_CLEAR_DELID(mvcc_info) \
170  ((mvcc_info)->flags &= ~BTREE_OID_HAS_MVCC_DELID)
171 
172 /* Check if insert MVCCID is valid but not visible to everyone. */
173 #define BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE(mvcc_info) \
174  (BTREE_MVCC_INFO_HAS_INSID (mvcc_info) && MVCCID_IS_NOT_ALL_VISIBLE ((mvcc_info)->insert_mvccid))
175 
176 /* Check if delete MVCCID is valid. */
177 #define BTREE_MVCC_INFO_IS_DELID_VALID(mvcc_info) \
178  (BTREE_MVCC_INFO_HAS_DELID (mvcc_info) && (mvcc_info)->delete_mvccid != MVCCID_NULL)
179 
180 /* Insert MVCCID based on b-tree mvcc info. */
181 #define BTREE_MVCC_INFO_INSID(mvcc_info) \
182  (BTREE_MVCC_INFO_HAS_INSID (mvcc_info) ? (mvcc_info)->insert_mvccid : MVCCID_ALL_VISIBLE)
183 
184 /* Delete MVCC based on b-tree mvcc info. */
185 #define BTREE_MVCC_INFO_DELID(mvcc_info) \
186  (BTREE_MVCC_INFO_HAS_DELID (mvcc_info) ? (mvcc_info)->delete_mvccid : MVCCID_NULL)
187 
188 /* Set b-tree MVCC info as if it has fixed size (it includes both insert and
189  * delete MVCCID).
190  */
191 #define BTREE_MVCC_INFO_SET_FIXED_SIZE(mvcc_info) \
192  do \
193  { \
194  if (!BTREE_MVCC_INFO_HAS_INSID (mvcc_info)) \
195  { \
196  (mvcc_info)->insert_mvccid = MVCCID_ALL_VISIBLE; \
197  } \
198  if (!BTREE_MVCC_INFO_HAS_DELID (mvcc_info)) \
199  { \
200  (mvcc_info)->delete_mvccid = MVCCID_NULL; \
201  } \
202  (mvcc_info)->flags = BTREE_OID_HAS_MVCC_INSID_AND_DELID; \
203  } \
204  while (false)
205 
206 /* Clear unnecessary flags from b-tree MVCC info. */
207 #define BTREE_MVCC_INFO_CLEAR_FIXED_SIZE(mvcc_info) \
208  do \
209  { \
210  if (!BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE (mvcc_info)) \
211  { \
212  BTREE_MVCC_INFO_CLEAR_INSID(mvcc_info); \
213  } \
214  if (!BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info)) \
215  { \
216  BTREE_MVCC_INFO_CLEAR_DELID(mvcc_info); \
217  } \
218  } \
219  while (false)
220 
221 /* Set insert MVCCID into b-tree mvcc info. */
222 #define BTREE_MVCC_INFO_SET_INSID(mvcc_info, insid) \
223  do \
224  { \
225  (mvcc_info)->flags |= BTREE_OID_HAS_MVCC_INSID; \
226  (mvcc_info)->insert_mvccid = insid; \
227  } \
228  while (false)
229 
230 /* Set delete MVCCID into b-tree mvcc info. */
231 #define BTREE_MVCC_INFO_SET_DELID(mvcc_info, delid) \
232  do \
233  { \
234  (mvcc_info)->flags |= BTREE_OID_HAS_MVCC_DELID; \
235  (mvcc_info)->delete_mvccid = delid; \
236  } \
237  while (false)
238 
239 /* Get an object OID from a b-tree record. If MVCC is enabled, mvcc flags are
240  * cleared.
241  */
242 #define BTREE_GET_OID(buf, oid_ptr) \
243  do \
244  { \
245  OR_GET_OID (buf, oid_ptr); \
246  BTREE_OID_CLEAR_MVCC_FLAGS (oid_ptr); \
247  } \
248  while (0)
249 
/* Initialize OR_BUF to process a b-tree record.
 *
 * buf       : OR_BUF to initialize (passed through to OR_BUF_INIT).
 * btree_rec : pointer to the record; its data and length fields are read.
 *
 * The buffer size is the record length. When the record is flagged with BTREE_LEAF_RECORD_OVERFLOW_OIDS, the
 * size of a VPID aligned to BTREE_MAX_ALIGN is excluded from the buffer size.
 *
 * The local variable uses a macro-private name so it cannot capture an argument expression that happens to
 * mention an identifier called "size".
 */
#define BTREE_RECORD_OR_BUF_INIT(buf, btree_rec) \
  do \
    { \
      int btree_rec_or_buf_size_ = (btree_rec)->length; \
      if (btree_leaf_is_flaged (btree_rec, BTREE_LEAF_RECORD_OVERFLOW_OIDS)) \
	{ \
	  btree_rec_or_buf_size_ -= DB_ALIGN (DISK_VPID_SIZE, BTREE_MAX_ALIGN); \
	} \
      OR_BUF_INIT (buf, (btree_rec)->data, btree_rec_or_buf_size_); \
    } \
  while (false)
262 
263 /* Get MVCC size from leaf record flags */
264 /* Size is:
265  * 2 * OR_MVCCID_SIZE if both MVCC flags are set
266  * 0 if no MVCC flags are set
267  * OR_MVCCID_SIZE otherwise (one flag is set).
268  */
269 #define BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS(mvcc_flags) \
270  (((mvcc_flags) & BTREE_OID_HAS_MVCC_INSID_AND_DELID) == BTREE_OID_HAS_MVCC_INSID_AND_DELID \
271  ? 2 * OR_MVCCID_SIZE : ((mvcc_flags) == 0 ? 0 : OR_MVCCID_SIZE))
272 
273 /* Check if page is a valid b-tree leaf node. Usually called after unfix and
274  * re-fix without validation.
275  */
276 /* TODO: Is this expensive? Can we find a faster way to check it?
277  * (e.g. save last deallocation page LSA - it will be then enough to
278  * compare with an LSA saved before unfixing).
279  */
280 #define BTREE_IS_PAGE_VALID_LEAF(thread_p, page) \
281  ((page) != NULL \
282  && pgbuf_get_page_ptype (thread_p, page) == PAGE_BTREE \
283  && spage_get_slot (page, HEADER) != NULL \
284  && spage_get_slot (page, HEADER)->record_length == sizeof (BTREE_NODE_HEADER) \
285  && (btree_get_node_header (thread_p, page))->node_level == 1)
286 
289 { /* Recovery set of recdes structure */
290  INT16 rec_cnt; /* number of RECDESs stored */
291  INT16 first_slotid; /* first slot id */
292 };
293 
294 typedef enum
295 {
299 
300 typedef enum
301 {
305 
306 typedef enum
307 {
312 
313 /* RECINS_STRUCT - redo b-tree insert recovery structure.
314  */
317 { /* Recovery leaf record oid insertion structure */
318  OID class_oid; /* class oid only in case of unique index */
319  OID oid; /* oid to be inserted to the record */
320  VPID ovfl_vpid; /* Next Overflow pageid */
321  INT16 flags; /* Flags to describe different context of recovered insert object: - oid inserted - is
322  * overflow changed - is new overflow - record type (regular or overflow) - insert OID
323  * mode */
324 };
325 #define RECINS_STRUCT_INITIALIZER \
326  { OID_INITIALIZER, OID_INITIALIZER, VPID_INITIALIZER, 0 }
327 
328 /* Redo recovery of insert delete MVCCID */
329 #define BTID_DOMAIN_CHECK_MAX_SIZE 1024
330 
331 /* Offset of the fields in the Leaf/NonLeaf Record Recovery Log Data */
332 #define OFFS1 0 /* Node Type Offset: Leaf/NonLeaf Information */
333 #define OFFS2 2 /* RECDES Type Offset */
334 #define OFFS3 4 /* RECDES Data Offset */
335 
336 /* for Leaf Page Key Insertions */
337 #define LOFFS1 0 /* Key Len Offset */
338 #define LOFFS2 2 /* Node Type Offset: Leaf/NonLeaf Information */
339 #define LOFFS3 4 /* RECDES Type Offset */
340 #define LOFFS4 6 /* RECDES Data Offset */
341 
342 /* B+tree statistical information environment */
345 {
349  DB_VALUE pkeys_val[BTREE_STATS_PKEYS_NUM]; /* partial key-value */
350 };
351 
352 /* Structure used by btree_range_search to initialize and handle variables
353  * needed throughout the process.
354  */
357 {
358  OID *mem_oid_ptr; /* Pointer to OID memory storage */
359  int pg_oid_cnt; /* The capacity of OID memory storage */
360  int oids_cnt; /* Current count of stored OID's */
361  int oid_size; /* Size of one OID */
362  int cp_oid_cnt; /* The OID count that can be stored in the current step */
363  int rec_oid_cnt; /* The OID count in current record */
364  char *rec_oid_ptr; /* Pointer in record to current OID */
365  bool swap_key_range; /* Swaps key range if true */
366  bool is_key_range_satisfied; /* Does current key satisfy range */
367  bool is_key_filter_satisfied; /* Does current key satisfy filter */
368  bool is_condition_satisfied; /* Does current key satisfy range and filter */
369  RECDES rec; /* Current record */
370  LEAF_REC leaf_pnt; /* Leaf record pointer OID overflows */
371  int offset; /* Offset in record to the first OID */
372  OID class_oid; /* Class identifier for current object */
373  OID inst_oid; /* Current object identifier */
374  BTREE_NODE_TYPE node_type; /* Current node type: leaf or overflow */
375  bool iss_get_first_result_only; /* Index skip scan special case */
376  bool restart_on_first; /* restart after first OID */
377  int CLS_satisfied; /* All conditions are satisfied */
378  OID saved_class_oid; /* Saved class identifier */
379  OID saved_inst_oid; /* Saved object identifier */
380  char oid_space[2 * OR_OID_SIZE]; /* OID buffer to store "last" index key */
381  DB_VALUE prev_key; /* Previous key */
382  bool clear_prev_key; /* Previous key needs clear if true */
383  LOG_LSA prev_leaf_lsa; /* LSA of previous page */
384  LOG_LSA ovfl_page_lsa; /* LSA of overflow page */
385  bool keep_on_copying; /* True when OID storage exceeds it's default maximum size and need to stop current
386  * iteration of range search after this key */
387  OID ck_pseudo_oid; /* Current key pseudo identifier */
388  OID saved_ck_pseudo_oid; /* Saved current key pseudo identifier */
389  OID nk_pseudo_oid; /* Next key pseudo identifier */
390  OID saved_nk_pseudo_oid; /* Saved next key pseudo identifier */
391  OID saved_nk_class_oid; /* Saved class oid for next key */
392 
393  bool end_of_leaf_level; /* True if end of leaf level was reached */
394  bool curr_key_locked; /* Is current key locked */
395  bool next_key_locked; /* Is next key locked */
396  bool current_lock_request; /* Current key needs locking */
397  bool read_prev_key; /* Previous key is read */
398 };
399 
402 {
403  int indexes_count; /* The total of indexes */
404  bool is_all; /* Get all indexes or get a specified index */
405  char *index_name; /* Index name which user specified */
406  OID *class_oids; /* Class oids array */
407  int class_oid_count; /* The count of above oids array */
408  int show_type; /* Show type */
409 };
410 
411 /* BTREE_SEARCH_KEY_HELPER -
412  * Structure usually used to return the result of search key functions.
413  */
416 {
418  {
419  NO_FENCE_KEY = 0,
420  HAS_FENCE_KEY
421  };
422 
423  BTREE_SEARCH result; /* Result of key search. */
424  PGSLOTID slotid; /* Slot ID of found key or slot ID of the biggest key smaller then key (if not found). */
425 
427 };
428 /* BTREE_SEARCH_KEY_HELPER static initializer. */
429 // *INDENT-OFF*
430 #define BTREE_SEARCH_KEY_HELPER_INITIALIZER \
431  { BTREE_KEY_NOTFOUND, NULL_SLOTID, btree_search_key_helper::NO_FENCE_KEY }
432 // *INDENT-ON*
433 
434 /* BTREE_FIND_UNIQUE_HELPER -
435  * Structure used by find unique functions.
436  *
437  * Functions:
438  * btree_key_find_unique_version_oid.
439  * btree_key_find_and_lock_unique.
440  * btree_key_find_and_lock_unique_of_unique.
441  * btree_key_find_and_lock_unique_of_non_unique.
442  */
445 {
446  OID oid; /* OID of found object (if found). */
447  OID match_class_oid; /* Object is only considered if its class OID matches this class OID. */
448  LOCK lock_mode; /* Lock mode for found unique object. */
449  MVCC_SNAPSHOT *snapshot; /* Snapshot used to filter objects not visible. If NULL, objects are not filtered. */
450  bool found_object; /* Set to true if object was found. */
451 
452  PERF_UTIME_TRACKER time_track;
453 
454 #if defined (SERVER_MODE)
455  OID locked_oid; /* Locked object. */
456  OID locked_class_oid; /* Locked object class OID. */
457 #endif /* SERVER_MODE */
458 };
459 /* BTREE_FIND_UNIQUE_HELPER static initializer. */
460 #if defined (SERVER_MODE)
461 #define BTREE_FIND_UNIQUE_HELPER_INITIALIZER \
462  { OID_INITIALIZER, /* oid */ \
463  OID_INITIALIZER, /* match_class_oid */ \
464  NULL_LOCK, /* lock_mode */ \
465  NULL, /* snapshot */ \
466  false, /* found_object */ \
467  PERF_UTIME_TRACKER_INITIALIZER, /* time_track */ \
468  OID_INITIALIZER, /* locked_oid */ \
469  OID_INITIALIZER /* locked_class_oid */ \
470  }
471 #else /* !SERVER_MODE */ /* SA_MODE */
472 #define BTREE_FIND_UNIQUE_HELPER_INITIALIZER \
473  { OID_INITIALIZER, /* oid */ \
474  OID_INITIALIZER, /* match_class_oid */ \
475  NULL_LOCK, /* lock_mode */ \
476  NULL, /* snapshot */ \
477  false, /* found_object */ \
478  PERF_UTIME_TRACKER_INITIALIZER /* time_track */ \
479  }
480 #endif /* SERVER_MODE */
481 
482 /* BTREE_REC_SATISFIES_SNAPSHOT_HELPER -
483  * Structure used as helper for btree_record_satisfies_snapshot function.
484  */
487 {
488  MVCC_SNAPSHOT *snapshot; /* Input: MVCC snapshot used to filter objects not visible for current transaction. */
489  OID match_class_oid; /* Object class OID must match this class OID. Can be applied only to unique indexes. */
490  OID *oid_ptr; /* OID buffer used to output OID's of visible objects. */
491  int oid_cnt; /* Visible OID counter. */
492  int oid_capacity; /* OID buffer capacity. */
493 };
494 /* BTREE_REC_SATISFIES_SNAPSHOT_HELPER static initializer. */
495 #define BTREE_REC_SATISFIES_SNAPSHOT_HELPER_INITIALIZER \
496  { NULL /* snapshot */, OID_INITIALIZER /* match_class_oid */, \
497  NULL /* oid_ptr */, 0 /* oid_cnt */, 0 /* oid_capacity */ }
498 
499 /*
500  * btree_search_key_and_apply_functions () argument functions.
501  */
502 
503 /* BTREE_ROOT_WITH_KEY_FUNCTION -
504  * btree_search_key_and_apply_functions internal function called on root page,
505  * before starting to advance on pages.
506  *
507  * Arguments:
508  * thread_p (in) : Thread entry.
509  * btid (in) : B-tree identifier.
510  * btid_int (out) : Output b-tree info.
511  * key (in) : Key value.
512  * is_leaf (out) : Output true if root is leaf node.
513  * key_slotid (out) : Output slotid of key if found, NULL_SLOTID otherwise.
514  * stop (out) : Output true when advancing in b-tree should be
515  * stopped.
516  * restart (out) : Output true when advancing in b-tree must be
517  * restarted starting with root.
518  * other_args (in/out) : Function specific arguments.
519  *
520  * List of functions:
521  * btree_get_root_with_key.
522  * btree_fix_root_for_insert.
523  */
525  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
526  bool * stop, bool * restart, void *other_args);
527 
528 /* BTREE_ADVANCE_WITH_KEY_FUNCTION -
529  * btree_search_key_and_apply_functions internal function called to advance
530  * from root to leaf level of b-tree by following the key. The function can
531  * modify the structure of b-tree, will advance one level at each call and
532  * it will ultimately stop at the leaf page where the key belongs or would
533  * belong if it existed.
534  *
535  * Arguments:
536  * thread_p (in) : Thread entry.
537  * btid_int (in) : B-tree info.
538  * key (in) : Key value.
539  * is_leaf (out) : Output true if root is leaf node.
540  * key_slotid (out) : Output slotid of key if found, NULL_SLOTID otherwise.
541  * stop (out) : Output true when advancing in b-tree should be
542  * stopped.
543  * restart (out) : Output true when advancing in b-tree must be
544  * restarted starting with root.
545  * other_args (in/out) : Function specific arguments.
546  *
547  * List of functions:
548  * btree_advance_and_find_key.
549  * btree_split_node_and_advance.
550  */
552  PAGE_PTR * crt_page, PAGE_PTR * advance_to_page, bool * is_leaf,
553  BTREE_SEARCH_KEY_HELPER * search_key, bool * stop, bool * restart,
554  void *other_args);
555 
556 /* BTREE_PROCESS_KEY_FUNCTION -
557  * btree_search_key_and_apply_functions internal function called after
558  * advancing in leaf page. It should process or manipulate data for the given
559  * key.
560  *
561  * Arguments:
562  * thread_p (in) : Thread entry.
563  * btid_int (in) : B-tree info.
564  * key (in) : Key value.
565  * key_slotid (in) : Slot ID of key if it was found, NULL_SLOTID otherwise.
566  * restart (out) : Output true when advancing in b-tree must be restarted
567  * starting with root.
568  * args (in/out) : Function specific arguments.
569  *
570  * Functions:
571  * btree_key_find_unique_version_oid.
572  * btree_key_find_and_lock_unique
573  * btree_key_find_and_lock_unique_of_unique.
574  * btree_key_find_and_lock_unique_of_non_unique
575  * btree_key_insert_new_object.
576  * btree_key_find_and_insert_delete_mvccid.
577  */
579  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
580  void *other_args);
581 
582 /* BTREE_PROCESS_OBJECT_FUNCTION -
583  * btree_record_process_objects internal function called for each object found
584  * in b-tree leaf/overflow records. It should process or manipulate object
585  * data in record.
586  *
587  * Functions:
588  * btree_record_satisfies_snapshot.
589  * btree_select_visible_object_for_range_scan.
590  * btree_fk_object_does_exist.
591  */
593  char *object_ptr, OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info,
594  bool * stop, void *args);
595 
596 /* Type of b-tree scans. */
597 /* Covering index. */
598 #define BTS_IS_INDEX_COVERED(bts) \
599  ((bts) != NULL && (bts)->index_scan_idp != NULL && SCAN_IS_INDEX_COVERED ((bts)->index_scan_idp))
600 /* Multiple ranges optimization. */
601 #define BTS_IS_INDEX_MRO(bts) \
602  ((bts) != NULL && (bts)->index_scan_idp != NULL && SCAN_IS_INDEX_MRO ((bts)->index_scan_idp))
603 /* Index skip scan. */
604 #define BTS_IS_INDEX_ISS(bts) \
605  ((bts) != NULL && (bts)->index_scan_idp != NULL && SCAN_IS_INDEX_ISS ((bts)->index_scan_idp))
606 /* Index loose scan. */
607 #define BTS_IS_INDEX_ILS(bts) \
608  ((bts) != NULL && (bts)->index_scan_idp != NULL && SCAN_IS_INDEX_ILS ((bts)->index_scan_idp) \
609  && BTS_IS_INDEX_COVERED(bts))
610 #define BTS_NEED_COUNT_ONLY(bts) \
611  ((bts) != NULL && (bts)->index_scan_idp != NULL && (bts)->index_scan_idp->need_count_only)
612 
613 /* Increment read OID counters for b-tree scan. */
614 #define BTS_INCREMENT_READ_OIDS(bts) \
615  do \
616  { \
617  (bts)->n_oids_read++; \
618  (bts)->n_oids_read_last_iteration++; \
619  } \
620  while (false)
621 
622 /* Soft capacity of OID buffer. It is used to stop one scan iteration as a
623  * general rule. There is an exception when hard capacity is applied.
624  */
625 #define BTS_IS_SOFT_CAPACITY_ENOUGH(bts, count) \
626  ((count) <= (BTS_IS_INDEX_COVERED (bts) \
627  ? /* Covering index: use max tuples as soft limit. */ (bts)->index_scan_idp->indx_cov.max_tuples \
628  : /* Normal scan: use max_oid_cnt as soft limit. */ (bts)->index_scan_idp->oid_list->max_oid_cnt))
629 
630 /* Hard capacity is the maximum number that can fit the OID buffer. It is
631  * used when the number of objects in a single key does not fit the soft
632  * capacity.
633  * The objects of this key will be selected from leaf and overflows as long as they
634  * fit the hard capacity. This provides enough space for at least one leaf
635  * and one overflow page or for two full overflow pages.
636  * There is no limit as hard capacity for covering index. Since it uses a
637  * list file, its capacity is considered infinite.
638  */
639 #define BTS_IS_HARD_CAPACITY_ENOUGH(bts, count) \
640  (BTS_IS_INDEX_COVERED (bts) \
641  ? /* Covering index: no hard limit. */ true \
642  : /* Normal scan: use buffer capacity as hard limit. */ (count) <= (bts)->index_scan_idp->oid_list->capacity)
643 
/* Save an object selected during scan into object buffer. This can only be
 * used by two types of scans:
 * 1. Regular range scans.
 * 2. Index skip scan, if current operation is ISS_OP_DO_RANGE_SEARCH.
 *
 * bts : b-tree scan; the OID is copied to (bts)->oid_ptr, which is then advanced by one.
 * oid : OID to save (passed to COPY_OID as source).
 *
 * The read OID counters are incremented through BTS_INCREMENT_READ_OIDS. Every use of the bts argument is
 * parenthesized, so any pointer-valued expression may be passed, not only a plain identifier.
 */
#define BTS_SAVE_OID_IN_BUFFER(bts, oid) \
  do \
    { \
      /* Assert this is not used in an inappropriate context. */ \
      assert (!BTS_IS_INDEX_COVERED (bts)); \
      assert (!BTS_IS_INDEX_MRO (bts)); \
      assert (!BTS_IS_INDEX_ISS (bts) || (bts)->index_scan_idp->iss.current_op == ISS_OP_DO_RANGE_SEARCH); \
      COPY_OID ((bts)->oid_ptr, oid); \
      (bts)->oid_ptr++; \
      BTS_INCREMENT_READ_OIDS (bts); \
      /* Neither the per-iteration counter nor the write position may exceed the buffer capacity. */ \
      assert ((bts)->n_oids_read_last_iteration <= (bts)->index_scan_idp->oid_list->capacity); \
      assert (((bts)->oid_ptr - (bts)->index_scan_idp->oid_list->oidp) <= (bts)->index_scan_idp->oid_list->capacity); \
      /* Should we also increment (bts)->index_scan_idp->oid_list.oid_cnt? */ \
    } \
  while (false)
664 
665 //
666 // bts_reset_scan - reset b-tree scan (clear progress)
667 //
668 // thread_p (in) : thread entry
669 // bts (in) : b-tree scan
670 static void
672 {
673  /* Reset bts->is_scan_started. */
674  bts->is_scan_started = false;
675  /* No current leaf node. */
676  VPID_SET_NULL (&(bts)->C_vpid);
677  if (bts->C_page != NULL)
678  {
679  pgbuf_unfix_and_init (thread_p, bts->C_page);
680  }
681 }
682 
683 /* BTREE_FIND_FK_OBJECT -
684  * Structure used to find if a key of foreign key index has any objects.
685  */
688 {
690 #if defined (SERVER_MODE)
691  OID locked_object;
692  LOCK lock_mode;
693 #endif /* SERVER_MODE */
694 };
695 /* BTREE_FIND_FK_OBJECT static initializer */
696 #if defined (SERVER_MODE)
697 #define BTREE_FIND_FK_OBJECT_INITIALIZER \
698  { OID_INITIALIZER, OID_INITIALIZER, NULL_LOCK }
699 #else /* !SERVER_MODE */ /* SA_MODE */
700 #define BTREE_FIND_FK_OBJECT_INITIALIZER \
701  { OID_INITIALIZER }
702 #endif /* SA_MODE */
703 
704 /* BTREE_INSERT_HELPER -
705  * Structure used inside btree_insert_internal functions to group required
706  * data into one argument.
707  */
710 {
711  BTREE_OBJECT_INFO obj_info; /* B-tree object info. Keeps old version for mvcc update same key. */
712  BTREE_OP_PURPOSE purpose; /* Purpose/context for calling btree_insert_internal. */
713  int op_type; /* Single-multi insert/modify operation type. */
714  btree_unique_stats *unique_stats_info; /* Unique statistics kept when operation type is not single. */
715  int key_len_in_page; /* Packed length of key being inserted. */
716 
717  PGBUF_LATCH_MODE nonleaf_latch_mode; /* Default page latch mode while advancing through non-leaf nodes. */
718 
719  bool is_first_try; /* True if this is first attempt to fix root page. B-tree information is loaded only
720  * first time. */
721  bool need_update_max_key_len; /* Set to true when a node max key length must be updated. All children nodes will also
722  * update max key length. */
723  bool is_crt_node_write_latched; /* Set to true when a node is latched exclusively. Then promotion will not be
724  * required. */
725  bool is_root; /* True if current node is root. */
726 
727  bool is_unique_key_added_or_deleted; /* Set to true when keys are inserted for the first time or when they are
728  * deleted. */
729  bool is_unique_multi_update; /* Multi-update of unique index. More than one visible object may be allowed, as long
730  * as at the end of execution, unique constraint is not violated. */
731  bool is_ha_enabled; /* An exception to above rule. If HA is enabled, no unique constraint violation is
732  * allowed at any time. */
733 
734  bool log_operations; /* er_log. */
735  bool is_null; /* is key NULL. */
736  char *printed_key; /* Printed key. */
737  SHA1Hash printed_key_sha1; /* SHA1 of printed key - useful for very large keys */
738 
740 
741  /* Recovery data. */
750 
751  /* Performance tracker. */
752  PERF_UTIME_TRACKER time_track;
753 
754 #if defined (SERVER_MODE)
755  OID saved_locked_oid; /* Save locked object from unique index key. */
756  OID saved_locked_class_oid; /* Save class of locked object. */
757 #endif /* SERVER_MODE */
758 
759  // *INDENT-OFF*
761  // *INDENT-ON*
762 };
763 
764 // *INDENT-OFF*
766  : obj_info (BTREE_OBJECT_INFO_INITIALIZER)
767  , purpose (BTREE_OP_NO_OP)
768  , op_type (0)
769  , unique_stats_info (NULL)
770  , key_len_in_page (0)
771  , nonleaf_latch_mode (PGBUF_LATCH_READ)
772  , is_first_try (true)
773  , need_update_max_key_len (false)
774  , is_crt_node_write_latched (false)
775  , is_root (false)
776  , is_unique_key_added_or_deleted (true)
777  , is_unique_multi_update (false)
778  , is_ha_enabled (false)
779  , log_operations (false)
780  , is_null (false)
781  , printed_key (NULL)
782  , printed_key_sha1 (SHA1_HASH_INITIALIZER)
783  , insert_list (NULL)
784  , leaf_addr (LOG_DATA_ADDR_INITIALIZER)
785  , rcvindex (RV_NOT_DEFINED)
786  , rv_keyval_data (NULL)
787  , rv_keyval_data_length (0)
788  , rv_redo_data (NULL)
789  , rv_redo_data_ptr (NULL)
790  , compensate_undo_nxlsa (LSA_INITIALIZER)
791  , is_system_op_started (false)
792  , time_track (PERF_UTIME_TRACKER_INITIALIZER)
793 #if defined (SERVER_MODE)
794  , saved_locked_oid (OID_INITIALIZER)
795  , saved_locked_class_oid (OID_INITIALIZER)
796 #endif
797 {
798 }
799 // *INDENT-ON*
800 
801 #define BTREE_INSERT_OID(ins_helper) \
802  (&((ins_helper)->obj_info.oid))
803 #define BTREE_INSERT_CLASS_OID(ins_helper) \
804  (&((ins_helper)->obj_info.class_oid))
805 #define BTREE_INSERT_MVCC_INFO(ins_helper) \
806  (&((ins_helper)->obj_info.mvcc_info))
807 
808 /* BTREE_DELETE_HELPER -
809  * Structure used inside btree_delete_internal functions to group required
810  * data into one argument.
811  */
814 {
815  BTREE_OBJECT_INFO object_info; /* Object info required for b-tree. */
816  BTREE_OBJECT_INFO second_object_info; /* Object info required for undo insert to unique index. */
817  BTREE_OP_PURPOSE purpose; /* Purpose of delete operation. */
818  PGBUF_LATCH_MODE nonleaf_latch_mode; /* Latch mode used to for non-leaf nodes. */
819  int op_type; /* Operation type. */
820  btree_unique_stats *unique_stats_info; /* Used to collect statistics of multi-row operations in unique
821  * indexes. */
822  BTREE_MVCC_INFO match_mvccinfo; /* Used to match MVCC information when searching for object in index key. */
823  OR_BUF *buffered_key; /* Buffered key value. */
824  char *printed_key; /* Key printed value. */
825  SHA1Hash printed_key_sha1; /* SHA1 of printed key - useful for very large keys */
826  bool log_operations; /* Debugging purpose logging. */
827  bool is_root; /* True if current node is root. */
828  bool is_first_search; /* True for the first b-tree traversal. */
829  bool check_key_deleted; /* Set to true if it is possible to have more than one visible object in unique key
830  * (MULTI_ROW_UPDATE). */
831  bool is_key_deleted; /* Used to correct collected statistics when key is not actually deleted. */
832 
833  /* Recovery structures. */
841 
842  /* Performance tracker. */
843  PERF_UTIME_TRACKER time_track;
844 
845  // *INDENT-OFF*
847  // *INDENT-ON*
848 };
849 
850 // *INDENT-OFF*
852  : object_info (BTREE_OBJECT_INFO_INITIALIZER)
853  , second_object_info (BTREE_OBJECT_INFO_INITIALIZER)
858  , match_mvccinfo (BTREE_MVCC_INFO_INITIALIZER)
859  , buffered_key (NULL)
860  , printed_key (NULL)
862  , log_operations (false)
863  , is_root (false)
864  , is_first_search (true)
865  , check_key_deleted (false)
866  , is_key_deleted (false)
868  , rv_keyval_data (NULL)
870  , rv_redo_data (NULL)
872  , reference_lsa (LSA_INITIALIZER)
873  , is_system_op_started (false)
874  , time_track (PERF_UTIME_TRACKER_INITIALIZER)
875 {
876 }
877 // *INDENT-ON*
878 
/* Accessors for the object handled by a delete helper. Each one yields a pointer to the matching field of
 * helper->object_info (oid, class_oid, mvcc_info). */
#define BTREE_DELETE_OID(helper) (&(helper)->object_info.oid)
#define BTREE_DELETE_CLASS_OID(helper) (&(helper)->object_info.class_oid)
#define BTREE_DELETE_MVCC_INFO(helper) (&(helper)->object_info.mvcc_info)
885 
886 // Performance tracking template functions
887 // Helper is either BTREE_INSERT_HELPER or BTREE_DELETE_HELPER
888 template < typename Helper > static inline void
889 btree_perf_track_time (THREAD_ENTRY * thread_p, Helper * helper)
890 {
891  PERF_UTIME_TRACKER_TIME (thread_p, &helper->time_track, PSTAT_BT_LEAF);
892  switch (helper->purpose)
893  {
898  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_INSERT);
899  break;
901  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_MVCC_DELETE);
902  break;
904  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_MARK_DELETE);
905  break;
908  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_DELETE);
909  break;
913  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_DELETE);
914  break;
917  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_INSERT);
918  break;
920  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_MVCC_DELETE);
921  break;
923  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_VACUUM);
924  break;
926  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_VACUUM_INSID);
927  break;
928  default:
929  assert (false);
930  }
931 }
932 
933 template < typename Helper > static inline void
934 btree_perf_track_traverse_time (THREAD_ENTRY * thread_p, Helper * helper)
935 {
936  PERF_UTIME_TRACKER_TIME (thread_p, &helper->time_track, PSTAT_BT_TRAVERSE);
937  switch (helper->purpose)
938  {
944  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_INSERT_TRAVERSE);
945  break;
947  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_MVCC_DELETE_TRAVERSE);
948  break;
950  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_MARK_DELETE_TRAVERSE);
951  break;
953  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_DELETE_TRAVERSE);
954  break;
958  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_DELETE_TRAVERSE);
959  break;
962  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_INSERT_TRAVERSE);
963  break;
965  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_UNDO_MVCC_DELETE_TRAVERSE);
966  break;
968  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_VACUUM_TRAVERSE);
969  break;
971  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &helper->time_track, PSTAT_BT_VACUUM_INSID_TRAVERSE);
972  break;
973  default:
974  assert (false);
975  }
976 }
977 
978 static inline void
979 btree_perf_ovf_oids_fix_time (THREAD_ENTRY * thread_p, PERF_UTIME_TRACKER * track)
980 {
981  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, track, PSTAT_BT_FIX_OVF_OIDS);
982 }
983 
984 static inline void
985 btree_perf_unique_lock_time (THREAD_ENTRY * thread_p, PERF_UTIME_TRACKER * track, LOCK lock)
986 {
987  if (lock == S_LOCK)
988  {
989  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, track, PSTAT_BT_UNIQUE_RLOCKS);
990  }
991  else
992  {
993  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, track, PSTAT_BT_UNIQUE_WLOCKS);
994  }
995 }
996 
997 /* B-tree redo recovery flags. They are additional to
998  * LOG_RV_RECORD_MODIFY_MASK.
999  */
1000 /* Flag record belongs to overflow node. */
1001 #define BTREE_RV_OVERFLOW_FLAG 0x2000
1002 /* Flag redo data contains debugging info. */
1003 #define BTREE_RV_DEBUG_INFO_FLAG 0x1000
1004 /* Exclusive b-tree redo flags mask. */
1005 #define BTREE_RV_EXCLUSIVE_FLAGS_MASK 0x3C00
1006 /* The available flags are 0x0800 and 0x0400. B-tree recovery needs 10 bits
1007  * for around 820 maximum possible slots.
1008  * IO_MAX_PAGE_SIZE / (slot size + min record size) = 16k/20 ~= 820.
1009  *
1010  * NOTE: 0x0800 flag is already used for insert new key and MVCC delete recovery.
1011  */
1012 
1013 /* B-tree redo recovery flags mask. */
1014 #define BTREE_RV_FLAGS_MASK \
1015  (LOG_RV_RECORD_MODIFY_MASK | BTREE_RV_EXCLUSIVE_FLAGS_MASK)
1016 
1017 /* Set overflow flag for redo. */
1018 #define BTREE_RV_SET_OVERFLOW_NODE(addr) \
1019  ((addr)->offset |= BTREE_RV_OVERFLOW_FLAG)
1020 
1021 #if !defined (NDEBUG)
/* Set debug info for redo.
 *
 * Writes at rv_ptr, in this order: the debug id, btid_int->unique_pk, btid_int->topclass_oid (only when
 * BTREE_IS_UNIQUE holds for unique_pk) and the key domain packed by or_pack_domain. rv_ptr is advanced past
 * the written data and then passed through PTR_ALIGN (..., INT_ALIGNMENT). Finally BTREE_RV_DEBUG_INFO_FLAG
 * is set in addr->offset.
 *
 * If or_packed_domain_size reports more than BTID_DOMAIN_CHECK_MAX_SIZE, nothing is written and the flag is
 * left clear; the `break' below leaves the enclosing do/while.
 *
 * All macro arguments that are dereferenced are parenthesized, so any pointer-valued expression (for
 * example &local_btid_int) can be passed for btid_int.
 */
#define BTREE_RV_REDO_SET_DEBUG_INFO(addr, rv_ptr, btid_int, id) \
  do \
    { \
      assert ((addr) != NULL); \
      assert ((rv_ptr) != NULL); \
      assert ((btid_int) != NULL); \
      assert (!BTREE_RV_HAS_DEBUG_INFO ((addr)->offset)); \
      if (or_packed_domain_size ((btid_int)->key_type, 0) > BTID_DOMAIN_CHECK_MAX_SIZE) \
	{ \
	  /* Too much space required. Give up packing debug info. */ \
	  break; \
	} \
      /* Put debug ID. */ \
      OR_PUT_INT (rv_ptr, id); \
      (rv_ptr) += OR_INT_SIZE; \
      /* Put unique_pk */ \
      OR_PUT_INT (rv_ptr, (btid_int)->unique_pk); \
      (rv_ptr) += OR_INT_SIZE; \
      if (BTREE_IS_UNIQUE ((btid_int)->unique_pk)) \
	{ \
	  /* Put topclass_oid. */ \
	  OR_PUT_OID (rv_ptr, &(btid_int)->topclass_oid); \
	  (rv_ptr) += OR_OID_SIZE; \
	} \
      /* Put key type. */ \
      (rv_ptr) = or_pack_domain (rv_ptr, (btid_int)->key_type, 0, 0); \
      (rv_ptr) = PTR_ALIGN (rv_ptr, INT_ALIGNMENT); \
      (addr)->offset |= BTREE_RV_DEBUG_INFO_FLAG; \
    } \
  while (false)
1053 
1054 /* Save debug info for redo and possible undo. rv_undo_ptr is a char * cursor
1055  * into the undo buffer (it can be NULL, in which case only redo data is written).
1056  */
1057 #define BTREE_RV_UNDOREDO_SET_DEBUG_INFO(addr, rv_redo_ptr, rv_undo_ptr, btid_int, id) \
1058  do \
1059  { \
1060  char *save_rv_redo_ptr = (rv_redo_ptr); \
1061  BTREE_RV_REDO_SET_DEBUG_INFO (addr, rv_redo_ptr, btid_int, id); \
1062  if ((rv_undo_ptr) != NULL) \
1063  { \
1064  memcpy (rv_undo_ptr, save_rv_redo_ptr, CAST_BUFLEN ((rv_redo_ptr) - save_rv_redo_ptr)); \
1065  (rv_undo_ptr) += CAST_BUFLEN ((rv_redo_ptr) - save_rv_redo_ptr); \
1066  } \
1067  } \
1068  while (false)
1069 
1070 
1071 #define BTREE_RV_DEBUG_INFO_MAX_SIZE \
1072  (OR_INT_SIZE /* Debug ID. */ \
1073  + OR_INT_SIZE /* unique_pk */ \
1074  + OR_OID_SIZE /* topclass_oid */ \
1075  + BTID_DOMAIN_CHECK_MAX_SIZE /* key_type. */)
1076 #endif /* !NDEBUG */
1077 
/* Is flag for overflow node set? The argument is parenthesized so that compound expressions
 * (e.g. a | b) are tested as a whole. */
#define BTREE_RV_IS_OVERFLOW_NODE(flags) \
  (((flags) & BTREE_RV_OVERFLOW_FLAG) != 0)
/* Is flag for debug info set? The argument is parenthesized so that compound expressions
 * (e.g. a | b) are tested as a whole. */
#define BTREE_RV_HAS_DEBUG_INFO(flags) \
  (((flags) & BTREE_RV_DEBUG_INFO_FLAG) != 0)
1084 
/* Flag used only in context of insert new key. */
/* The flag is used to update page maximum key length. */
#define BTREE_RV_UPDATE_MAX_KEY_LEN 0x0800
/* Raise the flag in addr->offset. */
#define BTREE_RV_SET_UPDATE_MAX_KEY_LEN(addr) \
  ((addr)->offset |= BTREE_RV_UPDATE_MAX_KEY_LEN)
/* Test the flag. The argument is parenthesized so that compound expressions (e.g. a | b) are tested as a
 * whole. */
#define BTREE_RV_IS_UPDATE_MAX_KEY_LEN(flags) \
  (((flags) & BTREE_RV_UPDATE_MAX_KEY_LEN) != 0)
1092 
/* Flag used only in context of MVCC delete. */
/* The flag is used to undo delete object inserted by same transaction. The insert ID must also match. */
#define BTREE_RV_UNDO_MVCCDEL_MYOBJ 0x0800
/* Raise the flag in addr->offset. */
#define BTREE_RV_SET_UNDO_MVCCDEL_MYOBJ(addr) \
  ((addr)->offset |= BTREE_RV_UNDO_MVCCDEL_MYOBJ)
/* Test the flag. The argument is parenthesized so that compound expressions (e.g. a | b) are tested as a
 * whole. */
#define BTREE_RV_IS_UNDO_MVCCDEL_MYOBJ(flags) \
  (((flags) & BTREE_RV_UNDO_MVCCDEL_MYOBJ) != 0)
1100 
1101 /* Default buffer size of redo recovery changes. Should cover all cases. */
1102 /* Just a rough estimation */
1103 const size_t BTREE_RV_BUFFER_SIZE =
1104 #if defined (NDEBUG)
1106 #else /* !NDEBUG */
1108 #endif /* !NDEBUG */
1109 
1110 static void
1111 BTREE_RV_GET_DATA_LENGTH (const char *rv_ptr, const char *rv_start, int &rv_length)
1112 {
1113  assert (rv_ptr != NULL);
1114  assert (rv_start != NULL);
1115  rv_length = CAST_BUFLEN (rv_ptr - rv_start);
1116  assert (0 <= rv_length && (size_t) rv_length <= BTREE_RV_BUFFER_SIZE);
1117 }
1118 
1119 /* Debug identifiers to help with detecting recovery issues. */
1121 {
1140 };
1142 
/* b-tree debug logging */
/* The conditional logging macros are wrapped in do/while (false) so that each one expands to exactly one
 * statement. A bare `if' expansion would capture an `else' that follows the macro call at the call site.
 * Callers must terminate the call with a semicolon.
 *
 * btree_log_if_enabled - logs only when prm_get_bool_value (PRM_ID_LOG_BTREE_OPS) is true.
 * btree_log            - logs unconditionally; prefix and msg must be string literals (they are concatenated
 *                        with LOG_THREAD_TRAN_MSG) and at least one variadic argument is required.
 * btree_insert_log     - logs with the "BTREE_INSERT " prefix only when helper->log_operations is set.
 * btree_delete_log     - logs with the "BTREE_DELETE " prefix only when helper->log_operations is set.
 */
#define btree_log_if_enabled(...) \
  do \
    { \
      if (prm_get_bool_value (PRM_ID_LOG_BTREE_OPS)) \
	{ \
	  _er_log_debug (ARG_FILE_LINE, __VA_ARGS__); \
	} \
    } \
  while (false)
#define btree_log(prefix, msg, ...) \
  _er_log_debug (ARG_FILE_LINE, prefix LOG_THREAD_TRAN_MSG ": " msg "\n", \
                 LOG_THREAD_TRAN_ARGS (thread_get_thread_entry_info ()), __VA_ARGS__)
#define btree_insert_log(helper, msg, ...) \
  do \
    { \
      if ((helper)->log_operations) \
	{ \
	  btree_log ("BTREE_INSERT ", msg, __VA_ARGS__); \
	} \
    } \
  while (false)
#define btree_delete_log(helper, msg, ...) \
  do \
    { \
      if ((helper)->log_operations) \
	{ \
	  btree_log ("BTREE_DELETE ", msg, __VA_ARGS__); \
	} \
    } \
  while (false)
1153 
1154 /* logging btid */
1155 #define BTREE_ID_MSG "index = %d, %d|%d"
1156 
1157 /* logging b-tree mvcc info */
1158 #define BTREE_MVCC_INFO_AS_ARGS(mvcc_info) \
1159  (unsigned long long) BTREE_MVCC_INFO_INSID (mvcc_info), (unsigned long long) BTREE_MVCC_INFO_DELID (mvcc_info)
1160 
1161 /* logging b-tree object info */
1162 #define BTREE_OBJINFO_MSG(name) \
1163  name " { OID = %d|%d|%d, CLASS = %d|%d|%d, MVCC_INFO = %llu|%llu } "
1164 #define BTREE_OBJINFO_AS_ARGS(objinfo) \
1165  OID_AS_ARGS (&((objinfo)->oid)), \
1166  OID_AS_ARGS (&((objinfo)->class_oid)), \
1167  BTREE_MVCC_INFO_AS_ARGS (&((objinfo)->mvcc_info))
1168 
1169 /* logging a key value (stored as char *) */
1170 #define BTREE_PRINT_KEY_MSG(key) key " = %.32s"
1171 #define BTREE_PRINT_KEY_ARGS(key) (key) != NULL ? (key) : "** UNKNOWN KEY **"
1172 
1173 /* logging insert helper */
1174 #define BTREE_INSERT_HELPER_MSG(tabs) \
1175  tabs "INSERT HELPER: \n" \
1176  tabs "\t" BTREE_OBJINFO_MSG("obj_info") "\n" \
1177  tabs "\t" "purpose = %s \n" \
1178  tabs "\t" "op_type = %s \n" \
1179  tabs "\t" BTREE_PRINT_KEY_MSG("printed_key") "... (sha1 = %08x | %08x | %08x | %08x | %08x) \n"
1180 #define BTREE_INSERT_HELPER_AS_ARGS(helper) \
1181  BTREE_OBJINFO_AS_ARGS (&(helper)->obj_info), \
1182  btree_purpose_to_string ((helper)->purpose), \
1183  btree_op_type_to_string ((helper)->op_type), \
1184  BTREE_PRINT_KEY_ARGS((helper)->printed_key), SHA1_AS_ARGS (&(helper)->printed_key_sha1)
1185 
/* logging delete helper */
/* Format string describing a delete helper; `tabs' is a string literal prepended to every line. The
 * conversions are consumed, in order, by BTREE_DELETE_HELPER_AS_ARGS. Each line ends with "\n" directly
 * followed by the next line's indentation, matching BTREE_INSERT_HELPER_MSG. */
#define BTREE_DELETE_HELPER_MSG(tabs) \
  tabs "DELETE HELPER: \n" \
  tabs "\t" BTREE_OBJINFO_MSG("object_info") "\n" \
  tabs "\t" "purpose = %s \n" \
  tabs "\t" "op_type = %s \n" \
  tabs "\t" BTREE_PRINT_KEY_MSG("printed_key") "... (sha1 = %08x | %08x | %08x | %08x | %08x) \n" \
  tabs "\t" "match_mvccinfo = %llu|%llu \n"
1194 #define BTREE_DELETE_HELPER_AS_ARGS(helper) \
1195  BTREE_OBJINFO_AS_ARGS (&(helper)->object_info), \
1196  btree_purpose_to_string ((helper)->purpose), \
1197  btree_op_type_to_string ((helper)->op_type), \
1198  BTREE_PRINT_KEY_ARGS((helper)->printed_key), SHA1_AS_ARGS (&(helper)->printed_key_sha1), \
1199  BTREE_MVCC_INFO_AS_ARGS (&(helper)->match_mvccinfo)
1200 
1201 /* log changes during insert */
1202 #define BTREE_INSERT_MODIFY_MSG(desc) \
1203  desc ": \n" \
1204  BTREE_INSERT_HELPER_MSG("\t") \
1205  "\t" PGBUF_PAGE_MODIFY_MSG("%s page") "\n" \
1206  "\t" "slot = %d \n" \
1207  "\t" "record new size = %d \n" \
1208  "\t" BTREE_ID_MSG "\n"
1209 #define BTREE_INSERT_MODIFY_ARGS(thread_p, helper, page, save_lsa, is_leaf, slotid, new_size, btid) \
1210  BTREE_INSERT_HELPER_AS_ARGS (helper), \
1211  (is_leaf) ? "leaf" : "overflow", PGBUF_PAGE_MODIFY_ARGS(page, save_lsa), \
1212  slotid, \
1213  new_size, \
1214  BTID_AS_ARGS (btid)
1215 
1216 /* log changes during delete */
1217 #define BTREE_DELETE_MODIFY_MSG(desc) \
1218  desc ": \n" \
1219  BTREE_DELETE_HELPER_MSG("\t") \
1220  "\t" PGBUF_PAGE_MODIFY_MSG("%s page") "\n" \
1221  "\t" "slot = %d \n" \
1222  "\t" "record new size = %d \n" \
1223  "\t" BTREE_ID_MSG "\n"
1224 #define BTREE_DELETE_MODIFY_ARGS(thread_p, helper, page, save_lsa, is_leaf, slotid, new_size, btid) \
1225  BTREE_DELETE_HELPER_AS_ARGS (helper), \
1226  (is_leaf) ? "leaf" : "overflow", PGBUF_PAGE_MODIFY_ARGS(page, save_lsa), \
1227  slotid, \
1228  new_size, \
1229  BTID_AS_ARGS (btid)
1230 
1231 /*
1232  * Online index loading
1233  */
1234 
1235 /* Online index states */
1236 /* Include MVCCID_ALL_VISIBLE when we set a flag. */
1240 const MVCCID BTREE_ONLINE_INDEX_FLAG_MASK = 0xC000000000000000;
1241 const MVCCID BTREE_ONLINE_INDEX_MVCCID_MASK = ~0xC000000000000000;
1242 
1245 {
1248 };
1249 
1250 /*
1251  * Static functions
1252  */
1253 
1255  VPID * root_vpid_p, BTREE_ROOT_HEADER ** root_header_p,
1256  BTID_INT * btid_int_p) __attribute__ ((ALWAYS_INLINE));
1257 
1259 
1260 STATIC_INLINE int btree_count_oids (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, char *object_ptr,
1261  OID * oid, OID * class_oid, MVCC_REC_HEADER * mvcc_header, bool * stop, void *args)
1263 
1264 static int btree_store_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, DB_VALUE * key, int size,
1265  BTREE_NODE_TYPE node_type, VPID * firstpg_vpid);
1266 static int btree_load_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * firstpg_vpid, DB_VALUE * key,
1267  BTREE_NODE_TYPE node_type);
1268 static int btree_delete_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, INT16 slot_id,
1269  BTREE_NODE_TYPE node_type);
1271 static void btree_read_fixed_portion_of_non_leaf_record (RECDES * rec, NON_LEAF_REC * nlf_rec);
1274 static void btree_append_oid (RECDES * rec, OID * oid);
1275 STATIC_INLINE void btree_add_mvccid (RECDES * rec, int oid_offset, int mvccid_offset, MVCCID mvccid, short flag,
1276  char **rv_undo_data_ptr, char **rv_redo_data_ptr) __attribute__ ((ALWAYS_INLINE));
1277 STATIC_INLINE void btree_set_mvccid (RECDES * rec, int mvccid_offset, MVCCID * p_mvccid,
1278  char **rv_undo_data_ptr, char **rv_redo_data_ptr) __attribute__ ((ALWAYS_INLINE));
1279 static inline void btree_remove_mvccid (RECDES * record, int oid_offset, int mvccid_offset, short flag,
1280  char **rv_undo_data_ptr, char **rv_redo_data_ptr);
1281 static void btree_record_append_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1283  char **rv_undo_data_ptr, char **rv_redo_data_ptr);
1284 static void btree_insert_object_ordered_by_oid (THREAD_ENTRY * thread_p, RECDES * record, BTID_INT * btid_int,
1285  BTREE_OBJECT_INFO * object_info, char **rv_undo_data_ptr,
1286  char **rv_redo_data_ptr, int *offset_to_objptr);
1288  VPID * first_overflow_vpid, VPID * near_vpid, VPID * new_vpid,
1289  PAGE_PTR * new_page_ptr);
1291  DB_VALUE * key, void *rec_header, BTREE_NODE_TYPE node_type,
1292  bool * clear_key, int *offset, int copy);
1293 static PAGE_PTR btree_get_new_page (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * vpid, VPID * near_vpid);
1294 static int btree_search_nonleaf_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, DB_VALUE * key,
1295  INT16 * slot_id, VPID * child_vpid, page_key_boundary * page_bounds);
1296 static int btree_search_leaf_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, DB_VALUE * key,
1297  BTREE_SEARCH_KEY_HELPER * search_key);
1299  DB_VALUE * key, BTREE_SEARCH_KEY_HELPER * search_key);
1300 static int xbtree_test_unique (THREAD_ENTRY * thread_p, BTID * btid);
1301 #if defined(ENABLE_UNUSED_FUNCTION)
1302 static int btree_get_subtree_stats (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, BTREE_STATS_ENV * env);
1303 #endif
1304 static int btree_get_stats_midxkey (THREAD_ENTRY * thread_p, BTREE_STATS_ENV * env, DB_MIDXKEY * midxkey);
1306 static int btree_get_stats_with_AR_sampling (THREAD_ENTRY * thread_p, BTREE_STATS_ENV * env);
1307 static int btree_get_stats_with_fullscan (THREAD_ENTRY * thread_p, BTREE_STATS_ENV * env);
1308 static DISK_ISVALID btree_check_page_key (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID_INT * btid,
1309  const char *btname, PAGE_PTR page_ptr, VPID * page_vpid);
1310 static DISK_ISVALID btree_check_pages (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, VPID * pg_vpid);
1311 static DISK_ISVALID btree_verify_subtree (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID_INT * btid,
1312  const char *btname, PAGE_PTR pg_ptr, VPID * pg_vpid, BTREE_NODE_INFO * INFO);
1313 static int btree_get_subtree_capacity (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, BTREE_CAPACITY * cpc);
1314 static void btree_print_space (FILE * fp, int n);
1315 static int btree_delete_meta_record (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, int slot_id);
1316 static int btree_merge_root (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R);
1317 static int btree_merge_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R,
1318  INT16 p_slot_id, VPID * child_vpid, BTREE_MERGE_STATUS status);
1319 static int btree_node_size_uncompressed (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr);
1321 static DB_VALUE *btree_find_split_point (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, int *mid_slot,
1322  DB_VALUE * key, BTREE_INSERT_HELPER * helper, bool * clear_midkey);
1323 static int btree_split_next_pivot (BTREE_NODE_SPLIT_INFO * split_info, float new_value, int max_index);
1324 static int btree_split_find_pivot (int total, BTREE_NODE_SPLIT_INFO * split_info);
1325 static int btree_split_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R,
1326  VPID * P_vpid, VPID * Q_vpid, VPID * R_vpid, INT16 p_slot_id, BTREE_NODE_TYPE node_type,
1327  DB_VALUE * key, BTREE_INSERT_HELPER * helper, VPID * child_vpid);
1328 static int btree_split_root (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R,
1329  VPID * P_vpid, VPID * Q_vpid, VPID * R_vpid, BTREE_NODE_TYPE node_type, DB_VALUE * key,
1330  BTREE_INSERT_HELPER * helper, VPID * child_vpid);
1331 static int btree_find_lower_bound_leaf (THREAD_ENTRY * thread_p, BTREE_SCAN * BTS, BTREE_STATS * stat_info_p);
1332 static PAGE_PTR btree_find_leftmost_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid,
1333  BTREE_STATS * stat_info_p);
1334 static PAGE_PTR btree_find_rightmost_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid,
1335  BTREE_STATS * stat_info_p);
1336 static PAGE_PTR btree_find_AR_sampling_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid,
1337  BTREE_STATS * stat_info_p, bool * found_p);
1338 static PAGE_PTR btree_find_boundary_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid, BTREE_STATS * stat_info,
1339  BTREE_BOUNDARY where);
1340 static int btree_find_next_index_record (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1341 static int btree_find_next_index_record_holding_current (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, RECDES * peek_rec);
1343  PAGE_PTR first_page);
1344 static int btree_apply_key_range_and_filter (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, bool is_iss,
1345  bool * key_range_satisfied, bool * key_filter_satisfied,
1346  bool need_to_check_null);
1347 static int btree_dump_curr_key (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, FILTER_INFO * filter, OID * oid,
1348  INDX_SCAN_ID * iscan_id);
1349 static DISK_ISVALID btree_find_key_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, int key_cnt,
1350  OID * oid, DB_VALUE * key, bool * clear_key);
1351 static DISK_ISVALID btree_find_key_from_nleaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, int key_cnt,
1352  OID * oid, DB_VALUE * key, bool * clear_key);
1353 static DISK_ISVALID btree_find_key_from_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, OID * oid,
1354  DB_VALUE * key, bool * clear_key);
1355 
1356 /* Dump & verify routines */
1357 static void btree_dump_root_header (THREAD_ENTRY * thread_p, FILE * fp, PAGE_PTR page_ptr);
1358 static void btree_dump_leaf_record (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec, int n);
1359 static void btree_dump_non_leaf_record (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec, int n,
1360  int print_key);
1361 static void btree_dump_page (THREAD_ENTRY * thread_p, FILE * fp, const OID * class_oid_p, BTID_INT * btid,
1362  const char *btname, PAGE_PTR page_ptr, VPID * pg_vpid, int depth, int level);
1363 
1364 static void btree_dump_page_with_subtree (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, PAGE_PTR pg_ptr,
1365  VPID * pg_vpid, int depth, int level);
1366 
1367 #if !defined(NDEBUG)
1368 static DB_VALUE *btree_set_split_point (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, INT16 mid_slot,
1369  DB_VALUE * key, bool * clear_midkey);
1370 static void btree_split_test (THREAD_ENTRY * thread_p, BTID_INT * btid, DB_VALUE * key, VPID * S_vpid, PAGE_PTR S_page,
1371  BTREE_NODE_TYPE node_type);
1372 static int btree_verify_node (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR page_ptr);
1373 static int btree_verify_nonleaf_node (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR page_ptr);
1374 static int btree_verify_leaf_node (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR page_ptr);
1375 #endif
1376 
1377 static void btree_set_unknown_key_error (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, const char *debug_msg);
1378 static int btree_rv_write_log_record_for_key_insert (char *log_rec, int *log_length, INT16 key_len, RECDES * recp);
1379 
1380 static int btree_rv_write_log_record (char *log_rec, int *log_length, RECDES * recp, BTREE_NODE_TYPE node_type);
1381 
1382 static int btree_find_oid_and_its_page (THREAD_ENTRY * thread_p, BTID_INT * btid_int, OID * oid, PAGE_PTR leaf_page,
1384  RECDES * leaf_record, LEAF_REC * leaf_rec_info, int after_key_offset,
1385  PAGE_PTR * found_page, PAGE_PTR * prev_page, int *offset_to_object,
1386  BTREE_MVCC_INFO * object_mvcc_info);
1389  bool * is_match);
1390 static int btree_find_oid_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, RECDES * leaf_record,
1391  int after_key_offset, OID * oid, BTREE_MVCC_INFO * match_mvccinfo,
1392  BTREE_OP_PURPOSE purpose, int *offset_to_object, BTREE_MVCC_INFO * mvcc_info);
1393 static int btree_find_oid_from_ovfl (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR overflow_page, OID * oid,
1394  BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO * match_mvccinfo, int *offset_to_object,
1397 static int btree_record_get_last_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * recp,
1398  BTREE_NODE_TYPE node_type, int after_key_offset, OID * oidp, OID * class_oid,
1399  BTREE_MVCC_INFO * mvcc_info, int *last_oid_mvcc_offset);
1400 static void btree_record_remove_last_object (THREAD_ENTRY * thread_p, BTID_INT * btid, RECDES * recp,
1401  BTREE_NODE_TYPE node_type, int last_oid_mvcc_offset,
1402  char **rv_undo_data_ptr, char **rv_redo_data_ptr);
1403 static char *btree_leaf_get_nth_oid_ptr (BTID_INT * btid, RECDES * recp, BTREE_NODE_TYPE node_type, int oid_list_offset,
1404  int n);
1405 static void btree_leaf_set_flag (RECDES * recp, short record_flag);
1406 static void btree_leaf_clear_flag (RECDES * recp, short record_flag);
1407 static short btree_leaf_get_flag (RECDES * recp);
1408 static bool btree_leaf_is_flaged (RECDES * recp, short record_flag);
1409 static void btree_record_object_set_mvcc_flags (char *data, short mvcc_flags);
1410 static void btree_record_object_clear_mvcc_flags (char *rec_data, short mvcc_flags);
1412 static INLINE bool btree_record_object_is_flagged (char *data, short mvcc_flag) __attribute__ ((ALWAYS_INLINE));
1414  char **rv_undo_data_ptr, char **rv_redo_data_ptr);
1415 static int btree_record_get_num_oids (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * rec, int offset,
1416  BTREE_NODE_TYPE node_type);
1417 static int btree_record_get_num_visible_oids (THREAD_ENTRY * thread_p, BTID_INT * btid, RECDES * rec, int oid_offset,
1418  BTREE_NODE_TYPE node_type, int *max_visible_oids,
1419  MVCC_SNAPSHOT * mvcc_snapshot, int *num_visible);
1420 static int btree_get_num_visible_oids_from_all_ovf (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * first_ovfl_vpid,
1421  int *num_visible_oids, int *max_visible_oids,
1424 static int btree_set_vpid_previous_vpid (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_p, VPID * prev);
1425 static int btree_compare_individual_key_value (DB_VALUE * key1, DB_VALUE * key2, TP_DOMAIN * key_domain);
1426 static int btree_get_next_page_vpid (THREAD_ENTRY * thread_p, PAGE_PTR leaf_page, VPID * next_vpid);
1427 static PAGE_PTR btree_get_next_page (THREAD_ENTRY * thread_p, PAGE_PTR page_p);
1428 static int btree_range_opt_check_add_index_key (THREAD_ENTRY * thread_p, BTREE_SCAN * bts,
1429  MULTI_RANGE_OPT * multi_range_opt, OID * p_new_oid, bool * key_added);
1430 static int btree_top_n_items_binary_search (RANGE_OPT_ITEM ** top_n_items, int *att_idxs, TP_DOMAIN ** domains,
1431  bool * desc_order, DB_VALUE * new_key_values, int num_keys, int first,
1432  int last, int *new_pos);
1433 static int btree_iss_set_key (BTREE_SCAN * bts, INDEX_SKIP_SCAN * iss);
1434 static int btree_insert_mvcc_delid_into_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr,
1435  BTREE_NODE_TYPE node_type, DB_VALUE * key,
1436  BTREE_INSERT_HELPER * insert_helper, PGSLOTID slot_id, RECDES * rec,
1437  int oid_offset);
1440  BTREE_INSERT_HELPER * insert_helper,
1441  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_rec,
1442  VPID * first_ovfl_vpid);
1443 static int btree_key_append_object_to_overflow (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR ovfl_page,
1445 static int btree_find_free_overflow_oids_page (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * first_ovfl_vpid,
1446  PAGE_PTR * overflow_page);
1447 
1448 static int btree_delete_key_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR leaf_pg,
1449  LEAF_REC * leafrec_pnt, BTREE_DELETE_HELPER * delete_helper,
1450  BTREE_SEARCH_KEY_HELPER * search_key);
1452  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page,
1453  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_rec,
1454  VPID * ovfl_vpid);
1455 static int btree_modify_leaf_ovfl_vpid (THREAD_ENTRY * thread_p, BTID_INT * btid_int,
1456  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page, RECDES * leaf_record,
1457  BTREE_SEARCH_KEY_HELPER * search_key, VPID * next_ovfl_vpid);
1458 static int btree_modify_overflow_link (THREAD_ENTRY * thread_p, BTID_INT * btid_int,
1459  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR ovfl_page, VPID * next_ovfl_vpid);
1460 
1461 static DISK_ISVALID btree_repair_prev_link_by_btid (THREAD_ENTRY * thread_p, BTID * btid, bool repair,
1462  char *index_name);
1463 static DISK_ISVALID btree_repair_prev_link_by_class_oid (THREAD_ENTRY * thread_p, OID * oid, BTID * idx_btid,
1464  bool repair);
1465 static bool btree_node_is_compressed (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr);
1466 static int btree_node_common_prefix (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr);
1467 static int btree_recompress_record (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, DB_VALUE * fence_key,
1468  int old_prefix, int new_prefix);
1469 static int btree_compress_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr);
1470 static const char *node_type_to_string (short node_type);
1471 static char *key_type_to_string (char *buf, int buf_size, TP_DOMAIN * key_type);
1472 static int index_attrs_to_string (char *buf, int buf_size, OR_INDEX * index_p, RECDES * recdes);
1473 static SCAN_CODE btree_scan_for_show_index_header (THREAD_ENTRY * thread_p, DB_VALUE ** out_values, int out_cnt,
1474  const char *class_name, OR_INDEX * index_p, OID * class_oid_p);
1475 static SCAN_CODE btree_scan_for_show_index_capacity (THREAD_ENTRY * thread_p, DB_VALUE ** out_values, int out_cnt,
1476  const char *class_name, OR_INDEX * index_p);
1477 static bool btree_leaf_lsa_eq (THREAD_ENTRY * thread_p, LOG_LSA * a, LOG_LSA * b);
1478 
1479 #if !defined(NDEBUG)
1480 static int btree_get_node_level (THREAD_ENTRY * thread_p, PAGE_PTR page_ptr);
1481 #endif
1482 
1484  int offset, BTREE_NODE_TYPE node_type, OID * oid, OID * class_oid,
1485  int max_oids);
1487  VPID * first_ovfl_vpid, OID * oid, OID * class_oid);
1488 
1490 static int btree_or_get_mvccinfo (OR_BUF * buf, BTREE_MVCC_INFO * mvcc_info, short btree_mvcc_flags);
1491 static int btree_or_put_object (OR_BUF * buf, BTID_INT * btid_int, BTREE_NODE_TYPE node_type,
1493 static int btree_or_get_object (OR_BUF * buf, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, int after_key_offset,
1494  OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info);
1495 static char *btree_unpack_object (char *ptr, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, RECDES * record,
1496  int after_key_offset, OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info);
1497 static char *btree_pack_object (char *ptr, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, RECDES * record,
1499 
1501  DB_VALUE * key, BTREE_ROOT_WITH_KEY_FUNCTION * root_fnct,
1502  void *root_args, BTREE_ADVANCE_WITH_KEY_FUNCTION * advance_fnct,
1503  void *advance_args, BTREE_PROCESS_KEY_FUNCTION * leaf_fnct,
1504  void *process_key_args, BTREE_SEARCH_KEY_HELPER * search_key,
1505  PAGE_PTR * leaf_page_ptr);
1506 static int btree_get_root_with_key (THREAD_ENTRY * thread_p, BTID * btid, BTID_INT * btid_int, DB_VALUE * key,
1507  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
1508  bool * stop, bool * restart, void *other_args);
1509 static int btree_advance_and_find_key (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1510  PAGE_PTR * crt_page, PAGE_PTR * advance_to_page, bool * is_leaf,
1511  BTREE_SEARCH_KEY_HELPER * search_key, bool * stop, bool * restart,
1512  void *other_args);
1514  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1515  bool * restart, void *other_args);
1516 static int btree_key_find_and_lock_unique (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1517  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1518  void *other_args);
1520  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1521  bool * restart, void *other_args);
1523  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1524  bool * restart, void *other_args);
1525 #if defined (SERVER_MODE)
1526 static int btree_key_lock_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
1527  PAGE_PTR * overflow_page, OID * oid, OID * class_oid, LOCK lock_mode,
1528  BTREE_SEARCH_KEY_HELPER * search_key, bool try_cond_lock, bool * restart,
1529  bool * was_page_refixed);
1530 #endif /* SERVER_MODE */
1531 
1532 static int btree_key_process_objects (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * leaf_record,
1533  int after_key_offset, LEAF_REC * leaf_info, BTREE_PROCESS_OBJECT_FUNCTION * func,
1534  void *args);
1535 static int btree_record_process_objects (THREAD_ENTRY * thread_p, BTID_INT * btid_int, BTREE_NODE_TYPE node_type,
1536  RECDES * record, int after_key_offset, bool * stop,
1537  BTREE_PROCESS_OBJECT_FUNCTION * func, void *args);
1538 static int btree_record_satisfies_snapshot (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1539  char *object_ptr, OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info,
1540  bool * stop, void *args);
1541 
/*
 * Forward declarations: range scan helpers (btree_range_scan_*), scan range adjustment helpers and the
 * foreign key object lookup helpers.
 * NOTE(review): where the listing numbers skip a value (1543, 1548, 1552) a source line is missing from this
 * copy; the declaration ending on listing line 1554 has lost its first line. Restore from the upstream file.
 */
1542 static int btree_range_scan_read_record (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1544 static int btree_range_scan_descending_fix_prev_leaf (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, int *key_count,
1545  BTREE_NODE_HEADER ** node_header_ptr, VPID * next_vpid);
1546 static int btree_range_scan_start (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1547 static int btree_range_scan_resume (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1549 static int btree_scan_update_range (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, key_val_range * kv_range);
1550 static int btree_ils_adjust_range (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1551 
1553  char *object_ptr, OID * oid, OID * class_oid,
1554  BTREE_MVCC_INFO * mvcc_info, bool * stop, void *args);
1555 static int btree_range_scan_find_fk_any_object (THREAD_ENTRY * thread_p, BTREE_SCAN * bts);
1556 static int btree_fk_object_does_exist (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, char *object_ptr,
1557  OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info, bool * stop,
1558  void *args);
1559 
/*
 * Forward declarations: insert-side helpers (btree_insert_internal, btree_fix_root_for_insert,
 * btree_split_node_and_advance, btree_key_insert_* and the append helpers).
 * NOTE(review): several declarations below have lost their first line (and btree_insert_internal its last
 * line, listing line 1562) in this copy; gaps in the listing numbers mark the missing lines. Restore them from
 * the upstream file before compiling.
 */
1560 static int btree_insert_internal (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * oid,
1561  int op_type, btree_unique_stats * unique_stat_info, int *unique,
1563 static int btree_undo_delete_physical (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * oid,
1564  BTREE_MVCC_INFO * mvcc_info, LOG_LSA * undo_nxlsa);
1565 static int btree_fix_root_for_insert (THREAD_ENTRY * thread_p, BTID * btid, BTID_INT * btid_int, DB_VALUE * key,
1566  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
1567  bool * stop, bool * restart, void *other_args);
1568 static int btree_split_node_and_advance (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1569  PAGE_PTR * crt_page, PAGE_PTR * advance_to_page, bool * is_leaf,
1570  BTREE_SEARCH_KEY_HELPER * search_key, bool * stop, bool * restart,
1571  void *other_args);
1572 static int btree_get_max_new_data_size (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR page,
1573  BTREE_NODE_TYPE node_type, int key_len, BTREE_INSERT_HELPER * helper,
1574  bool known_to_be_found);
1575 static int btree_key_insert_new_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1576  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1577  void *other_args);
1579  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1580  bool * restart, void *other_args);
1582  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1583  void *other_args);
1584 static int btree_key_insert_new_key (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR leaf_page,
1585  BTREE_INSERT_HELPER * insert_helper, BTREE_SEARCH_KEY_HELPER * search_key);
1587  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1588  bool * restart, void *other_args);
1589 static int btree_key_insert_delete_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1590  PAGE_PTR leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1591  BTREE_INSERT_HELPER * insert_helper, RECDES * leaf_record,
1592  PAGE_PTR object_page, int offset_to_found_object);
1594  PAGE_PTR * leaf, bool * restart,
1595  BTREE_SEARCH_KEY_HELPER * search_key,
1596  BTREE_INSERT_HELPER * insert_helper, RECDES * leaf_record);
1597 static int btree_key_append_object_unique (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR leaf,
1598  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record,
1599  LEAF_REC * leaf_record_info, int offset_after_key,
1600  BTREE_INSERT_HELPER * insert_helper, BTREE_OBJECT_INFO * first_object);
1602  PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER * search_key,
1603  RECDES * leaf_record, int offset_after_key, LEAF_REC * leaf_info,
1604  BTREE_OBJECT_INFO * btree_obj, BTREE_INSERT_HELPER * insert_helper);
1606  PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record,
1607  LEAF_REC * leaf_record_info, int offset_after_key,
1608  BTREE_INSERT_HELPER * insert_helper);
1610  PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record,
1611  LEAF_REC * leaf_record_info, BTREE_INSERT_HELPER * insert_helper,
1612  BTREE_OBJECT_INFO * append_object);
/* Declared only when SERVER_MODE is defined. */
1613 #if defined (SERVER_MODE)
1614 static bool btree_key_insert_does_leaf_need_split (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR leaf_page,
1615  BTREE_INSERT_HELPER * insert_helper,
1616  BTREE_SEARCH_KEY_HELPER * search_key);
1617 #endif /* SERVER_MODE */
/* Debug-only check helper: declared only when NDEBUG is not defined. */
1618 #if !defined (NDEBUG)
1619 static void btree_key_record_check_no_visible (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR leaf_page,
1620  PGSLOTID slotid);
1621 #endif /* !NDEBUG */
1622 
/*
 * Forward declarations: delete-side helpers (btree_delete_internal, btree_fix_root_for_delete,
 * btree_merge_node_and_advance, btree_key_delete_* / btree_*_remove_* helpers, MVCCID add/remove helpers,
 * undo helpers and btree_record_replace_object).
 * NOTE(review): several declarations below have lost their first line in this copy; gaps in the listing
 * numbers (e.g. 1637, 1640, 1684, 1688, 1694, 1699, 1703) mark the missing lines. Restore them from the
 * upstream file before compiling.
 */
1623 static int btree_delete_internal (THREAD_ENTRY * thread_p, BTID * btid, OID * oid, OID * class_oid,
1624  BTREE_MVCC_INFO * mvcc_info, DB_VALUE * key, OR_BUF * buffered_key, int *unique,
1625  int op_type, btree_unique_stats * unique_stat_info, BTREE_MVCC_INFO * match_mvccinfo,
1626  LOG_LSA * undo_nxlsa, BTREE_OBJECT_INFO * second_obj_info, BTREE_OP_PURPOSE purpose);
1627 static int btree_fix_root_for_delete (THREAD_ENTRY * thread_p, BTID * btid, BTID_INT * btid_int, DB_VALUE * key,
1628  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
1629  bool * stop, bool * restart, void *other_args);
1630 static int btree_merge_node_and_advance (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1631  PAGE_PTR * crt_page, PAGE_PTR * advance_to_page, bool * is_leaf,
1632  BTREE_SEARCH_KEY_HELPER * search_key, bool * stop, bool * restart,
1633  void *other_args);
1634 static int btree_key_delete_remove_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1635  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1636  void *other_args);
1638  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1639  bool * restart, void *other_args);
1641  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page,
1642  RECDES * leaf_record, BTREE_SEARCH_KEY_HELPER * search_key,
1643  OID * last_oid, OID * last_class_oid,
1644  BTREE_MVCC_INFO * last_mvcc_info, int offset_to_last_object);
1645 static int btree_record_remove_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int,
1646  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR page, RECDES * record,
1647  BTREE_SEARCH_KEY_HELPER * search_key, BTREE_NODE_TYPE node_type,
1648  int offset_to_object, LOG_DATA_ADDR * addr);
1649 static void btree_record_remove_object_internal (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1650  BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data,
1651  char **rv_redo_data, int *displacement);
1652 static int btree_key_remove_object (THREAD_ENTRY * thread_p, DB_VALUE * key, BTID_INT * btid_int,
1653  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page, RECDES * leaf_record,
1654  LEAF_REC * leaf_info, int offset_after_key, BTREE_SEARCH_KEY_HELPER * search_key,
1655  PAGE_PTR * overflow_page, PAGE_PTR prev_page, BTREE_NODE_TYPE node_type,
1656  int offset_to_object);
1657 static int btree_overflow_remove_object (THREAD_ENTRY * thread_p, DB_VALUE * key, BTID_INT * btid_int,
1658  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR * overflow_page,
1659  PAGE_PTR prev_page, PAGE_PTR leaf_page, RECDES * leaf_record,
1660  BTREE_SEARCH_KEY_HELPER * search_key, int offset_to_object);
1661 static int btree_leaf_remove_object (THREAD_ENTRY * thread_p, DB_VALUE * key, BTID_INT * btid_int,
1662  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page, RECDES * leaf_record,
1663  LEAF_REC * leaf_rec_info, int offset_after_key,
1664  BTREE_SEARCH_KEY_HELPER * search_key, int offset_to_object);
1665 static int btree_key_remove_insert_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1666  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1667  void *other_args);
1668 static void btree_record_remove_insid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1669  BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data,
1670  char **rv_redo_data, int *displacement);
1671 static int btree_key_remove_delete_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key,
1672  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
1673  void *other_args);
1674 static void btree_record_remove_delid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1675  BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data,
1676  char **rv_redo_data);
1677 static void btree_record_add_delid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1678  BTREE_NODE_TYPE node_type, int offset_to_object, MVCCID delete_mvccid,
1679  char **rv_undo_data, char **rv_redo_data);
1680 static int btree_undo_mvcc_delete (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid,
1681  OID * class_oid, BTREE_MVCC_INFO * match_mvccinfo, LOG_LSA * undo_nxlsa);
1682 static int btree_undo_insert_object (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid,
1683  OID * class_oid, MVCCID insert_mvccid, LOG_LSA * undo_nxlsa);
1685  BTREE_OBJECT_INFO * inserted_object,
1686  BTREE_OBJECT_INFO * second_object, MVCCID insert_mvccid,
1687  LOG_LSA * undo_nxlsa);
1689  BTREE_DELETE_HELPER * delete_helper,
1690  BTREE_SEARCH_KEY_HELPER * search_key, PAGE_PTR leaf_page,
1691  RECDES * leaf_record, PAGE_PTR overflow_page,
1692  RECDES * overflow_record, BTREE_NODE_TYPE node_type,
1693  int offset_to_object);
1695  BTREE_DELETE_HELPER * helper, PAGE_PTR leaf_page,
1696  RECDES * leaf_record, BTREE_NODE_TYPE node_type,
1697  PAGE_PTR overflow_page, RECDES * overflow_record,
1698  int offset_to_object, char **rv_undo_data, char **rv_redo_data);
1700  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR page,
1701  RECDES * record, PGSLOTID slotid, BTREE_NODE_TYPE node_type,
1702  int offset_to_object);
1704  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR overflow_page,
1705  RECDES * overflow_record, int *offset_to_replaced_object,
1706  BTREE_OBJECT_INFO * replacing_object);
1707 static void btree_record_replace_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1708  BTREE_NODE_TYPE node_type, int *offset_to_replaced,
1709  BTREE_OBJECT_INFO * replacement, char **rv_undo_data, char **rv_redo_data);
1710 
/*
 * Forward declarations: recovery helpers (btree_rv_*), postponed delete, record layout predicates and the
 * online index state helpers (btree_online_index_*).
 * The btree_rv_log_* and btree_online_index_set_* declarations take reference parameters, so this file must be
 * compiled as C++.
 * NOTE(review): gaps in the listing numbers (1718, 1721-1726, 1730-1734) mark source lines missing from this
 * copy; restore them from the upstream file before compiling.
 */
1711 static int btree_rv_record_modify_internal (THREAD_ENTRY * thread_p, LOG_RCV * rcv, bool is_undo);
1712 static int btree_delete_postponed (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key,
1713  BTREE_OBJECT_INFO * btree_obj, MVCCID tran_mvccid, LOG_LSA * reference_lsa);
1714 
1715 static MVCCID btree_get_creator_mvccid (THREAD_ENTRY * thread_p, PAGE_PTR root_page);
1716 static int btree_seq_find_oid_from_ovfl (THREAD_ENTRY * thread_p, BTID_INT * btid_int, OID * oid, RECDES * ovf_record,
1717  char *initial_oid_ptr, char *oid_ptr_lower_bound, char *oid_ptr_upper_bound,
1719  int *offset_to_object, BTREE_MVCC_INFO * mvcc_info);
1720 
1727 
1728 static bool btree_is_class_oid_packed (BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type, bool is_first);
1729 static bool btree_is_fixed_size (BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type, bool is_first);
1735 static void btree_rv_log_delete_object (THREAD_ENTRY * thread_p, const BTREE_DELETE_HELPER & delete_helper,
1736  LOG_DATA_ADDR & addr, int undo_length, int redo_length, const char *undo_data,
1737  const char *redo_data);
1738 static void btree_rv_log_insert_object (THREAD_ENTRY * thread_p, const BTREE_INSERT_HELPER & insert_helper,
1739  LOG_DATA_ADDR & addr, int undo_length, int redo_length, const char *undo_data,
1740  const char *redo_data);
1741 
1742 static inline void btree_online_index_check_state (MVCCID state);
1743 static inline bool btree_online_index_is_insert_flag_state (MVCCID state);
1744 static inline bool btree_online_index_is_delete_flag_state (MVCCID state);
1745 static inline bool btree_online_index_is_normal_state (MVCCID state);
1746 static inline void btree_online_index_set_insert_flag_state (MVCCID & state);
1747 static inline void btree_online_index_set_delete_flag_state (MVCCID & state);
1748 static inline void btree_online_index_set_normal_state (MVCCID & state);
1749 static void btree_online_index_change_state (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record,
1750  BTREE_NODE_TYPE node_type, int offset_to_object, MVCCID new_state,
1751  char **rv_undo_data, char **rv_redo_data);
1752 
/*
 * Forward declarations: object lookup within a key, insert/delete helper conversion, single-object key check
 * and the unique-index locking checks.
 * NOTE(review): the three two-line fragments below (listing lines 1761-1762, 1765-1766, 1769-1770) have lost
 * their first line in this copy, and listing line 1777 is missing; restore them from the upstream file.
 */
1753 static int btree_find_oid_with_page_and_record (THREAD_ENTRY * thread_p, BTID_INT * btid_int, OID * oid,
1754  PAGE_PTR leaf_page, BTREE_OP_PURPOSE purpose,
1755  BTREE_MVCC_INFO * match_mvccinfo, RECDES * record, LEAF_REC * leaf_info,
1756  int offset_after_key, PAGE_PTR * found_page, PAGE_PTR * prev_page,
1757  int *offset_to_object, BTREE_MVCC_INFO * object_mvcc_info,
1758  RECDES * new_record);
1759 
1761  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1762  bool * restart, void *other_args);
1763 
1765  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1766  bool * restart, void *other_args);
1767 
1769  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
1770  bool * restart, void *other_args);
1771 
1772 static inline void btree_insert_helper_to_delete_helper (BTREE_INSERT_HELPER * insert_helper,
1773  BTREE_DELETE_HELPER * delete_helper);
1774 static inline void btree_delete_helper_to_insert_helper (BTREE_DELETE_HELPER * delete_helper,
1775  BTREE_INSERT_HELPER * insert_helper);
1776 
1778 static bool btree_is_single_object_key (THREAD_ENTRY * thread_p, BTID_INT * btid_int, BTREE_NODE_TYPE node_type,
1779  RECDES * record, int offset_after_key);
1780 
1781 static bool btree_check_locking_for_insert_unique (THREAD_ENTRY * thread_p, const BTREE_INSERT_HELPER * insert_helper);
1782 static bool btree_check_locking_for_delete_unique (THREAD_ENTRY * thread_p, const BTREE_DELETE_HELPER * delete_helper);
1783 
1784 /*
1785  * btree_fix_root_with_info () - Fix b-tree root page and output its VPID, header and b-tree info if requested.
1786  *
1787  * return : Fixed root page, or NULL if the page could not be fixed or its header / b-tree info could not be read.
      *          On success the page is returned still fixed; on every failure after the fix it is unfixed here.
1788  * thread_p (in) : Thread entry.
1789  * btid (in) : B-tree identifier. When btid_int_p is given, btid_int_p->sys_btid keeps this pointer (no copy).
1790  * latch_mode (in) : Page latch mode.
1791  * root_vpid_p (out) : Output VPID of root page if not NULL.
1792  * root_header_p (out) : Output root header if not NULL.
1793  * btid_int_p (out) : Output b-tree data if not NULL.
1794  */
1796 btree_fix_root_with_info (THREAD_ENTRY * thread_p, BTID * btid, PGBUF_LATCH_MODE latch_mode, VPID * root_vpid_p,
1797  BTREE_ROOT_HEADER ** root_header_p, BTID_INT * btid_int_p)
1798 {
1799  PAGE_PTR root_page = NULL;
1800  VPID vpid;
1801  BTREE_ROOT_HEADER *root_header = NULL;
1802 
1803  /* Assert expected arguments. */
1804  assert (btid != NULL);
1805 
1806  /* Get root page VPID. When the caller does not want it, build it in the local vpid instead. */
1807  if (root_vpid_p == NULL)
1808  {
1809  root_vpid_p = &vpid;
1810  }
1811  root_vpid_p->pageid = btid->root_pageid;
1812  root_vpid_p->volid = btid->vfid.volid;
1813 
1814  /* Fix root page (existing page, unconditional latch in the requested mode). */
1815  root_page = pgbuf_fix (thread_p, root_vpid_p, OLD_PAGE, latch_mode, PGBUF_UNCONDITIONAL_LATCH);
1816  if (root_page == NULL)
1817  {
1818  /* Failed fixing root page. */
1819  ASSERT_ERROR ();
1820  return NULL;
1821  }
1822 
1823  /* Get root header */
1824  root_header = btree_get_root_header (thread_p, root_page);
1825  if (root_header == NULL)
1826  {
1827  /* Error getting root header. Not expected (asserts in debug); release the page before failing. */
1828  assert (false);
1829  pgbuf_unfix (thread_p, root_page);
1830  return NULL;
1831  }
1832  if (root_header_p != NULL)
1833  {
1834  /* Output root header. */
1835  *root_header_p = root_header;
1836  }
1837 
1838  if (btid_int_p != NULL)
1839  {
1840  /* Get b-tree info. sys_btid is set first, then the rest is gleaned from the root header. */
1841  btid_int_p->sys_btid = btid;
1842  if (btree_glean_root_header_info (thread_p, root_header, btid_int_p) != NO_ERROR)
1843  {
      /* Not expected (asserts in debug); release the page before failing. */
1844  assert (false);
1845  pgbuf_unfix (thread_p, root_page);
1846  return NULL;
1847  }
1848  }
1849 
1850  /* Return fixed root page. */
1851  return root_page;
1852 }
1853 
1854 /*
1855  * btree_is_fence_key () - Return whether the key is fence or not.
1856  *
1857  * return : True if key is fence. False is also returned when the slot cannot be obtained.
1858  * leaf_page (in) : Leaf node page.
1859  * slotid (in) : Key slot ID.
      *
      * NOTE(review): the function signature (listing line 1862) and the final return statement (listing line
      * 1884) are missing from this copy; restore them from the upstream file.
1860  */
1861 STATIC_INLINE bool
1863 {
1864  SPAGE_SLOT *slotp = NULL;
1865  OID first_oid;
1866 
1867  assert (leaf_page != NULL);
1868 
1869  /* Get slot. */
1870  slotp = spage_get_slot (leaf_page, slotid);
1871  if (slotp == NULL)
1872  {
1873  /* Unexpected error. */
1874  assert (false);
1875  return false;
1876  }
      /* Sanity checks: record lies inside the page, is a home record and is large enough to hold an OID. */
1877  assert (slotp->offset_to_record > 0 && slotp->offset_to_record < (unsigned int) DB_PAGESIZE);
1878  assert (slotp->record_type == REC_HOME);
1879  assert (slotp->record_length >= OR_OID_SIZE);
1880 
      /* Unpack the OID stored at the start of the record, reading directly from the page. */
1881  (void) or_unpack_oid (leaf_page + slotp->offset_to_record, &first_oid);
1882 
1883  /* Return if fence record flag is set. */
1885 }
1886 
1887 #if !defined(NDEBUG)
1888 /*
1889  * btree_get_node_level () - Get the level of a b-tree node from its node header. Compiled only when NDEBUG is not defined.
1890  *
1891  * return: header->node_level (asserted to be > 0), or -1 if the node header could not be obtained.
      * thread_p(in): Thread entry, passed through to btree_get_node_header.
1892  * page_ptr(in): Page of the b-tree node whose header is read.
1893  *
      * NOTE(review): the function signature (listing line 1896) is missing from this copy.
1894  */
1895 static int
1897 {
1898  BTREE_NODE_HEADER *header = NULL;
1899 
1900  header = btree_get_node_header (thread_p, page_ptr);
1901  if (header == NULL)
1902  {
      /* -1 is the error value; valid levels are asserted positive below. */
1903  return -1;
1904  }
1905 
1906  assert (header->node_level > 0);
1907 
1908  return header->node_level;
1909 }
1910 #endif
1911 
1912 /*
1913  * btree_clear_key_value () -
1914  * return: cleared flag
1915  * clear_flag (in/out):
1916  * key_value (in/out):
1917  */
1918 bool
1919 btree_clear_key_value (bool * clear_flag, DB_VALUE * key_value)
1920 {
1921  if (*clear_flag == true || key_value->need_clear == true)
1922  {
1923  pr_clear_value (key_value);
1924  *clear_flag = false;
1925  }
1926  // also set null
1927  db_make_null (key_value);
1928  return *clear_flag;
1929 }
1930 
1931 /*
1932  * btree_init_temp_key_value () -
1933  * return: void
1934  * clear_flag (in/out):
1935  * key_value (in/out):
1936  */
1937 void
1938 btree_init_temp_key_value (bool * clear_flag, DB_VALUE * key_value)
1939 {
1940  db_make_null (key_value);
1941  *clear_flag = false;
1942 }
1943 
1944 /*
1945  * btree_create_overflow_key_file () - Create file for overflow keys
1946  *
1947  * return : Error code (NO_ERROR on success).
1948  * btid (in/out): B-tree info. btid->ovfid receives the VFID of the created file; it is reset to NULL before creation and again if a TDE step fails.
1949  *
1950  * Note: An overflow key file is created (permanently) and the VFID is written to the root header for the btree.
      * NOTE(review): the code visible here only stores the VFID in btid->ovfid; the root header update is
      * presumably done elsewhere (listing lines 1953 and 1964-1965 are missing from this copy) - confirm.
1951  */
1952 int
1954 {
1955  FILE_DESCRIPTORS des;
1956  int error_code = NO_ERROR;
1957  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
1958 
1959  VFID_SET_NULL (&btid->ovfid);
1960 
1961  /* initialize the descriptor of the overflow key file */
1962  memset (&des, 0, sizeof (des));
1963  des.btree_key_overflow.btid = *btid->sys_btid; /* structure copy */
1966  /* create file with at least 3 pages */
1967  error_code = file_create_with_npages (thread_p, FILE_BTREE_OVERFLOW_KEY, 3, &des, &btid->ovfid);
1968  if (error_code != NO_ERROR)
1969  {
1970  return error_code;
1971  }
      /* Apply to the new file the TDE algorithm of the class identified by btid->topclass_oid. */
1972  error_code = heap_get_class_tde_algorithm (thread_p, &btid->topclass_oid, &tde_algo);
1973  if (error_code != NO_ERROR)
1974  {
      /* NOTE(review): the file created above is not destroyed here; presumably the caller rolls it back - confirm. */
1975  VFID_SET_NULL (&btid->ovfid);
1976  return error_code;
1977  }
1978  error_code = file_apply_tde_algorithm (thread_p, &btid->ovfid, tde_algo);
1979  if (error_code != NO_ERROR)
1980  {
1981  VFID_SET_NULL (&btid->ovfid);
1982  return error_code;
1983  }
1984  return error_code;
1985 }
1986 
1987 /*
1988  * btree_store_overflow_key () - Write a key to the overflow key file of the b-tree.
1989  * return: NO_ERROR, or an error code on failure (ret, else er_errid (), else ER_FAILED).
1990  * btid(in): B+tree index identifier
1991  * key(in): Pointer to the overflow key memory area
1992  * size(in): Overflow key memory area size. Recomputed here when the key has to be cast to the index domain.
1993  * node_type(in): Type of node. Selects btid->key_type (leaf) or btid->nonleaf_key_type as the key domain.
1994  * first_overflow_page_vpid(out): Set to the first overflow key page identifier
1995  *
1996  * Note: The overflow key given is stored in a chain of pages.
      * NOTE(review): listing lines 2006 (presumably the declaration of tp_domain) and 2052 are missing from
      * this copy; restore them from the upstream file.
1997  */
1998 static int
1999 btree_store_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, DB_VALUE * key, int size, BTREE_NODE_TYPE node_type,
2000  VPID * first_overflow_page_vpid)
2001 {
2002  RECDES rec;
2003  OR_BUF buf;
2004  VFID overflow_file_vfid;
2005  int ret = NO_ERROR;
2007  PR_TYPE *pr_type;
2008  DB_TYPE src_type, dst_type;
2009  DB_VALUE new_key;
2010  DB_VALUE *key_ptr = key;
2011 
      /* The overflow key file must already exist. */
2012  assert (!VFID_ISNULL (&btid->ovfid));
2013 
2014  if (node_type == BTREE_LEAF_NODE)
2015  {
2016  tp_domain = btid->key_type;
2017  }
2018  else
2019  {
2020  tp_domain = btid->nonleaf_key_type;
2021  }
2022 
2023  pr_type = tp_domain->type;
2024 
2025  src_type = DB_VALUE_DOMAIN_TYPE (key);
2026  dst_type = pr_type->id;
2027 
      /* The key is written with the index domain's writer, so a key of another type is first cast into new_key. */
2028  if (src_type != dst_type)
2029  {
2030  TP_DOMAIN_STATUS status;
2031 
      /* Only string-to-string type differences are expected here. */
2032  assert (pr_is_string_type (src_type));
2033  assert (pr_is_string_type (dst_type));
2034 
2035  key_ptr = &new_key;
2036  status = tp_value_cast (key, key_ptr, tp_domain, false);
2037  if (status != DOMAIN_COMPATIBLE)
2038  {
      /* NOTE(review): rec.data does not appear to be initialized before this jump, yet exit_on_error tests and
       * frees it - confirm and initialize rec.data to NULL at declaration if so. */
2039  assert (false);
2040  goto exit_on_error;
2041  }
2042 
      /* The cast value may have a different disk size than the original key. */
2043  size = btree_get_disk_size_of_key (key_ptr);
2044  }
2045 
2046  overflow_file_vfid = btid->ovfid; /* structure copy */
2047 
      /* Temporary buffer in which the key is serialized before being handed to overflow_insert. */
2048  rec.area_size = size;
2049  rec.data = (char *) db_private_alloc (thread_p, size);
2050  if (rec.data == NULL)
2051  {
2053  goto exit_on_error;
2054  }
2055 
2056  or_init (&buf, rec.data, rec.area_size);
2057 
2058  if (pr_type->index_writeval (&buf, key_ptr) != NO_ERROR)
2059  {
2060  goto exit_on_error;
2061  }
2062 
      /* Record length is the number of bytes actually written into the buffer. */
2063  rec.length = (int) (buf.ptr - buf.buffer);
2064 
2065  if (overflow_insert (thread_p, &overflow_file_vfid, first_overflow_page_vpid, &rec, FILE_BTREE_OVERFLOW_KEY)
2066  != NO_ERROR)
2067  {
2068  ASSERT_ERROR ();
2069  goto exit_on_error;
2070  }
2071 
2072  if (rec.data)
2073  {
2074  db_private_free_and_init (thread_p, rec.data);
2075  }
2076 
      /* Release the cast copy, if one was made. */
2077  if (key_ptr != key)
2078  {
2079  pr_clear_value (key_ptr);
2080  }
2081 
2082  return ret;
2083 
2084 exit_on_error:
2085 
2086  if (rec.data)
2087  {
2088  db_private_free_and_init (thread_p, rec.data);
2089  }
2090 
2091  if (key_ptr != key)
2092  {
2093  pr_clear_value (key_ptr);
2094  }
2095 
      /* Never report NO_ERROR from the error path: fall back to er_errid (), then to ER_FAILED. */
2096  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
2097 }
2098 
2099 /*
2100  * btree_load_overflow_key () - Read a key back from the overflow key pages.
2101  * return: NO_ERROR, or an error code on failure (er_errid (), else ER_FAILED).
2102  * btid(in): B+tree index identifier
2103  * first_overflow_page_vpid(in): Overflow key first page identifier
2104  * key(out): Set to the overflow key memory area
      * node_type(in): Type of node. Selects the reader of btid->key_type (leaf) or btid->nonleaf_key_type.
2105  *
2106  * Note: The overflow key is loaded from the pages.
      * NOTE(review): listing line 2135 is missing from this copy; restore it from the upstream file.
2107  */
2108 static int
2109 btree_load_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * first_overflow_page_vpid, DB_VALUE * key,
2110  BTREE_NODE_TYPE node_type)
2111 {
2112  RECDES rec;
2113  OR_BUF buf;
2114  PR_TYPE *pr_type;
2115  int ret = NO_ERROR;
2116 
2117  if (node_type == BTREE_LEAF_NODE)
2118  {
2119  pr_type = btid->key_type->type;
2120  }
2121  else
2122  {
2123  pr_type = btid->nonleaf_key_type->type;
2124  }
2125 
      /* Size the temporary buffer from the stored overflow length; -1 is treated as failure. */
2126  rec.area_size = overflow_get_length (thread_p, first_overflow_page_vpid);
2127  if (rec.area_size == -1)
2128  {
      /* Nothing has been allocated yet, so return directly instead of going through exit_on_error. */
2129  return ER_FAILED;
2130  }
2131 
2132  rec.data = (char *) db_private_alloc (thread_p, rec.area_size);
2133  if (rec.data == NULL)
2134  {
2136  goto exit_on_error;
2137  }
2138 
2139  if (overflow_get (thread_p, first_overflow_page_vpid, &rec, NULL) != S_SUCCESS)
2140  {
2141  goto exit_on_error;
2142  }
2143 
2144  or_init (&buf, rec.data, rec.length);
2145 
2146  /* we always copy overflow keys */
      /* NOTE(review): the domain passed here is always btid->key_type, even when pr_type was taken from
       * nonleaf_key_type above - confirm this is intended. */
2147  if (pr_type->index_readval (&buf, key, btid->key_type, -1, true, NULL, 0) != NO_ERROR)
2148  {
2149  goto exit_on_error;
2150  }
2151 
      /* The temporary buffer is released on success as well as on error. */
2152  if (rec.data)
2153  {
2154  db_private_free_and_init (thread_p, rec.data);
2155  }
2156 
2157  return NO_ERROR;
2158 
2159 exit_on_error:
2160 
2161  if (rec.data)
2162  {
2163  db_private_free_and_init (thread_p, rec.data);
2164  }
2165 
      /* Never report NO_ERROR from the error path: fall back to er_errid (), then to ER_FAILED. */
2166  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
2167 }
2168 
2169 /*
2170  * btree_delete_overflow_key () - Delete the overflow key referenced by a b-tree record.
2171  * return: NO_ERROR, or an error code on failure (rc, else er_errid (), else ER_FAILED).
2172  * btid(in): B+tree index identifier
2173  * page_ptr(in): Page that contains the overflow key
2174  * slot_id(in): Slot that contains the overflow key
2175  * node_type(in): Leaf or NonLeaf page
2176  *
2177  * Note: The overflow key is deleted. This routine will not delete the btree slot containing the key.
      * NOTE(review): listing lines 2203, 2205, 2210 and 2215 (the conditions guarding the three blocks in the
      * leaf branch) are missing from this copy; restore them from the upstream file.
2178  */
2179 static int
2180 btree_delete_overflow_key (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, INT16 slot_id,
2181  BTREE_NODE_TYPE node_type)
2182 {
2183  RECDES rec;
2184  VPID page_vpid;
2185  char *start_ptr;
2186  OR_BUF buf;
2187  int rc = NO_ERROR;
2188 
2189  assert (slot_id > 0);
2190 
      /* The record is only peeked below, so no copy area is supplied. */
2191  rec.area_size = -1;
2192 
2193  /* first read the record to get first page identifier */
2194  if (spage_get_record (thread_p, page_ptr, slot_id, &rec, PEEK) != S_SUCCESS)
2195  {
2196  goto exit_on_error;
2197  }
2198 
2199  /* get first page identifier */
      /* start_ptr is moved past the record prefix so that it points at the stored overflow key VPID. */
2200  if (node_type == BTREE_LEAF_NODE)
2201  {
      /* Leaf record: skip one or two OIDs plus up to two MVCCIDs, depending on the (missing) conditions. */
2202  int mvccids_size = 0;
2204 
2206  {
2207  mvccids_size += OR_MVCCID_SIZE;
2208  }
2209 
2211  {
2212  mvccids_size += OR_MVCCID_SIZE;
2213  }
2214 
2216  {
2217  start_ptr = rec.data + (2 * OR_OID_SIZE) + mvccids_size;
2218  }
2219  else
2220  {
2221  start_ptr = rec.data + OR_OID_SIZE + mvccids_size;
2222  }
2223  }
2224  else
2225  {
      /* Non-leaf record: the VPID follows the fixed-size non-leaf record part. */
2226  start_ptr = rec.data + NON_LEAF_RECORD_SIZE;
2227  }
2228 
2229  or_init (&buf, start_ptr, DISK_VPID_SIZE);
2230 
      /* The VPID is stored as an int page id followed by a short volume id. */
2231  page_vpid.pageid = or_get_int (&buf, &rc);
2232  if (rc == NO_ERROR)
2233  {
2234  page_vpid.volid = or_get_short (&buf, &rc);
2235  }
2236  if (rc != NO_ERROR)
2237  {
2238  goto exit_on_error;
2239  }
2240 
      /* overflow_delete reports failure by returning NULL. */
2241  if (overflow_delete (thread_p, &(btid->ovfid), &page_vpid) == NULL)
2242  {
2243  goto exit_on_error;
2244  }
2245 
2246  return NO_ERROR;
2247 
2248 exit_on_error:
2249 
      /* Never report NO_ERROR from the error path: fall back to er_errid (), then to ER_FAILED. */
2250  return (rc == NO_ERROR && (rc = er_errid ()) == NO_ERROR) ? ER_FAILED : rc;
2251 }
2252 
2253 /*
2254  * Common utility routines
2255  */
2256 
2257 
2258 
2259 /*
2260  * btree_leaf_get_vpid_for_overflow_oids () - Read the overflow OIDs page VPID stored at the end of a leaf record.
2261  * return: error code or NO_ERROR
2262  * rec(in): Leaf record; the VPID is read from its last DISK_VPID_ALIGNED_SIZE bytes.
2263  * ovfl_vpid(out): VPID read from the record. volid is left unset if reading pageid fails.
      *
      * NOTE(review): the function signature (listing line 2266) and listing line 2271 are missing from this copy.
2264  */
2265 static int
2267 {
2268  OR_BUF buf;
2269  int rc = NO_ERROR;
2270 
2272 
      /* The link occupies DISK_VPID_ALIGNED_SIZE bytes at the record tail; only DISK_VPID_SIZE of them are read. */
2273  or_init (&buf, rec->data + rec->length - DISK_VPID_ALIGNED_SIZE, DISK_VPID_SIZE);
2274 
2275  ovfl_vpid->pageid = or_get_int (&buf, &rc);
2276  if (rc == NO_ERROR)
2277  {
2278  ovfl_vpid->volid = or_get_short (&buf, &rc);
2279  }
2280 
2281  return rc;
2282 }
2283 
2284 /*
2285  * btree_leaf_record_change_overflow_link () - Modify the link of leaf record.
2286  *
2287  * return : Void.
2288  * thread_p (in) : Thread entry.
2289  * btid_int (in) : B-tree info.
2290  * leaf_record (in/out) : Leaf record. Its data and length are modified in place.
2291  * new_overflow_vpid (in) : New overflow link. A NULL VPID removes an existing link.
2292  * rv_undo_data_ptr (in/out) : If not null (and *rv_undo_data_ptr not null), outputs undo recovery data for changes made; updated with the pointer returned by the packing function.
2293  * rv_redo_data_ptr (in/out) : If not null (and *rv_redo_data_ptr not null), outputs redo recovery data for changes made; updated with the pointer returned by the packing function.
      *
      * Three cases are handled: remove an existing link, overwrite an existing link, or append a link to a
      * record that had none (in which case btree_leaf_record_handle_first_overflow is also called).
      * NOTE(review): listing lines 2296, 2311, 2327, 2334, 2338, 2345, 2350, 2367 and 2377 are missing from
      * this copy (start of the signature, the outer condition, and parts of the flag / logging statements);
      * restore them from the upstream file.
2294  */
2295 void
2297  VPID * new_overflow_vpid, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
2298 {
2299  char *ovf_link_ptr = NULL;
2300  bool undo_logging = false;
2301  bool redo_logging = false;
2302 
2303  /* Assert expected arguments. */
2304  assert (btid_int != NULL);
2305  assert (leaf_record != NULL);
2306  assert (new_overflow_vpid != NULL);
2307 
      /* Recovery data is packed only when both the pointer and the buffer it points to are given. */
2308  undo_logging = rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL;
2309  redo_logging = rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL;
2310 
2312  {
2313  /* Leaf record already had overflow link. */
2314  if (VPID_ISNULL (new_overflow_vpid))
2315  {
      /* Case 1: remove the existing link. */
2316  /* Undo logging. */
2317  if (undo_logging)
2318  {
2319  /* Log link removal. */
2320  ovf_link_ptr = leaf_record->data + leaf_record->length - DISK_VPID_ALIGNED_SIZE;
2321  *rv_undo_data_ptr =
2322  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, leaf_record->length - DISK_VPID_ALIGNED_SIZE,
2323  DISK_VPID_ALIGNED_SIZE, 0, ovf_link_ptr);
2324 
2325  /* Log clear flag. */
2326  *rv_undo_data_ptr =
2328  leaf_record->data + OR_OID_SLOTID);
2329  }
2330  /* Remove overflow VPID. The link is the record tail, so shrinking the length drops it. */
2331  leaf_record->length -= DISK_VPID_ALIGNED_SIZE;
2332 
2333  /* Both insert and delete MVCCID's should exist. */
2335  /* TODO: Object is no longer fixed size and we may be able to remove unnecessary info. */
2336 
2337  /* Clear BTREE_LEAF_RECORD_OVERFLOW_OIDS flag. */
2339 
2340  /* Redo logging. */
2341  if (redo_logging)
2342  {
2343  /* Log link removal. leaf_record->length has already been reduced, so it is the offset of the removed link. */
2344  *rv_redo_data_ptr = log_rv_pack_redo_record_changes (*rv_redo_data_ptr, leaf_record->length,
2346  NULL /* just * remove. */ );
2347 
2348  /* Log clear flag. */
2349  *rv_redo_data_ptr =
2351  leaf_record->data + OR_OID_SLOTID);
2352  }
2353  }
2354  else
2355  {
      /* Case 2: overwrite the existing link with the new VPID; record length is unchanged. */
2356  assert (disk_is_page_sector_reserved (thread_p, new_overflow_vpid->volid, new_overflow_vpid->pageid)
2357  != DISK_INVALID);
2358 
2359  /* Update existing link. */
2360  ovf_link_ptr = leaf_record->data + leaf_record->length - DISK_VPID_ALIGNED_SIZE;
2361 
2362  /* Undo logging. Packed before the overwrite below. */
2363  if (undo_logging)
2364  {
2365  *rv_undo_data_ptr =
2366  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, leaf_record->length - DISK_VPID_ALIGNED_SIZE,
2368  }
2369 
2370  OR_PUT_VPID_ALIGNED (ovf_link_ptr, new_overflow_vpid);
2371 
2372  /* Redo logging. Packed after the overwrite above. */
2373  if (redo_logging)
2374  {
2375  *rv_redo_data_ptr =
2376  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, leaf_record->length - DISK_VPID_ALIGNED_SIZE,
2378  }
2379  }
2380  }
2381  else
2382  {
      /* Case 3: append a link to a record that had none. */
2383  /* Leaf record didn't have overflow link. */
2384  assert (!VPID_ISNULL (new_overflow_vpid));
2385  assert (disk_is_page_sector_reserved (thread_p, new_overflow_vpid->volid, new_overflow_vpid->pageid)
2386  != DISK_INVALID);
2387 
2388  /* Undo logging for added link. */
2389  if (undo_logging)
2390  {
2391  *rv_undo_data_ptr =
2392  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, leaf_record->length, 0, DISK_VPID_ALIGNED_SIZE, NULL);
2393  }
2394 
2395  /* Add overflow VPID. It is written just past the current end of the record. */
2396  ovf_link_ptr = leaf_record->data + leaf_record->length;
2397  OR_PUT_VPID_ALIGNED (ovf_link_ptr, new_overflow_vpid);
2398  /* Update record length. */
2399  leaf_record->length += DISK_VPID_ALIGNED_SIZE;
2400 
2401  /* Redo logging for added link. The length was already increased, hence the subtraction to get the link offset. */
2402  if (redo_logging)
2403  {
2404  *rv_redo_data_ptr =
2405  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, leaf_record->length - DISK_VPID_ALIGNED_SIZE, 0,
2406  DISK_VPID_ALIGNED_SIZE, ovf_link_ptr);
2407  }
2408 
2409  /* First object must be fixed size to provide enough space if objects are swapped from overflow. */
2410  btree_leaf_record_handle_first_overflow (thread_p, leaf_record, btid_int, rv_undo_data_ptr, rv_redo_data_ptr);
2411  }
2412 
      /* Debug builds validate the resulting record; the result of the check is ignored. */
2413 #if !defined (NDEBUG)
2414  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, NULL);
2415 #endif /* !NDEBUG */
2416 }
2417 
2418 /*
2419  * btree_leaf_get_first_object () - Get first object of leaf record.
2420  *
2421  * return : Error code, as returned by btree_or_get_object (asserted to be NO_ERROR).
2422  * btid (in) : B-tree info.
2423  * recp (in) : Leaf record.
2424  * oidp (out) : First object OID.
2425  * class_oid (out) : First object class OID.
2426  * mvcc_info (out) : First object MVCC info.
      *
      * NOTE(review): the function signature (listing line 2429) is missing from this copy.
2427  */
2428 int
2430 {
2431  OR_BUF record_buffer;
2432  int error_code = NO_ERROR;
2433  int dummy_offset = 0;
2434 
2435  /* Assert expected arguments. */
2436  assert (btid != NULL);
2437  assert (oidp != NULL);
2438  /* TODO: consider class_oid and mvcc_info. Should they be required? */
2439 
      /* Read one object from a buffer set up over the record; dummy_offset (0) is passed as the offset argument. */
2440  BTREE_RECORD_OR_BUF_INIT (record_buffer, recp);
2441  error_code = btree_or_get_object (&record_buffer, btid, BTREE_LEAF_NODE, dummy_offset, oidp, class_oid, mvcc_info);
2442  /* We expect first object can be successfully obtained. */
2443  assert (error_code == NO_ERROR);
2444  return error_code;
2445 }
2446 
2447 /*
2448  * btree_get_num_visible_oids_from_all_ovf () - Get the number of visible objects according to snapshot
2449  * in all overflow pages.
2450  *
2451  * return : Error code.
2452  * thread_p (in) : Thread entry.
2453  * btid (in) : B-tree info.
2454  * first_ovfl_vpid (in) : VPID of first overflow page.
2455  * num_visible_oids (out) : The number of visible objects.
2456  * max_visible_oids (in) : Maximum allowed number of visible objects.
2457  * mvcc_snapshot (in) : Snapshot used for visibility check.
2458  */
2459 static int
2461  int *num_visible_oids, int *max_visible_oids, MVCC_SNAPSHOT * mvcc_snapshot)
2462 {
2463  RECDES ovfl_copy_rec;
2464  char ovfl_copy_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
2465  VPID next_ovfl_vpid;
2466  PAGE_PTR ovfl_page = NULL;
2467  int ret;
2468  int num_node_visible_oids = 0;
2469  int max_page_visible_oids = 0, *p_max_page_visible_oids = NULL;
2470 
2471  if (max_visible_oids != NULL)
2472  {
2473  max_page_visible_oids = *max_visible_oids;
2474  p_max_page_visible_oids = &max_page_visible_oids;
2475  }
2476  /* not found in leaf page - search in overflow page */
2477  ovfl_page = NULL;
2478 
2479  ovfl_copy_rec.area_size = DB_PAGESIZE;
2480  ovfl_copy_rec.data = PTR_ALIGN (ovfl_copy_rec_buf, BTREE_MAX_ALIGN);
2481 
2482  /* Assert expected arguments. */
2483  assert (btid != NULL);
2484  assert (num_visible_oids != NULL);
2485  assert (first_ovfl_vpid != NULL);
2486 
2487  *num_visible_oids = 0;
2488  ovfl_page = NULL;
2489  next_ovfl_vpid = *first_ovfl_vpid;
2490  /* search for OID into overflow page */
2491  while (!VPID_ISNULL (&next_ovfl_vpid))
2492  {
2493  ovfl_page = pgbuf_fix (thread_p, &next_ovfl_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
2494  if (ovfl_page == NULL)
2495  {
2496  ASSERT_ERROR_AND_SET (ret);
2497  goto error;
2498  }
2499 
2500  (void) pgbuf_check_page_ptype (thread_p, ovfl_page, PAGE_BTREE);
2501 
2502  if (spage_get_record (thread_p, ovfl_page, 1, &ovfl_copy_rec, COPY) != S_SUCCESS)
2503  {
2504  goto error;
2505  }
2506  assert (ovfl_copy_rec.length % 4 == 0);
2507 
2508  ret =
2509  btree_record_get_num_visible_oids (thread_p, btid, &ovfl_copy_rec, 0, BTREE_OVERFLOW_NODE,
2510  p_max_page_visible_oids, mvcc_snapshot, num_visible_oids);
2511  if (ret != NO_ERROR)
2512  {
2513  goto error;
2514  }
2515  (*num_visible_oids) += num_node_visible_oids;
2516 
2517  if (max_visible_oids)
2518  {
2519  if ((*num_visible_oids) >= (*max_visible_oids))
2520  {
2521  pgbuf_unfix_and_init (thread_p, ovfl_page);
2522  return NO_ERROR;
2523  }
2524 
2525  /* update remaining visible oids to search for */
2526  max_page_visible_oids -= num_node_visible_oids;
2527  }
2528 
2529  btree_get_next_overflow_vpid (thread_p, ovfl_page, &next_ovfl_vpid);
2530  pgbuf_unfix_and_init (thread_p, ovfl_page);
2531  }
2532 
2533  return NO_ERROR;
2534 
2535 error:
2536 
2537  if (ovfl_page != NULL)
2538  {
2539  pgbuf_unfix_and_init (thread_p, ovfl_page);
2540  }
2541 
2542  ret = er_errid ();
2543  if (ret == NO_ERROR)
2544  {
2545  ret = ER_FAILED;
2546  }
2547 
2548  return ret;
2549 }
2550 
2551 /*
2552  * btree_get_num_visible_from_leaf_and_ovf () - Get the number of visible objects in record.
2553  *
2554  * return : Error code.
2555  * thread_p (in) : Thread entry.
2556  * btid_int (in) : B-tree info.
2557  * leaf_record (in) : Leaf record descriptor.
2558  * offset_after_key (in) : Offset to where packed key is ended.
2559  * leaf_info (in) : Leaf record information (VPID of first overflow).
2560  * max_visible_oids (in/out) : Non-null value if there is limit of objects to count.
2561  * If limit is reached, counting is stopped and current count is returned.
 * When non-null, the pointed value is decremented by the number of visible objects
 * counted in the leaf record.
2562  * mvcc_snapshot (in) : Snapshot for visibility test.
2563  * num_visible(out) : Number of visible items (leaf record count plus overflow pages count).
 *
 * NOTE(review): listing line 2566 (function name and first parameters) is absent from this view.
2564  */
2565 int
2567  int offset_after_key, LEAF_REC * leaf_info, int *max_visible_oids,
2568  MVCC_SNAPSHOT * mvcc_snapshot, int *num_visible)
2569 {
2570  int error_code = NO_ERROR; /* Error code. */
2571  int num_ovf_visible = 0; /* Overflow pages visible objects count. */
2572 
2573  *num_visible = 0;
2574 
2575  /* Get number of visible objects from leaf record. */
2576  error_code = btree_record_get_num_visible_oids (thread_p, btid_int, leaf_record, offset_after_key, BTREE_LEAF_NODE,
2577  max_visible_oids, mvcc_snapshot, num_visible);
2578  if (error_code != NO_ERROR)
2579  {
2580  /* Error occurred */
2581  ASSERT_ERROR ();
2582  return error_code;
2583  }
2584 
2585  if (max_visible_oids != NULL)
2586  {
 /* Turn the limit into the remaining budget for the overflow pages. */
2587  (*max_visible_oids) -= *num_visible;
2588  if (*max_visible_oids <= 0)
2589  {
2590  /* The maximum count of visible objects has been reached. Stop now. */
2591  return NO_ERROR;
2592  }
2593  }
2594 
2595  /* Get number of visible objects from overflow. */
2596  if (!VPID_ISNULL (&leaf_info->ovfl))
2597  {
2598  error_code = btree_get_num_visible_oids_from_all_ovf (thread_p, btid_int, &leaf_info->ovfl, &num_ovf_visible,
2599  max_visible_oids, mvcc_snapshot);
2600  if (error_code != NO_ERROR)
2601  {
2602  ASSERT_ERROR ();
2603  return error_code;
2604  }
2605 
2606  /* Safe guard. */
2607  assert (num_ovf_visible >= 0);
2608  }
2609 
2610  /* Return result */
2611  *num_visible = *num_visible + num_ovf_visible;
2612 
2613  return NO_ERROR;
2614 }
2615 
2616 /*
2617  * btree_record_get_num_visible_oids () - get number of visible OIDS
2618  * return: error code. ER_FAILED is returned when mvcc_snapshot is NULL.
2619  * thread_p(in): thread entry
2620  * btid(in): B+tree index identifier
2621  * rec(in): record descriptor
2622  * oid_offset(in): OID offset; for a leaf record the buffer is moved here after the first object to skip the key
2623  * node_type(in): node type (must not be BTREE_NON_LEAF_NODE)
2624  * max_visible_oids(in): max visible oids to search for; if not NULL, counting stops once the count reaches it
2625  * mvcc_snapshot(in): MVCC snapshot
2626  * num_visible(out): Number of visible oids. Set to -1 on entry and left at -1 when an error is returned.
 *
 * NOTE(review): listing lines 2629 (function name and first parameters), 2634-2635 and 2654 are absent from
 * this view; see the notes at the corresponding places below.
2627  */
2628 static int
2630  BTREE_NODE_TYPE node_type, int *max_visible_oids, MVCC_SNAPSHOT * mvcc_snapshot,
2631  int *num_visible)
2632 {
2633  int mvcc_flags = 0, rec_oid_cnt = 0, length = 0;
 /* NOTE(review): mvcc_info and mvcc_rec_header used below are presumably declared on absent lines 2634-2635. */
2636  OR_BUF buf;
2637  bool is_first = true;
2638  int error_code = NO_ERROR;
2639 
2640  *num_visible = -1;
2641 
2642  /* Assert expected arguments. */
2643  assert (btid != NULL);
2644  assert (rec != NULL);
2645  assert (oid_offset >= 0);
2646  assert (node_type != BTREE_NON_LEAF_NODE);
2647 
 /* A snapshot is mandatory: visibility cannot be decided without it. */
2648  if (mvcc_snapshot == NULL)
2649  {
2650  error_code = ER_FAILED;
2651  return error_code;
2652  }
2653  length = rec->length;
 /* NOTE(review): the condition guarding the next block (line 2654) is absent; presumably it checks that the
  * record ends with a link to the first overflow page, which is excluded from the parsed length - confirm. */
2655  {
2656  length -= DB_ALIGN (DISK_VPID_SIZE, INT_ALIGNMENT);
2657  }
2658 
2659  or_init (&buf, rec->data, length);
2660  while (buf.ptr < buf.endptr)
2661  {
2662  /* Get MVCC flags */
2663  mvcc_flags = btree_record_object_get_mvcc_flags (buf.ptr);
2664 
2665  /* Skip object OID */
2666  error_code = or_advance (&buf, OR_OID_SIZE);
2667  if (error_code != NO_ERROR)
2668  {
2669  return error_code;
2670  }
2671 
 /* In unique indexes a class OID follows the object OID for every overflow object, for every leaf object after
  * the first, and for the first leaf object only when the record has BTREE_LEAF_RECORD_CLASS_OID flag. */
2672  if (BTREE_IS_UNIQUE (btid->unique_pk)
2673  && (node_type == BTREE_OVERFLOW_NODE || !is_first || btree_leaf_is_flaged (rec, BTREE_LEAF_RECORD_CLASS_OID)))
2674  {
2675  /* Skip class OID */
2676  error_code = or_advance (&buf, OR_OID_SIZE);
2677  if (error_code != NO_ERROR)
2678  {
2679  return error_code;
2680  }
2681  }
2682 
2683  /* Get MVCC information */
2684  error_code = btree_or_get_mvccinfo (&buf, &mvcc_info, mvcc_flags);
2685  if (error_code != NO_ERROR)
2686  {
2687  return error_code;
2688  }
2689  /* TODO */
2690  /* Isn't it better to create snapshot function for BTREE_MVCC_INFO? */
2691  btree_mvcc_info_to_heap_mvcc_header (&mvcc_info, &mvcc_rec_header);
2692 
2693  /* Check snapshot */
2694  if (mvcc_snapshot->snapshot_fnc (thread_p, &mvcc_rec_header, mvcc_snapshot) == SNAPSHOT_SATISFIED)
2695  {
2696  /* Satisfies snapshot so counter must be incremented */
2697  rec_oid_cnt++;
2698  }
2699 
2700  if (max_visible_oids != NULL)
2701  {
 /* Stop early once the requested limit has been reached. */
2702  if (rec_oid_cnt >= *max_visible_oids)
2703  {
2704  *num_visible = rec_oid_cnt;
2705  return error_code;
2706  }
2707  }
2708 
2709  if (node_type == BTREE_LEAF_NODE && is_first)
2710  {
2711  /* Must skip over the key value to the next object */
2712  or_seek (&buf, oid_offset);
2713  }
2714  is_first = false;
2715  }
2716 
2717  *num_visible = rec_oid_cnt;
2718 
2719  return error_code;
2720 }
2721 
2722 /*
2723  * btree_record_get_num_oids () - Compute the total number of objects in a leaf or overflow record.
2724  *
2725  * return : Number of objects.
2726  * thread_p (in) : Thread entry.
2727  * btid_int (in) : B-tree info.
2728  * rec (in) : Record descriptor.
2729  * after_key_offset (in) : Offset where packed key ends. Is used only in case of leaf records.
2730  * node_type (in) : Leaf/overflow node type.
 *
 * NOTE(review): listing line 2769 (inside the counting loop) is absent from this view.
2731  */
2732 static int
2733 btree_record_get_num_oids (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * rec, int after_key_offset,
2734  BTREE_NODE_TYPE node_type)
2735 {
2736  int rec_oid_cnt;
2737  short mvcc_flag;
2738  OR_BUF buf;
2739  int fixed_object_size;
2740 
2741  assert (rec != NULL && after_key_offset >= 0 && node_type != BTREE_NON_LEAF_NODE);
2742 
2743  if (node_type == BTREE_LEAF_NODE)
2744  {
2745  /* Leaf record. */
2746 
2747  /* There is one object before key. */
2748  rec_oid_cnt = 1;
2749 
2750  BTREE_RECORD_OR_BUF_INIT (buf, rec);
2751  or_seek (&buf, after_key_offset);
2752  /* Pointing after key. */
2753 
2754  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
2755  {
2756  /* All remaining objects are fixed size. It is enough to divide remaining record size by object size. */
2757  fixed_object_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
2758  /* Safe guard */
2759  assert ((CAST_BUFLEN (buf.endptr - buf.ptr) % fixed_object_size) == 0);
2760  rec_oid_cnt += CAST_BUFLEN (buf.endptr - buf.ptr) / fixed_object_size;
2761  return rec_oid_cnt;
2762  }
2763  /* Not unique. Objects have variable size. Count them manually. */
2764 
2765  /* Count oids */
2766  while (buf.ptr < buf.endptr)
2767  {
2768  mvcc_flag = btree_record_object_get_mvcc_flags (buf.ptr);
 /* NOTE(review): line 2769 is absent here. Nothing visible advances buf.ptr, so the statement that skips over
  * the current object (presumably sized from mvcc_flag) is expected on that line - confirm. */
2770  rec_oid_cnt++;
2771  }
2772  assert (buf.ptr == buf.endptr);
2773  return rec_oid_cnt;
2774  }
2775 
2776  /* Overflow. Object have fixed size. */
2777  fixed_object_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
2778  assert (rec->length % fixed_object_size == 0);
2779  rec_oid_cnt = CEIL_PTVDIV (rec->length, fixed_object_size);
2780  return rec_oid_cnt;
2781 }
2782 
2783 /*
2784  * btree_leaf_change_first_object () - Replace first object in record with given object.
2785  *
2786  * return : Void
2787  * thread_p (in) : thread entry
2788  * recp (in/out) : B-tree leaf record.
2789  * btid (in) : B-tree info.
2790  * oidp (in) : Replacing instance OID.
2791  * class_oidp (in) : Replacing class OID.
2792  * mvcc_info (in) : Replacing MVCC info. If NULL, an empty MVCC info (no flags) is used.
2793  * key_offset (out) : If not NULL, outputs the size difference between new and old first object.
2794  * rv_undo_data_ptr (out) : If not NULL, output undo logging of this change.
2795  * rv_redo_data_ptr (out) : If not NULL, output redo logging of this change.
 *
 * NOTE(review): listing lines 2815, 2882, 2886, 2931, 2933, 2945 and 2965 are absent from this view; they are
 * the conditions or assert parts marked below.
2796  */
2797 void
2798 btree_leaf_change_first_object (THREAD_ENTRY * thread_p, RECDES * recp, BTID_INT * btid, OID * oidp, OID * class_oidp,
2799  BTREE_MVCC_INFO * mvcc_info, int *key_offset, char **rv_undo_data_ptr,
2800  char **rv_redo_data_ptr)
2801 {
2802  short old_rec_flag = 0, new_rec_flag = 0, mvcc_flags = 0;
2803  int old_object_size, new_object_size;
2804  bool new_has_insid = false, new_has_delid = false;
2805  bool new_has_class_oid = false;
2806  OR_BUF buffer;
2807  BTREE_MVCC_INFO local_mvcc_info;
2808 
2809  /* Get old record flags */
2810  old_rec_flag = btree_leaf_get_flag (recp);
2811  /* Initialize new record flags same as old record flags */
2812  new_rec_flag = old_rec_flag;
2813 
2814  /* Get the size of old object */
 /* NOTE(review): the condition of this if/else (line 2815) is absent; presumably it tests whether the old first
  * object stores a class OID - confirm. */
2816  {
2817  /* Also class OID is saved */
2818  old_object_size = 2 * OR_OID_SIZE;
2819  }
2820  else
2821  {
2822  old_object_size = OR_OID_SIZE;
2823  }
2824 
2825  /* Compute the required size for the new object */
2826  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (&btid->topclass_oid, class_oidp)
2827  /* NULL class OID is considered topclass_oid. */
2828  && !OID_ISNULL (class_oidp))
2829  {
2830  /* Also class OID is saved */
2831  new_object_size = 2 * OR_OID_SIZE;
2832  /* Add BTREE_LEAF_RECORD_CLASS_OID flag */
2833  new_rec_flag |= BTREE_LEAF_RECORD_CLASS_OID;
2834  new_has_class_oid = true;
2835  }
2836  else
2837  {
2838  new_object_size = OR_OID_SIZE;
2839  /* Clear BTREE_LEAF_RECORD_CLASS_OID flag */
2840  new_rec_flag &= ~BTREE_LEAF_RECORD_CLASS_OID;
2841  }
2842 
2843  /* insert/delete MVCCID's may be present or may be added */
2844  if (mvcc_info == NULL)
2845  {
2846  /* Use empty MVCC info */
2847  mvcc_info = &local_mvcc_info;
2848  mvcc_info->flags = 0;
2849  }
2850 
2851  if (old_rec_flag & BTREE_LEAF_RECORD_OVERFLOW_OIDS)
2852  {
2853  /* First object must have fixed size */
2854  old_object_size += 2 * OR_MVCCID_SIZE;
2855  new_object_size += 2 * OR_MVCCID_SIZE;
2856  new_has_insid = true;
2857  new_has_delid = true;
2858  BTREE_MVCC_INFO_SET_FIXED_SIZE (mvcc_info);
2859  if (BTREE_IS_UNIQUE (btid->unique_pk) && !new_has_class_oid)
2860  {
2861  /* Fixed size is required, so force adding class OID also */
2862  new_rec_flag |= BTREE_LEAF_RECORD_CLASS_OID;
2863  new_object_size += OR_OID_SIZE;
2864  new_has_class_oid = true;
2865  }
2866  new_rec_flag |= BTREE_LEAF_RECORD_OVERFLOW_OIDS;
2867  }
2868  else
2869  {
2870  /* Check new MVCCID's */
2871  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
2872  {
2873  new_object_size += OR_MVCCID_SIZE;
2874  new_has_insid = true;
2875  }
2876  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
2877  {
2878  new_object_size += OR_MVCCID_SIZE;
2879  new_has_delid = true;
2880  }
2881  /* Check old MVCCID's */
 /* NOTE(review): the conditions of the next two blocks (lines 2882 and 2886) are absent; presumably they test
  * the insert and delete MVCCID flags of the old first object - confirm. */
2883  {
2884  old_object_size += OR_MVCCID_SIZE;
2885  }
2887  {
2888  old_object_size += OR_MVCCID_SIZE;
2889  }
2890  }
2891 
2892  /* Log undo changes. */
2893  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
2894  {
2895  *rv_undo_data_ptr =
2896  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, 0, old_object_size, new_object_size, recp->data);
2897  }
2898 
2899  /* Key and any other OID's may need to be moved */
2900  RECORD_MOVE_DATA (recp, new_object_size, old_object_size);
2901  if (key_offset)
2902  {
2903  *key_offset = (new_object_size - old_object_size);
2904  }
2905 
2906  /* Add new data */
 /* The buffer covers exactly the new first object, so packing must end precisely at its end. */
2907  or_init (&buffer, recp->data, new_object_size);
2908  /* Object OID first */
2909  if (or_put_oid (&buffer, oidp) != NO_ERROR)
2910  {
2911  assert_release (false);
2912  return;
2913  }
2914  /* Set record flags */
2915  btree_leaf_set_flag (recp, new_rec_flag);
2916 
2917  if (new_has_class_oid)
2918  {
2919  /* Add class OID */
2920  assert (!OID_ISNULL (class_oidp));
2921  if (or_put_oid (&buffer, class_oidp) != NO_ERROR)
2922  {
2923  assert_release (false);
2924  return;
2925  }
2926  }
2927 
2928  /* Add MVCC info */
2929  if (new_has_insid)
2930  {
 /* NOTE(review): lines 2931 and 2933 are absent; line 2932 looks like the middle of an assert on insert_mvccid. */
2932  || (MVCCID_IS_VALID (mvcc_info->insert_mvccid)
2934  /* Add insert MVCCID */
2935  if (or_put_mvccid (&buffer, mvcc_info->insert_mvccid) != NO_ERROR)
2936  {
2937  assert_release (false);
2938  return;
2939  }
2940  mvcc_flags |= BTREE_OID_HAS_MVCC_INSID;
2941  }
2942  if (new_has_delid)
2943  {
 /* NOTE(review): the continuation of this assert (line 2945) is absent. */
2944  assert (mvcc_info->delete_mvccid == MVCCID_NULL
2946  /* Add delete MVCCID */
2947  if (or_put_mvccid (&buffer, mvcc_info->delete_mvccid) != NO_ERROR)
2948  {
 /* NOTE(review): unlike the other packing failure branches in this function, there is no return here. */
2949  assert_release (false);
2950  }
2951  mvcc_flags |= BTREE_OID_HAS_MVCC_DELID;
2952  }
2953  if (mvcc_flags != 0)
2954  {
2955  /* Set MVCC flags */
2956  btree_record_object_set_mvcc_flags (recp->data, mvcc_flags);
2957  }
2958 
2959  /* Make sure everything was packed correctly */
2960  assert_release (buffer.ptr == buffer.endptr);
2961 
2962  /* If MVCC is enabled and if b-tree is unique and if the record has overflow OID's, the first oid must have fixed
2963  * size and should also contain class OID (marked as BTREE_LEAF_RECORD_CLASS_OID). */
 /* NOTE(review): the continuation of this assert (line 2965) is absent. */
2964  assert (!BTREE_IS_UNIQUE (btid->unique_pk) || !btree_leaf_is_flaged (recp, BTREE_LEAF_RECORD_OVERFLOW_OIDS)
2966 
2967 #if !defined (NDEBUG)
2968  (void) btree_check_valid_record (thread_p, btid, recp, BTREE_LEAF_NODE, NULL);
2969 #endif
2970 
2971  /* Redo log changes of first object. */
2972  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
2973  {
2974  *rv_redo_data_ptr =
2975  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, 0, old_object_size, new_object_size, recp->data);
2976  }
2977 }
2978 
2979 /*
2980  * btree_leaf_record_handle_first_overflow () - Set fixed size for first object and update record.
2981  *
2982  * return : Void.
2983  * thread_p (in) : Thread entry
2984  * recp (in/out) : Leaf record. Its first object is expanded in place to fixed size when needed.
2985  * btid_int (in) : B-tree info.
2986  * rv_undo_data_ptr (out) : If not null, outputs undo recovery data for the changes made to record.
2987  * rv_redo_data_ptr (out) : If not null, outputs redo recovery data for the changes made to record.
 *
 * NOTE(review): listing lines 2990 (function name and first parameters), 3008, 3043, 3048, 3059, 3073, 3084,
 * 3088 and 3104 are absent from this view; see the notes at the corresponding places below.
2988  */
2989 static void
2991  char **rv_undo_data_ptr, char **rv_redo_data_ptr)
2992 {
2993  int old_mvcc_flags;
2994  int old_object_size = OR_OID_SIZE;
2995  int new_object_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
2996  MVCCID delid, insid;
2997  char *ptr = NULL;
2998 
 /* Logging is done only when both the pointer argument and the pointed buffer are not NULL. */
2999  bool undo_logging = rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL;
3000  bool redo_logging = rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL;
3001 
3002  /* Assert expected arguments. */
3003  assert (recp != NULL);
3004  assert (btid_int != NULL);
3005 
3006  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
3007  {
 /* NOTE(review): the condition of the next block (line 3008) is absent; presumably it tests whether the first
  * object already stores a class OID - confirm. */
3009  {
3010  old_object_size += OR_OID_SIZE;
3011  }
3012  }
3013 
 /* Read existing MVCCID's of the first object; missing ones get the defaults used for the fixed size form. */
3014  old_mvcc_flags = btree_record_object_get_mvcc_flags (recp->data);
3015  if (old_mvcc_flags & BTREE_OID_HAS_MVCC_INSID)
3016  {
3017  OR_GET_MVCCID (recp->data + old_object_size, &insid);
3018  old_object_size += OR_MVCCID_SIZE;
3019  }
3020  else
3021  {
3022  insid = MVCCID_ALL_VISIBLE;
3023  }
3024 
3025  if (old_mvcc_flags & BTREE_OID_HAS_MVCC_DELID)
3026  {
3027  OR_GET_MVCCID (recp->data + old_object_size, &delid);
3028  old_object_size += OR_MVCCID_SIZE;
3029  }
3030  else
3031  {
3032  delid = MVCCID_NULL;
3033  }
3034 
3035  if (old_object_size == new_object_size)
3036  {
3037  /* All information is already here */
3038 
3039  /* Log undo setting flag. */
3040  if (undo_logging)
3041  {
 /* NOTE(review): the first line of the call assigned here (line 3043) is absent. */
3042  *rv_undo_data_ptr =
3044  recp->data + OR_OID_SLOTID);
3045  }
3046 
3047  /* Set overflow OID's flag. */
 /* NOTE(review): the statement that sets the flag (line 3048) is absent. */
3049 
3050 #if !defined (NDEBUG)
3051  (void) btree_check_valid_record (thread_p, btid_int, recp, BTREE_LEAF_NODE, NULL);
3052 #endif
3053 
3054  /* Log redo setting flag. */
3055  if (redo_logging)
3056  {
3057  /* Leaf record flags are kept in SLOTID field of first OID. */
 /* NOTE(review): the first line of the call assigned here (line 3059) is absent. */
3058  *rv_redo_data_ptr =
3060  recp->data + OR_OID_SLOTID);
3061  }
3062  return;
3063  }
3064 
3065  /* Log undo changes */
3066  if (undo_logging)
3067  {
3068  *rv_undo_data_ptr =
3069  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, 0, old_object_size, new_object_size, recp->data);
3070  }
3071 
3072  /* Set overflow OID's flag. */
 /* NOTE(review): the statement that sets the flag (line 3073) is absent. */
3074 
3075  /* Must free space to add extra info */
3076  RECORD_MOVE_DATA (recp, new_object_size, old_object_size);
3077 
3078  /* Set pointer after object OID. */
3079  ptr = recp->data + OR_OID_SIZE;
3080 
3081  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
3082  {
3083  /* Class OID must exist. */
 /* NOTE(review): the condition of this if/else (line 3084) and the last statement of its first branch
  * (line 3088) are absent. */
3085  {
3086  /* Add top class OID */
3087  OR_PUT_OID (recp->data + OR_OID_SIZE, &btid_int->topclass_oid);
3089  }
3090  else
3091  {
3092  /* Already here. */
3093  }
3094  ptr += OR_OID_SIZE;
3095  }
3096 
3097  /* Add both insert MVCCID and delete MVCCID */
3098  OR_PUT_MVCCID (ptr, &insid);
3099  ptr += OR_MVCCID_SIZE;
3100  OR_PUT_MVCCID (ptr, &delid);
3101  ptr += OR_MVCCID_SIZE;
3102 
3103  /* Set MVCC flags where OID is packed. */
 /* NOTE(review): the statement that sets the MVCC flags (line 3104) is absent. */
3105 
3106 #if !defined (NDEBUG)
3107  (void) btree_check_valid_record (thread_p, btid_int, recp, BTREE_LEAF_NODE, NULL);
3108 #endif
3109 
3110  /* Redo logging. */
3111  if (redo_logging)
3112  {
3113  /* Object OID could not change. Log only changes after its OID. */
3114  *rv_redo_data_ptr =
3115  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, 0, old_object_size, new_object_size, recp->data);
3116  }
3117 }
3118 
3119 /*
3120  * btree_leaf_get_nth_oid_ptr () - Advance to the nth object in b-tree key record data and return pointer.
3121  *
3122  * return : Pointer to nth object in record data. NULL if advancing through a variable size record fails.
3123  * btid (in) : B-tree data.
3124  * recp (in) : Record data.
3125  * node_type (in) : Node type (leaf or overflow).
3126  * oid_list_offset (in) : Offset to list of objects (for leaf it must skip the packed key).
3127  * n (in) : Required object index. Index 0 is the object at the start of record data.
3128  *
3129  * TODO: This function is no longer used due to changes in b-tree range scan.
3130  * However it may prove useful in the future, so better don't remove it.
 *
 * NOTE(review): listing lines 3151-3152 are absent from this view; vpid_size is used below but its assignment
 * is not visible, so it is presumably computed there - confirm.
3131  */
3132 static char *
3133 btree_leaf_get_nth_oid_ptr (BTID_INT * btid, RECDES * recp, BTREE_NODE_TYPE node_type, int oid_list_offset, int n)
3134 {
3135  OR_BUF buf;
3136  int fixed_size;
3137  int oids_size;
3138  int vpid_size;
3139  int mvcc_info_size;
3140  short mvcc_flags;
3141  bool is_fixed_size;
3142 
3143  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
3144 
3145  if (n == 0)
3146  {
3147  /* First object is always first in record data */
3148  return recp->data;
3149  }
3150 
3153 
 /* Size of the OID part of one object: unique indexes store a class OID next to the object OID. */
3154  if (BTREE_IS_UNIQUE (btid->unique_pk))
3155  {
3156  oids_size = 2 * OR_OID_SIZE;
3157  }
3158  else
3159  {
3160  oids_size = OR_OID_SIZE;
3161  }
3162 
3163  is_fixed_size = (node_type == BTREE_OVERFLOW_NODE || BTREE_IS_UNIQUE (btid->unique_pk));
3164  if (is_fixed_size)
3165  {
3166  /* Each object has fixed size */
3167  fixed_size = BTREE_OBJECT_FIXED_SIZE (btid);
3168 
3169  if (node_type == BTREE_OVERFLOW_NODE)
3170  {
3171  assert (oid_list_offset == 0);
3172  assert (n * fixed_size + vpid_size < recp->length);
3173  return recp->data + n * fixed_size;
3174  }
3175  else /* node_type == BTREE_LEAF_NODE */
3176  {
 /* The first object sits before the key, so only n - 1 objects are skipped after oid_list_offset. */
3177  assert ((oid_list_offset + (n - 1) * fixed_size + vpid_size) < recp->length);
3178  return recp->data + oid_list_offset + (n - 1) * fixed_size;
3179  }
3180  }
3181  /* Not fixed size. */
3182  assert (node_type == BTREE_LEAF_NODE);
3183  /* Count objects after key (without first). */
3184  n = n - 1;
 /* NOTE(review): no seek to oid_list_offset is visible after this initialization; verify where buf.ptr starts. */
3185  BTREE_RECORD_OR_BUF_INIT (buf, recp);
3186 
3187  while (n > 0)
3188  {
3189  /* Skip object */
3190  mvcc_flags = btree_record_object_get_mvcc_flags (buf.ptr);
3191  mvcc_info_size = BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS (mvcc_flags);
3192 
3193  if (or_advance (&buf, oids_size + mvcc_info_size) != NO_ERROR)
3194  {
3195  assert_release (false);
3196  return NULL;
3197  }
3198 
3199  n--;
3200  }
3201 
3202  /* buf.ptr points to nth object */
3203  assert (buf.ptr < buf.endptr);
3204 
3205  return buf.ptr;
3206 }
3207 
3208 /*
3209  * btree_record_get_last_object () - Get last object in record.
3210  *
3211  * return : Error code.
3212  * thread_p (in) : Thread entry.
3213  * btid_int (in) : B-tree info.
3214  * recp (in) : B-tree leaf/overflow record.
3215  * node_type (in) : Leaf/overflow node type.
3216  * offset_after_key (in) : For leaf record, offset to where packed key is ended.
3217  * oidp (out) : Output last object OID.
3218  * class_oid (out) : Output last object class OID.
3219  * mvcc_info (out) : Output last object MVCC info.
3220  * offset_to_last_object (out) : Output offset in record to last object (0 when a leaf record has a single object).
 *
 * NOTE(review): listing line 3223 (function name and first parameters) is absent from this view.
3221  */
3222 static int
3224  int offset_after_key, OID * oidp, OID * class_oid, BTREE_MVCC_INFO * mvcc_info,
3225  int *offset_to_last_object)
3226 {
3227  char *offset = NULL; /* Pointer in record data. */
3228  OR_BUF buf; /* Buffer used to parse record. */
3229  int error_code = NO_ERROR; /* Error code. */
3230 
3231  /* Assert expected arguments. */
3232  assert (btid_int != NULL);
3233  assert (recp != NULL);
3234  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
3235  assert (oidp != NULL);
3236  assert (class_oid != NULL);
3237  assert (mvcc_info != NULL);
3238  assert (offset_to_last_object != NULL);
3239 
3240  /* Create a buffer from b-tree record. */
3241  BTREE_RECORD_OR_BUF_INIT (buf, recp);
3242 
3243  /* Is last object fixed size? True if: 1. Overflow record. 2. Unique leaf record has more than one object. */
3244  if (node_type == BTREE_OVERFLOW_NODE
3245  || (BTREE_IS_UNIQUE (btid_int->unique_pk) && offset_after_key != CAST_BUFLEN (buf.endptr - buf.buffer)))
3246  {
3247  /* Overflow nodes have fixed size objects. Also leaf records of unique indexes have fixed size object after
3248  * packed key. Just compute offset to last object and get object from there. */
3249  /* Set offset to end of record (without leaf link to first overflow). */
3250  offset = buf.endptr;
3251  /* Go back on fixed object size. */
3252  offset -= BTREE_OBJECT_FIXED_SIZE (btid_int);
3253 
3254  /* Save offset to last object offset. */
3255  *offset_to_last_object = CAST_BUFLEN (offset - recp->data);
3256 
3257  /* Unpack last object. */
3258  offset = btree_unpack_object (offset, btid_int, node_type, recp, offset_after_key, oidp, class_oid, mvcc_info);
3259  assert (offset == buf.endptr);
3260  return NO_ERROR;
3261  }
3262  /* Leaf node. */
3263  assert (node_type == BTREE_LEAF_NODE);
3264 
 /* Buffer ends right after the key: the only object is the one stored before the key. */
3265  if (CAST_BUFLEN (buf.endptr - buf.buffer) == offset_after_key)
3266  {
3267  /* Only one object! */
3268  /* Get offset to object. */
3269  *offset_to_last_object = 0;
3270 
3271  (void) btree_unpack_object (recp->data, btid_int, node_type, recp, offset_after_key, oidp, class_oid, mvcc_info);
3272  return NO_ERROR;
3273  }
3274  /* Leaf node and more than one object. */
3275  /* Unique should have been already handled. */
3276  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk));
3277 
3278  /* Objects have variable size, depending on the stored MVCC info. We have to process existing objects until reaching
3279  * the end of record. */
3280 
3281  /* Start looking for last object after key. */
3282  or_seek (&buf, offset_after_key);
3283 
3284  /* Advance until record is consumed. */
3285  while (buf.ptr < buf.endptr)
3286  {
 /* Remember where the current object starts; after the loop this is the start of the last object. */
3287  offset = buf.ptr;
3288  error_code = btree_or_get_object (&buf, btid_int, node_type, offset_after_key, oidp, class_oid, mvcc_info);
3289  if (error_code != NO_ERROR)
3290  {
3291  ASSERT_ERROR ();
3292  assert_release (false);
3293  return error_code;
3294  }
3295  }
3296  /* Safe guard: record was consumed. */
3297  assert (buf.ptr == buf.endptr);
3298 
3299  /* Output offset to last object. */
3300  *offset_to_last_object = CAST_BUFLEN (offset - recp->data);
3301  return NO_ERROR;
3302 }
3303 
3304 /*
3305  * btree_record_remove_last_object () - Remove last object from b-tree record.
3306  *
3307  * return : Void.
3308  * thread_p (in) : Thread entry.
3309  * btid (in) : B-tree info.
3310  * recp (in/out) : B-tree record. Its length is reduced by the size of the removed object.
3311  * node_type (in) : Leaf or overflow node type.
3312  * offset_to_last_object (in) : Offset to last object (must be known). Must be positive and less than record length.
3313  * rv_undo_data_ptr (out) : If not null, output the packed undo logging for this change.
3314  * rv_redo_data_ptr (out) : If not null, output the packed redo logging for this change.
 *
 * NOTE(review): listing line 3317 (function name and first parameters) is absent from this view.
3315  */
3316 static void
3318  int offset_to_last_object, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
3319 {
3320  char *first_ovf_vpid_src;
3321  char *first_ovf_vpid_dest;
3322  int object_size;
3323  bool has_overflow_oids = false;
3324 
3325  /* Assert expected arguments. */
3326  assert (btid != NULL);
3327  assert (recp != NULL);
3328  assert (offset_to_last_object > 0);
3329  assert (recp->length > offset_to_last_object);
3330 
 /* The last object spans from its offset to the end of record, excluding the trailing first overflow VPID that
  * leaf records with overflow OID's carry. */
3331  has_overflow_oids = node_type == BTREE_LEAF_NODE && btree_leaf_is_flaged (recp, BTREE_LEAF_RECORD_OVERFLOW_OIDS);
3332  object_size = recp->length - offset_to_last_object - (has_overflow_oids ? DISK_VPID_ALIGNED_SIZE : 0);
3333 
3334  /* Undo logging. */
3335  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3336  {
3337  *rv_undo_data_ptr =
3338  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, offset_to_last_object, object_size, 0,
3339  recp->data + offset_to_last_object);
3340  }
3341 
3342  if (has_overflow_oids)
3343  {
3344  /* Remove last object and save first overflow VPID. */
3345  first_ovf_vpid_src = recp->data + recp->length - DISK_VPID_ALIGNED_SIZE;
3346  first_ovf_vpid_dest = recp->data + offset_to_last_object;
3347  assert ((first_ovf_vpid_src - first_ovf_vpid_dest) >= DISK_VPID_ALIGNED_SIZE);
3348  VPID_COPY ((VPID *) first_ovf_vpid_dest, (VPID *) first_ovf_vpid_src);
3349  recp->length = offset_to_last_object + DISK_VPID_ALIGNED_SIZE;
3350  }
3351  else
3352  {
3353  /* Just cut off the last object. */
3354  recp->length = offset_to_last_object;
3355  }
3356 
3357 #if !defined (NDEBUG)
3358  (void) btree_check_valid_record (thread_p, btid, recp, node_type, NULL);
3359 #endif /* !NDEBUG */
3360 
3361  /* Redo logging. */
3362  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3363  {
3364  /* Pack redo recovery of change: remove object_size bytes from offset_to_last_object. */
3365  *rv_redo_data_ptr = log_rv_pack_redo_record_changes (*rv_redo_data_ptr, offset_to_last_object, object_size, 0,
3366  NULL /* just * remove */ );
3367  }
3368 }
3369 
3370 /*
3371  * btree_leaf_get_flag () - Get the record flags of a leaf record.
3372  * return: flag of leaf record (the bits of BTREE_LEAF_RECORD_MASK found in the short read at OR_OID_SLOTID)
3373  * recp(in): leaf record
 *
 * NOTE(review): listing line 3376 (function name and parameter list) is absent from this view.
3374  */
3375 static short
3377 {
3378  short slot_id;
3379 
 /* Record flags share the short value read at OR_OID_SLOTID offset of record data. */
3380  slot_id = OR_GET_SHORT (recp->data + OR_OID_SLOTID);
3381 
3382  return slot_id & BTREE_LEAF_RECORD_MASK;
3383 }
3384 
3385 /*
3386  * btree_record_object_get_mvcc_flags () - get MVCC flag for key oid
3387  * return: MVCC flag for key oid (the bits of BTREE_OID_MVCC_FLAGS_MASK found in the short read at OR_OID_VOLID)
3388  * data(in): pointer to OID into key buffer; must not be NULL
 *
 * NOTE(review): listing line 3391 (function name and parameter list) is absent from this view.
3389  */
3390 STATIC_INLINE short
3392 {
3393  short vol_id;
3394 
3395  assert (data != NULL);
 /* MVCC flags share the short value read at OR_OID_VOLID offset of the object. */
3396  vol_id = OR_GET_SHORT (data + OR_OID_VOLID);
3397 
3398  return vol_id & BTREE_OID_MVCC_FLAGS_MASK;
3399 }
3400 
3401 /*
3402  * btree_leaf_is_flaged () -
3403  * return:
3404  * recp(in):
3405  * record_flag(in):
3406  */
3407 static bool
3408 btree_leaf_is_flaged (RECDES * recp, short record_flag)
3409 {
3410  assert ((short) (record_flag & ~BTREE_LEAF_RECORD_MASK) == 0);
3411 
3412  return ((OR_GET_SHORT (recp->data + OR_OID_SLOTID) & record_flag) == record_flag);
3413 }
3414 
3415 /*
3416  * btree_record_object_is_flagged () - Check whether object found at rec_data has the MVCC flag.
3417  *
3418  * return : True if flag is set, false otherwise.
3419  * rec_data (in) : Pointer to an object in b-tree record.
3420  * mvcc_flag (in) : MVCC flag.
3421  */
3422 STATIC_INLINE bool
3423 btree_record_object_is_flagged (char *rec_data, short mvcc_flag)
3424 {
3425  assert ((short) (mvcc_flag & ~BTREE_OID_MVCC_FLAGS_MASK) == 0);
3426 
3427  return ((OR_GET_SHORT (rec_data + OR_OID_VOLID) & mvcc_flag) == mvcc_flag);
3428 }
3429 
3430 /*
3431  * btree_leaf_set_flag () -
3432  * return:
3433  * recp(in/out):
3434  * record_flag(in):
3435  */
3436 static void
3437 btree_leaf_set_flag (RECDES * recp, short record_flag)
3438 {
3439  short slot_id;
3440 
3441  assert ((short) (record_flag & ~BTREE_LEAF_RECORD_MASK) == 0);
3442 
3443  slot_id = OR_GET_SHORT (recp->data + OR_OID_SLOTID);
3444 
3445  OR_PUT_SHORT (recp->data + OR_OID_SLOTID, slot_id | record_flag);
3446 }
3447 
3448 /*
3449  * btree_record_object_set_mvcc_flags () - Set MVCC flags to an object in a b-tree record.
3450  *
3451  * return : Void.
3452  * rec_data (in) : Pointer to an object in a b-tree record.
3453  * mvcc_flags (in) : MVCC flags.
3454  */
3455 static void
3456 btree_record_object_set_mvcc_flags (char *rec_data, short mvcc_flags)
3457 {
3458  short vol_id;
3459 
3460  assert ((short) (mvcc_flags & ~BTREE_OID_MVCC_FLAGS_MASK) == 0);
3461 
3462  vol_id = OR_GET_SHORT (rec_data + OR_OID_VOLID);
3463 
3464  OR_PUT_SHORT (rec_data + OR_OID_VOLID, vol_id | mvcc_flags);
3465 }
3466 
3467 /*
3468  * btree_leaf_clear_flag () - clear leaf key oid flag
3469  * return: nothing
3470  * recp(in/out):
3471  * record_flag(in):
3472  */
3473 static void
3474 btree_leaf_clear_flag (RECDES * recp, short record_flag)
3475 {
3476  short slot_id;
3477 
3478  assert ((short) (record_flag & ~BTREE_LEAF_RECORD_MASK) == 0);
3479 
3480  slot_id = OR_GET_SHORT (recp->data + OR_OID_SLOTID);
3481 
3482  OR_PUT_SHORT (recp->data + OR_OID_SLOTID, slot_id & ~record_flag);
3483 }
3484 
3485 /*
3486  * btree_record_object_clear_mvcc_flags () - Clear MVCC flags from an object in a b-tree record.
3487  *
3488  * return : Void.
3489  * rec_data (in) : Pointer to an object in a b-tree record.
3490  * mvcc_flags (in) : MVCC flags to clear.
3491  */
3492 static void
3493 btree_record_object_clear_mvcc_flags (char *rec_data, short mvcc_flags)
3494 {
3495  short vol_id;
3496 
3497  assert ((short) (mvcc_flags & ~BTREE_OID_MVCC_FLAGS_MASK) == 0);
3498 
3499  vol_id = OR_GET_SHORT (rec_data + OR_OID_VOLID);
3500 
3501  OR_PUT_SHORT (rec_data + OR_OID_VOLID, vol_id & ~mvcc_flags);
3502 }
3503 
3504 /*
3505  * btree_write_fixed_portion_of_non_leaf_record () -
3506  * return:
3507  * rec(in):
3508  * non_leaf_rec(in):
3509  *
3510  * Note: Writes the fixed portion (preamble) of a non leaf record.
3511  * rec must be long enough to hold the header info.
3512  */
3513 static void
3515 {
3516  char *ptr = rec->data;
3517 
3518  assert (!VPID_ISNULL (&(non_leaf_rec->pnt)));
3519 
3520  OR_PUT_INT (ptr, non_leaf_rec->pnt.pageid);
3521  ptr += OR_INT_SIZE;
3522 
3523  OR_PUT_SHORT (ptr, non_leaf_rec->pnt.volid);
3524  ptr += OR_SHORT_SIZE;
3525 
3526  OR_PUT_SHORT (ptr, non_leaf_rec->key_len);
3527 }
3528 
3529 /*
3530  * btree_read_fixed_portion_of_non_leaf_record () -
3531  * return:
3532  * rec(in):
3532  * non_leaf_rec(out):
3534  *
3535  * Note: Reads the fixed portion (preamble) of a non leaf record.
3536  */
3537 static void
3539 {
3540  char *ptr = rec->data;
3541 
3542  non_leaf_rec->pnt.pageid = OR_GET_INT (ptr);
3543  ptr += OR_INT_SIZE;
3544 
3545  non_leaf_rec->pnt.volid = OR_GET_SHORT (ptr);
3546  ptr += OR_SHORT_SIZE;
3547 
3548  assert (!VPID_ISNULL (&(non_leaf_rec->pnt)));
3549 
3550  non_leaf_rec->key_len = OR_GET_SHORT (ptr);
3551 }
3552 
3553 /*
3554  * btree_write_fixed_portion_of_non_leaf_record_to_orbuf () -
3555  * return:
3556  * buf(in):
3557  * non_leaf_rec(in):
3558  *
3559  * Note: Writes the fixed portion (preamble) of a non leaf record using
3560  * the OR_BUF stuff.
3561  */
3562 static void
3564 {
3565  assert (!VPID_ISNULL (&(non_leaf_rec->pnt)));
3566 
3567  or_put_int (buf, non_leaf_rec->pnt.pageid);
3568  or_put_short (buf, non_leaf_rec->pnt.volid);
3569  or_put_short (buf, non_leaf_rec->key_len);
3570 }
3571 
3572 /*
3573  * btree_read_fixed_portion_of_non_leaf_record_from_orbuf () -
3574  * return: NO_ERROR
3575  * buf(in):
3576  * non_leaf_rec(out):
3577  *
3578  * Note: Reads the fixed portion (preamble) of a non leaf record using the OR_BUF stuff.
3579  */
3580 static int
3582 {
3583  int rc = NO_ERROR;
3584 
3585  non_leaf_rec->pnt.pageid = or_get_int (buf, &rc);
3586 
3587  if (rc == NO_ERROR)
3588  {
3589  non_leaf_rec->pnt.volid = or_get_short (buf, &rc);
3590  }
3591 
3592  assert (!VPID_ISNULL (&(non_leaf_rec->pnt)));
3593 
3594  if (rc == NO_ERROR)
3595  {
3596  non_leaf_rec->key_len = or_get_short (buf, &rc);
3597  }
3598 
3599  return rc;
3600 }
3601 
3602 /*
3603  * btree_append_oid () -
3604  * return:
3605  * rec(in):
3606  * oid(in):
3607  *
3608  * Note: Appends an OID onto the record. rec is assumed to have room
3609  * for the new OID and rec.length points to the end of the record
3610  * where the new OID will go and is word aligned.
3611  */
3612 static void
3614 {
3615  char *ptr;
3616 
3617  ptr = rec->data + rec->length;
3618  OR_PUT_OID (ptr, oid);
3619  rec->length += OR_OID_SIZE;
3620 }
3621 
3622 /*
3623  * btree_add_mvccid () - Add insert/delete MVCCID in b-tree record.
3624  *
3625  * return : Void.
3626  * rec (in) : B-tree record.
3627  * oid_offset (in) : Offset to object (where MVCC flag is set).
3628  * mvccid_offset (in) : Add MVCCID at this offset
3629  * mvccid (in) : MVCCID value
3630  * flag (in) : MVCCID flag for has insert or delete
3631  * rv_undo_data_ptr (out) : Outputs undo recovery data for changing the record.
3632  * rv_redo_data_ptr (out) : Outputs redo recovery data for changing the record.
3633  */
3634 STATIC_INLINE void
3635 btree_add_mvccid (RECDES * rec, int oid_offset, int mvccid_offset, MVCCID mvccid, short flag,
3636  char **rv_undo_data_ptr, char **rv_redo_data_ptr)
3637 {
3638  int dest_offset;
3639  char *mvccid_dest_ptr = NULL;
3640  char *oid_ptr = NULL;
3641 
3642  assert (rec != NULL && oid_offset >= 0 && mvccid_offset > 0 && oid_offset < mvccid_offset);
3643  assert (!btree_record_object_is_flagged (rec->data + oid_offset, flag));
3644  assert (rec->length + OR_MVCCID_SIZE < rec->area_size);
3645 
3646  dest_offset = mvccid_offset + OR_MVCCID_SIZE;
3647 
3648  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3649  {
3650  /* Undo logging: changed flag (is kept in object volume ID). */
3651  *rv_undo_data_ptr =
3652  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, oid_offset + OR_OID_VOLID, OR_SHORT_SIZE, OR_SHORT_SIZE,
3653  rec->data + oid_offset + OR_OID_VOLID);
3654  /* Undo logging: added MVCCID. */
3655  *rv_undo_data_ptr = log_rv_pack_undo_record_changes (*rv_undo_data_ptr, mvccid_offset, 0, OR_MVCCID_SIZE, NULL);
3656  }
3657 
3658  RECORD_MOVE_DATA (rec, dest_offset, mvccid_offset);
3659 
3660  /* Set MVCC flag. */
3661  oid_ptr = rec->data + oid_offset;
3662  btree_record_object_set_mvcc_flags (oid_ptr, flag);
3663 
3664  /* Set MVCCID. */
3665  mvccid_dest_ptr = rec->data + mvccid_offset;
3666  OR_PUT_MVCCID (mvccid_dest_ptr, &mvccid);
3667 
3668  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3669  {
3670  /* Redo logging: changed flag (is kept in object volume ID). */
3671  *rv_redo_data_ptr =
3672  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, oid_offset + OR_OID_VOLID, OR_SHORT_SIZE, OR_SHORT_SIZE,
3673  rec->data + oid_offset + OR_OID_VOLID);
3674  /* Redo logging: added MVCCID. */
3675  *rv_redo_data_ptr =
3676  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, mvccid_offset, 0, OR_MVCCID_SIZE, mvccid_dest_ptr);
3677  }
3678 }
3679 
3680 /*
3681  * btree_set_mvccid () - Set MVCCID instead of existing one. This one works for insid and delid.
3682  *
3683  * return : Error code.
3684  * rec (in) : Record data.
3685  * mvccid_offset (in) : Offset of old MVCCID.
3686  * p_mvccid (in) : New MVCCID.
3687  * rv_undo_data_ptr (in) : Outputs undo recovery data for changing the record.
3688  * rv_redo_data_ptr (in) : Outputs redo recovery data for changing the record.
3689  */
3690 STATIC_INLINE void
3691 btree_set_mvccid (RECDES * rec, int mvccid_offset, MVCCID * p_mvccid, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
3692 {
3693  char *mvccid_ptr = NULL;
3694 
3695  assert (rec != NULL && mvccid_offset > 0 && p_mvccid != NULL);
3696 
3697  mvccid_ptr = rec->data + mvccid_offset;
3698 
3699  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3700  {
3701  /* Redo logging: replace MVCCID. */
3702  *rv_undo_data_ptr =
3703  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, mvccid_offset, OR_MVCCID_SIZE, OR_MVCCID_SIZE, mvccid_ptr);
3704  }
3705 
3706  OR_PUT_MVCCID (mvccid_ptr, p_mvccid);
3707 
3708  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3709  {
3710  /* Redo logging: replace MVCCID. */
3711  *rv_redo_data_ptr =
3712  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, mvccid_offset, OR_MVCCID_SIZE, OR_MVCCID_SIZE, mvccid_ptr);
3713  }
3714 }
3715 
3716 //
3717 // btree_remove_mvccid () - remove insert or delete MVCCID from record and generate incremental logging
3718 //
3719 // record (in/out) : b-tree record
3720 // oid_offset (in) : offset to object OID
3721 // mvccid_offset (in) : offset to MVCCID being removed
3722 // flag (in) : has insert or has delete flag
3723 // rv_undo_data_ptr (in/out) : if not null, output undo logging
3724 // rv_redo_data_ptr (in/out) : if not null, output redo logging
3725 //
3726 static inline void
3727 btree_remove_mvccid (RECDES * record, int oid_offset, int mvccid_offset, short flag, char **rv_undo_data_ptr,
3728  char **rv_redo_data_ptr)
3729 {
3730  char *oid_ptr = record->data + oid_offset;
3731  char *mvccid_ptr = record->data + mvccid_offset;
3732 
3733  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3734  {
3735  /* Undo logging: remove MVCCID. */
3736  *rv_undo_data_ptr =
3737  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, mvccid_offset, OR_MVCCID_SIZE, 0, mvccid_ptr);
3738 
3739  /* Undo logging: clear flag. */
3740  *rv_undo_data_ptr =
3741  log_rv_pack_undo_record_changes (*rv_undo_data_ptr, oid_offset + OR_OID_VOLID, OR_SHORT_SIZE,
3742  OR_SHORT_SIZE, oid_ptr + OR_OID_VOLID);
3743  }
3744 
3745  /* Remove. */
3746  RECORD_MOVE_DATA (record, mvccid_offset, mvccid_offset + OR_MVCCID_SIZE);
3747  btree_record_object_clear_mvcc_flags (oid_ptr, flag);
3748 
3749  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3750  {
3751  /* Redo logging: remove MVCCID. */
3752  *rv_redo_data_ptr = log_rv_pack_redo_record_changes (*rv_redo_data_ptr, mvccid_offset, OR_MVCCID_SIZE, 0, NULL);
3753 
3754  /* Redo logging: clear flag. */
3755  *rv_redo_data_ptr =
3756  log_rv_pack_redo_record_changes (*rv_redo_data_ptr, oid_offset + OR_OID_VOLID, OR_SHORT_SIZE,
3757  OR_SHORT_SIZE, oid_ptr + OR_OID_VOLID);
3758  }
3759 }
3760 
3761 /*
3762  * btree_record_append_object () - Append an object and all its info to record.
3763  *
3764  * return : Void.
3765  * thread_p (in) : Thread entry.
3766  * btid_int (in) : B-tree info.
3767  * record (in) : Leaf/overflow record.
3768  * node_type (in) : Node type.
3769  * object_info (in) : Object & info to append.
3770  * rv_undo_data_ptr (out) : If not NULL, outputs undo log recovery data for the change.
3771  * rv_redo_data_ptr (out) : If not NULL, outputs redo log recovery data for the change.
3772  */
3773 static void
3775  BTREE_OBJECT_INFO * object_info, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
3776 {
3777  char *append_at = NULL;
3778  VPID ovf_vpid = VPID_INITIALIZER;
3779  int offset_to_object = 0;
3780  int new_data_size = 0;
3781 
3782  /* Assert expected arguments. */
3783  assert (btid_int != NULL);
3784  assert (record != NULL);
3785  assert (object_info != NULL);
3786 
3787  /* Set append pointer at the end of record. */
3788  append_at = record->data + record->length;
3789 
3790  /* Make sure to keep the link to overflow pages at the end. */
3791  if (node_type == BTREE_LEAF_NODE && record->length > 0
3793  {
3794  /* Set append pointer before the overflow link. */
3795  append_at -= DISK_VPID_ALIGNED_SIZE;
3796  /* Save overflow link. */
3797  OR_GET_VPID (append_at, &ovf_vpid);
3798  assert (!VPID_ISNULL (&ovf_vpid));
3799  }
3800  offset_to_object = CAST_BUFLEN (append_at - record->data);
3801 
3802  /* Pack object. */
3803  append_at = btree_pack_object (append_at, btid_int, node_type, record, object_info);
3804  new_data_size = CAST_BUFLEN (append_at - record->data) - offset_to_object;
3805 
3806  if (!VPID_ISNULL (&ovf_vpid))
3807  {
3808  /* Pack VPID again. */
3809  OR_PUT_VPID_ALIGNED (append_at, &ovf_vpid);
3810  append_at += DISK_VPID_ALIGNED_SIZE;
3811  }
3812 
3813  /* Update record length. */
3814  record->length = CAST_BUFLEN (append_at - record->data);
3815 
3816 #if !defined (NDEBUG)
3817  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
3818 #endif
3819 
3820  /* Undo logging. */
3821  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3822  {
3823  assert (*rv_undo_data_ptr != NULL);
3824  *rv_undo_data_ptr = log_rv_pack_undo_record_changes (*rv_undo_data_ptr, offset_to_object, 0, new_data_size, NULL);
3825  }
3826 
3827  /* Redo logging. */
3828  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3829  {
3830  assert (*rv_redo_data_ptr != NULL);
3831  *rv_redo_data_ptr = log_rv_pack_redo_record_changes (*rv_redo_data_ptr, offset_to_object, 0, /* just insert * data */
3832  new_data_size, record->data + offset_to_object);
3833  }
3834 }
3835 
3836 /*
3837  * btree_insert_object_ordered_by_oid () - Insert in record by keeping objects ordered by OID's.
3838  *
3839  * return : Void.
3840  * thread_p (in) : Thread entry
3841  * record (in) : B-tree (overflow) record.
3842  * btid_int (in) : B-tree info.
3843  * object_info (in) : Object & info being inserted.
3844  * rv_undo_data_ptr (in) : If not null, outputs undo recovery data for the changes made to record.
3845  * rv_redo_data_ptr (in) : If not null, outputs redo recovery data for the changes made to record.
3846  * offset_to_objptr (out) : Output offset to inserted object.
3847  */
3848 static void
3850  BTREE_OBJECT_INFO * object_info, char **rv_undo_data_ptr, char **rv_redo_data_ptr,
3851  int *offset_to_objptr)
3852 {
3853  OID *oid = NULL;
3854  char *oid_ptr = NULL;
3855  int min, mid, max, num;
3856  OID mid_oid;
3857  int size = BTREE_OBJECT_FIXED_SIZE (btid_int);
3858  int offset_to_object = 0;
3859 
3860  /* Assert expected arguments. */
3861  assert (record != NULL);
3862  assert (btid_int != NULL);
3863  assert (object_info != NULL);
3864  assert (BTREE_MVCC_INFO_HAS_INSID (&object_info->mvcc_info) && BTREE_MVCC_INFO_HAS_DELID (&object_info->mvcc_info));
3865 
3866  assert (record->length % size == 0);
3867  num = CEIL_PTVDIV (record->length, size);
3868  assert (num >= 0);
3869 
3870  /* Binary search for the right position to keep the order */
3871  min = 0;
3872  max = num - 1;
3873  mid = 0;
3874  oid = &object_info->oid;
3875 
3876  while (min <= max)
3877  {
3878  mid = (min + max) / 2;
3879  oid_ptr = record->data + (size * mid);
3880 
3881  /* Get MID object OID. */
3882  BTREE_GET_OID (oid_ptr, &mid_oid);
3883 
3884  /* Is same OID? */
3885  if (OID_EQ (oid, &mid_oid))
3886  {
3887  /* With MVCC, this case is possible if some conditions are met: 1. OID is reusable. 2. Vacuum cleaned heap
3888  * entry but didn't clean b-tree entry. 3. A new record is inserted in the same slot. 4. The key for old
3889  * record and new record is the same. Just add the OID here. */
3890  break;
3891  }
3892  else if (OID_GT (oid, &mid_oid))
3893  {
3894  min = mid + 1;
3895  mid++;
3896  }
3897  else
3898  {
3899  max = mid - 1;
3900  }
3901  }
3902 
3903  offset_to_object = size * mid;
3904  oid_ptr = record->data + offset_to_object;
3905 
3906  if (rv_undo_data_ptr != NULL && *rv_undo_data_ptr != NULL)
3907  {
3908  *rv_undo_data_ptr = log_rv_pack_undo_record_changes (*rv_undo_data_ptr, offset_to_object, 0, size, oid_ptr);
3909  }
3910 
3911  /* oid_ptr points to the address where the new object should be saved */
3912  /* Make room for a new OID */
3913  RECORD_MOVE_DATA (record, offset_to_object + size, offset_to_object);
3914 
3915  (void) btree_pack_object (oid_ptr, btid_int, BTREE_OVERFLOW_NODE, record, object_info);
3916 
3917 #if !defined (NDEBUG)
3918  (void) btree_check_valid_record (thread_p, btid_int, record, BTREE_OVERFLOW_NODE, NULL);
3919 #endif
3920 
3921  /* Log redo changes. */
3922  if (rv_redo_data_ptr != NULL && *rv_redo_data_ptr != NULL)
3923  {
3924  *rv_redo_data_ptr = log_rv_pack_redo_record_changes (*rv_redo_data_ptr, offset_to_object, 0, size, oid_ptr);
3925  }
3926 
3927  if (offset_to_objptr != NULL)
3928  {
3929  *offset_to_objptr = offset_to_object;
3930  }
3931 }
3932 
3933 /*
3934  * btree_start_overflow_page () - Create a new overflow page when OID cannot
3935  * be inserted elsewhere. New page is always
3936  * inserted "after" leaf (and before other
3937  * existing overflow pages).
3938  *
3939  * return : Error code.
3940  * thread_p (in) : Thread entry.
3941  * btid_int (in) : B-tree info.
3942  * object_info (in) : New object info.
3943  * first_overflow_vpid (in) : VPID of current first overflow page.
3944  * near_vpid (in) : Hint to allocate new page.
3945  * new_vpid (out) : Output VPID of newly allocated page.
3946  * new_page_ptr (out) : New page.
3947  */
3948 static int
3950  VPID * first_overflow_vpid, VPID * near_vpid, VPID * new_vpid, PAGE_PTR * new_page_ptr)
3951 {
3952  RECDES rec;
3953  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
3954  BTREE_OVERFLOW_HEADER ovf_header_info;
3955  LOG_DATA_ADDR addr;
3956  int error_code = NO_ERROR;
3957 
3958  /* Redo recovery. */
3959  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + MAX_ALIGNMENT];
3960  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, MAX_ALIGNMENT);
3961  char *rv_redo_data_ptr = NULL;
3962  int rv_redo_data_length = 0;
3963 
3964  /* Assert expected arguments. */
3965  assert (btid_int != NULL);
3966  assert (object_info != NULL);
3967  assert (first_overflow_vpid != NULL);
3968  assert (new_vpid != NULL);
3969  assert (new_page_ptr != NULL);
3970  assert (BTREE_MVCC_INFO_HAS_INSID (&object_info->mvcc_info) && BTREE_MVCC_INFO_HAS_DELID (&object_info->mvcc_info));
3971 
3972  /* Get a new overflow page */
3973  *new_page_ptr = btree_get_new_page (thread_p, btid_int, new_vpid, near_vpid);
3974  if (*new_page_ptr == NULL)
3975  {
3976  ASSERT_ERROR_AND_SET (error_code);
3977  return error_code;
3978  }
3979 
3980  VPID_COPY (&ovf_header_info.next_vpid, first_overflow_vpid);
3981 
3982  error_code = btree_init_overflow_header (thread_p, *new_page_ptr, &ovf_header_info);
3983  if (error_code != NO_ERROR)
3984  {
3985  ASSERT_ERROR ();
3986  return error_code;
3987  }
3988 
3989  /* Insert the value in the new overflow page */
3990  /* Prepare record */
3991  rec.type = REC_HOME;
3992  rec.area_size = DB_PAGESIZE;
3993  rec.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
3994  rec.length = 0;
3995  btree_record_append_object (thread_p, btid_int, &rec, BTREE_OVERFLOW_NODE, object_info, NULL, NULL);
3996 
3997 #if !defined (NDEBUG)
3998  (void) btree_check_valid_record (thread_p, btid_int, &rec, BTREE_OVERFLOW_NODE, NULL);
3999 #endif /* !NDEBUG */
4000 
4001  /* Insert in page. */
4002  if (spage_insert_at (thread_p, *new_page_ptr, 1, &rec) != SP_SUCCESS)
4003  {
4004  assert_release (false);
4005  return ER_FAILED;
4006  }
4007 
4008  /* Initialized log data address */
4009  addr.offset = 0;
4010  addr.pgptr = *new_page_ptr;
4011  addr.vfid = &btid_int->sys_btid->vfid;
4012 
4013  /* Redo log. */
4014  rv_redo_data_ptr = rv_redo_data;
4017 #if !defined (NDEBUG)
4018  BTREE_RV_REDO_SET_DEBUG_INFO (&addr, rv_redo_data_ptr, btid_int, BTREE_RV_DEBUG_ID_START_OVF);
4019 #endif /* !NDEBUG */
4020  /* Save first overflow link. */
4021  OR_PUT_VPID_ALIGNED (rv_redo_data_ptr, first_overflow_vpid);
4022  rv_redo_data_ptr += DISK_VPID_ALIGNED_SIZE;
4023  /* Save overflow record data. */
4024  memcpy (rv_redo_data_ptr, rec.data, rec.length);
4025  rv_redo_data_ptr += rec.length;
4026 
4027  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
4028  log_append_redo_data (thread_p, RVBT_RECORD_MODIFY_NO_UNDO, &addr, rv_redo_data_length, rv_redo_data);
4029 
4030  pgbuf_set_dirty (thread_p, *new_page_ptr, DONT_FREE);
4031 
4032  return NO_ERROR;
4033 }
4034 
4035 /*
4036  * btree_get_disk_size_of_key () -
4037  * return:
4038  * key(in):
4039  */
4040 int
4042 {
4043  if (key == NULL || DB_IS_NULL (key))
4044  {
4045  assert (key != NULL && !DB_IS_NULL (key));
4046  return 0;
4047  }
4048 
4049  return pr_index_writeval_disk_size (key);
4050 }
4051 
4052 /*
4053  * btree_write_record () -
4054  * return: NO_ERROR
4055  * btid(in):
4056  * node_rec(in):
4057  * key(in):
4058  * node_type(in):
4059  * key_type(in):
4060  * key_len(in):
4061  * during_loading(in):
4062  * class_oid(in):
4063  * oid(in):
4064  * mvcc_info(in): MVCC info of the object (flags and insert/delete MVCCIDs); may be NULL
4065  * rec(out):
4066  *
4067  * Note: This routine forms a btree record for both leaf and non leaf pages.
4068  *
4069  * node_rec is a NON_LEAF_REC * if we are writing a non leaf page,
4070  * otherwise it is a LEAF_REC *. ovfl_key indicates whether the key will
4071  * be written to the page or stored by the overflow manager. If we are
4072  * writing a non leaf record, oid should be NULL and will be ignored in
4073  * any case.
4074  */
4075 int
4076 btree_write_record (THREAD_ENTRY * thread_p, BTID_INT * btid, void *node_rec, DB_VALUE * key, BTREE_NODE_TYPE node_type,
4077  int key_type, int key_len, bool during_loading, OID * class_oid, OID * oid,
4078  BTREE_MVCC_INFO * mvcc_info, RECDES * rec)
4079 {
4080  VPID key_vpid;
4081  OR_BUF buf;
4082  int error_code = NO_ERROR;
4083 
4084  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_NON_LEAF_NODE);
4085  assert (key_type == BTREE_NORMAL_KEY || key_type == BTREE_OVERFLOW_KEY);
4086  assert (rec != NULL);
4087 
4088  or_init (&buf, rec->data, rec->area_size);
4089 
4090  if (node_type == BTREE_LEAF_NODE)
4091  {
4092  /* first instance oid */
4093  error_code = or_put_oid (&buf, oid);
4094  if (error_code != NO_ERROR)
4095  {
4096  assert_release (false);
4097  return error_code;
4098  }
4099  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (&btid->topclass_oid, class_oid))
4100  {
4101  /* write the subclass OID */
4102  error_code = or_put_oid (&buf, class_oid);
4103  if (error_code != NO_ERROR)
4104  {
4105  assert_release (false);
4106  return error_code;
4107  }
4109  }
4110 
4111  if (mvcc_info != NULL)
4112  {
4113  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
4114  {
4115  error_code = or_put_mvccid (&buf, mvcc_info->insert_mvccid);
4116  if (error_code != NO_ERROR)
4117  {
4118  assert_release (false);
4119  return error_code;
4120  }
4121  }
4122  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
4123  {
4124  error_code = or_put_mvccid (&buf, mvcc_info->delete_mvccid);
4125  if (error_code != NO_ERROR)
4126  {
4127  assert_release (false);
4128  return error_code;
4129  }
4130  }
4131  btree_record_object_set_mvcc_flags (rec->data, mvcc_info->flags);
4132  }
4133  }
4134  else
4135  {
4136  NON_LEAF_REC *non_leaf_rec = (NON_LEAF_REC *) node_rec;
4137 
4139  }
4140 
4141  /* write the key */
4142  if (key_type == BTREE_NORMAL_KEY)
4143  { /* key fits in page */
4144  PR_TYPE *pr_type;
4145 
4146  if (node_type == BTREE_LEAF_NODE)
4147  {
4148  pr_type = btid->key_type->type;
4149  }
4150  else
4151  {
4152  pr_type = btid->nonleaf_key_type->type;
4153  }
4154 
4155  error_code = pr_type->index_writeval (&buf, key);
4156  if (error_code != NO_ERROR)
4157  {
4158  assert_release (false);
4159  return error_code;
4160  }
4161  }
4162  else
4163  {
4164  /* overflow key */
4165  if (node_type == BTREE_LEAF_NODE)
4166  {
4168  }
4169 
4170  error_code = btree_store_overflow_key (thread_p, btid, key, key_len, node_type, &key_vpid);
4171  if (error_code != NO_ERROR)
4172  {
4173  return error_code;
4174  }
4175 
4176  /* write the overflow VPID as the key */
4177  error_code = or_put_int (&buf, key_vpid.pageid);
4178  if (error_code != NO_ERROR)
4179  {
4180  assert_release (false);
4181  return error_code;
4182  }
4183  error_code = or_put_short (&buf, key_vpid.volid);
4184  if (error_code != NO_ERROR)
4185  {
4186  assert_release (false);
4187  return error_code;
4188  }
4189  }
4190 
4191  error_code = or_put_align32 (&buf);
4192  if (error_code != NO_ERROR)
4193  {
4194  assert_release (false);
4195  return error_code;
4196  }
4197 
4198  rec->length = CAST_BUFLEN (buf.ptr - buf.buffer);
4199  rec->type = REC_HOME;
4200 
4201  /* Success. */
4202  return NO_ERROR;
4203 }
4204 
4205 /*
4206  * btree_read_record () -
4207  * return:
4208  * btid(in):
4209  * pgptr(in):
4210  * rec(in):
4211  * key(in):
4212  * rec_header(in):
4213  * node_type(in):
4214  * clear_key(in):
4215  * offset(in):
4216  * copy_key(in):
4217  *
4218  * Note: This routine reads a btree record for both leaf and non leaf pages.
4219  *
4220  * copy_key indicates whether strings should be malloced and copied
4221  * or just returned via pointer. offset will point to the oid(s) following
4222  * the key for leaf pages. clear_key will indicate whether the key needs
4223  * to be cleared via pr_clear_value by the caller. If this record is
4224  * a leaf record, rec_header will be filled in with the LEAF_REC,
4225  * otherwise, rec_header will be filled in with the NON_LEAF_REC for this
4226  * record.
4227  *
4228  * If you don't want to actually read the key (possibly incurring a
4229  * malloc for the string), you can send a NULL pointer for the key.
4230  * index_readval() will do the right thing and simply skip the key in this case.
4231  */
4232 int
4234  void *rec_header, BTREE_NODE_TYPE node_type, bool * clear_key, int *offset, int copy_key,
4235  BTREE_SCAN * bts)
4236 {
4237  int n_prefix = COMMON_PREFIX_UNKNOWN;
4238  int error;
4239 
4240  assert (pgptr != NULL);
4241  assert (rec != NULL);
4242  assert (rec->type == REC_HOME);
4243  assert (bts == NULL || bts->common_prefix == -1
4244  || bts->common_prefix == btree_node_common_prefix (thread_p, btid, pgptr));
4245 
4246  if (bts != NULL)
4247  {
4248  n_prefix = bts->common_prefix;
4249  }
4250 
4251  error =
4252  btree_read_record_without_decompression (thread_p, btid, rec, key, rec_header, node_type, clear_key, offset,
4253  copy_key);
4254  if (error != NO_ERROR)
4255  {
4256  return error;
4257  }
4258 
4259  if (key != NULL && node_type == BTREE_LEAF_NODE && !btree_leaf_is_flaged (rec, BTREE_LEAF_RECORD_OVERFLOW_KEY)
4261  {
4262  if (n_prefix == COMMON_PREFIX_UNKNOWN)
4263  {
4264  /* recalculate n_prefix */
4265  n_prefix = btree_node_common_prefix (thread_p, btid, pgptr);
4266  }
4267 
4268  assert (n_prefix >= 0);
4269 
4270  if (n_prefix > 0)
4271  {
4272  RECDES peek_rec;
4273  DB_VALUE lf_key, result;
4274  bool lf_clear_key;
4275  LEAF_REC leaf_pnt;
4276  int dummy_offset;
4277 
4278  btree_init_temp_key_value (&lf_clear_key, &lf_key);
4279  (void) spage_get_record (thread_p, pgptr, 1, &peek_rec, PEEK);
4280  error = btree_read_record_without_decompression (thread_p, btid, &peek_rec, &lf_key, &leaf_pnt,
4281  BTREE_LEAF_NODE, &lf_clear_key, &dummy_offset,
4282  PEEK_KEY_VALUE);
4283  if (error != NO_ERROR)
4284  {
4285  btree_clear_key_value (clear_key, key);
4286  return error;
4287  }
4288 
4290  assert (DB_VALUE_TYPE (&lf_key) == DB_TYPE_MIDXKEY);
4291 
4292  pr_midxkey_add_prefix (&result, &lf_key, key, n_prefix);
4293 
4294  btree_clear_key_value (clear_key, key);
4295  btree_clear_key_value (&lf_clear_key, &lf_key);
4296 
4297  *key = result;
4298  *clear_key = true;
4299  }
4300  else if (n_prefix < 0)
4301  {
4302  return n_prefix;
4303  }
4304  }
4305 
4306  return NO_ERROR;
4307 }
4308 
4309 /*
4310  * btree_read_record_without_decompression () -
4311  * return:
4312  * btid(in):
4313  * rec(in):
4314  * key(in):
4315  * rec_header(in):
4316  * node_type(in):
4317  * clear_key(in):
4318  * offset(in):
4319  * copy_key(in):
4320  *
4321  */
4322 static int
4324  void *rec_header, BTREE_NODE_TYPE node_type, bool * clear_key, int *offset,
4325  int copy_key)
4326 {
4327  OR_BUF buf;
4328  VPID overflow_vpid;
4329  char *copy_key_buf;
4330  int copy_key_buf_len;
4331  int rc = NO_ERROR;
4332  int key_type = BTREE_NORMAL_KEY;
4333  LEAF_REC *leaf_rec = NULL;
4335 
4336  assert (rec != NULL);
4337  assert (rec->type == REC_HOME);
4338 
4339  if (key != NULL)
4340  {
4341  btree_clear_key_value (clear_key, key);
4342  }
4343 
4344  *clear_key = false;
4345 
4346 #if !defined(NDEBUG)
4347  if (!rec || !rec->data)
4348  {
4349  btree_log_if_enabled ("btree_read_record_without_decompression: null node header pointer. Operation Ignored.");
4350  return rc;
4351  }
4352 #endif
4353 
4354  assert (rec_header != NULL);
4355 
4356  or_init (&buf, rec->data, rec->length);
4357 
4358  /*
4359  * Find the beginning position of the key within the record and read
4360  * the key length.
4361  */
4362  if (node_type == BTREE_LEAF_NODE)
4363  {
4364  leaf_rec = (LEAF_REC *) rec_header;
4365  leaf_rec->key_len = -1;
4366 
4367  rc = or_advance (&buf, OR_OID_SIZE); /* skip instance oid */
4368  if (rc != NO_ERROR)
4369  {
4370  return rc;
4371  }
4372 
4373  if (BTREE_IS_UNIQUE (btid->unique_pk))
4374  {
4376  {
4377  rc = or_advance (&buf, OR_OID_SIZE); /* skip class oid */
4378  if (rc != NO_ERROR)
4379  {
4380  return rc;
4381  }
4382  }
4383  }
4384 
4386  {
4387  rc = or_advance (&buf, OR_MVCCID_SIZE); /* skip insert mvccid */
4388  if (rc != NO_ERROR)
4389  {
4390  return rc;
4391  }
4392  }
4393 
4395  {
4396  rc = or_advance (&buf, OR_MVCCID_SIZE); /* skip delete mvccid */
4397  if (rc != NO_ERROR)
4398  {
4399  return rc;
4400  }
4401  }
4402 
4404  {
4405  key_type = BTREE_OVERFLOW_KEY;
4406  }
4407 
4409  {
4410  btree_leaf_get_vpid_for_overflow_oids (rec, &leaf_rec->ovfl);
4411  }
4412  else
4413  {
4414  VPID_SET_NULL (&leaf_rec->ovfl);
4415  }
4416 
4417  assert (leaf_rec->key_len == -1);
4418  }
4419  else
4420  {
4421  non_leaf_rec = (NON_LEAF_REC *) rec_header;
4422  non_leaf_rec->key_len = -1;
4423 
4425  if (rc != NO_ERROR)
4426  {
4427  return rc;
4428  }
4429 
4430  if (non_leaf_rec->key_len < 0)
4431  {
4432  key_type = BTREE_OVERFLOW_KEY;
4433  }
4434  }
4435 
4436  if (key_type == BTREE_NORMAL_KEY)
4437  { /* key is within page */
4438  TP_DOMAIN *key_domain;
4439  PR_TYPE *pr_type;
4440  char *old_ptr;
4441 
4442  if (node_type == BTREE_LEAF_NODE)
4443  {
4444  key_domain = btid->key_type;
4445  }
4446  else
4447  {
4448  key_domain = btid->nonleaf_key_type;
4449  }
4450  pr_type = key_domain->type;
4451 
4452  copy_key_buf = NULL;
4453  copy_key_buf_len = 0;
4454 
4455  /*
4456  * When we read the key, must copy in two cases:
4457  * 1) we are told to via the copy_key flag, or 2) it is a set.
4458  */
4459  if (key != NULL && copy_key == COPY_KEY_VALUE)
4460  {
4461  *clear_key = true;
4462  }
4463  else
4464  {
4465  *clear_key = false;
4466  }
4467 
4468  if (*clear_key)
4469  { /* need to copy the key */
4470  if (btid->copy_buf != NULL)
4471  { /* check for copy_buf */
4472  if (pr_type->id == DB_TYPE_MIDXKEY || QSTR_IS_ANY_CHAR_OR_BIT (pr_type->id))
4473  { /* check for the key type */
4474  /* read key_val image into the copy_buf */
4475  copy_key_buf = btid->copy_buf;
4476  copy_key_buf_len = btid->copy_buf_len;
4477  }
4478  }
4479  }
4480 
4481  old_ptr = buf.ptr;
4482  rc = pr_type->index_readval (&buf, key, key_domain, -1, *clear_key, copy_key_buf, copy_key_buf_len);
4483  if (rc != NO_ERROR)
4484  {
4485  return rc;
4486  }
4487  assert (CAST_BUFLEN (buf.ptr - old_ptr) > 0);
4488  if (key != NULL)
4489  {
4490  assert (!DB_IS_NULL (key));
4491  }
4492 
4493  if (node_type == BTREE_LEAF_NODE)
4494  {
4495  leaf_rec->key_len = CAST_BUFLEN (buf.ptr - old_ptr);
4496  }
4497  else
4498  {
4499  non_leaf_rec->key_len = CAST_BUFLEN (buf.ptr - old_ptr);
4500  }
4501  }
4502  else
4503  {
4504  /* follow the chain of overflow key page pointers and fetch key */
4505  overflow_vpid.pageid = or_get_int (&buf, &rc);
4506  if (rc == NO_ERROR)
4507  {
4508  overflow_vpid.volid = or_get_short (&buf, &rc);
4509  if (rc != NO_ERROR)
4510  {
4511  assert (false);
4512 
4513  if (key != NULL)
4514  {
4515  db_make_null (key);
4516  }
4517 
4518  return rc;
4519  }
4520  }
4521  else
4522  {
4523  return rc;
4524  }
4525 
4526  if (key != NULL)
4527  {
4528  rc = btree_load_overflow_key (thread_p, btid, &overflow_vpid, key, node_type);
4529  if (rc != NO_ERROR)
4530  {
4531  db_make_null (key);
4532  return rc;
4533  }
4534 
4535  assert (!DB_IS_NULL (key));
4536 
4537  /* we always clear overflow keys */
4538  *clear_key = true;
4539  }
4540  else
4541  {
4542  /* we aren't copying the key so they don't have to clear it */
4543  *clear_key = false;
4544  }
4545  }
4546 
4547  if (key != NULL && key->need_clear)
4548  {
4549  *clear_key = true;
4550  }
4551 
4552  buf.ptr = PTR_ALIGN (buf.ptr, OR_INT_SIZE);
4553 
4554  *offset = CAST_BUFLEN (buf.ptr - buf.buffer);
4555 
4556  return rc;
4557 }
4558 
4559 /*
4560  * btree_dump_root_header () -
4561  * return:
4562  * rec(in):
4563  */
4564 static void
4565 btree_dump_root_header (THREAD_ENTRY * thread_p, FILE * fp, PAGE_PTR page_ptr)
4566 {
4567  OR_BUF buf;
4568  BTREE_ROOT_HEADER *root_header = NULL;
4569  TP_DOMAIN *key_type;
4570 
4571  root_header = btree_get_root_header (thread_p, page_ptr);
4572  if (root_header == NULL)
4573  {
4574  fprintf (fp, "btree_dump_root_header: get root header failure\n");
4575 
4576  return;
4577  }
4578 
4579  or_init (&buf, root_header->packed_key_domain, -1);
4580  key_type = or_get_domain (&buf, NULL, NULL);
4581 
4582  fprintf (fp, "============== R O O T P A G E ================\n\n");
4583  fprintf (fp, " Key_Type: %s\n", pr_type_name (TP_DOMAIN_TYPE (key_type)));
4584  fprintf (fp, " Num OIDs: %d, Num NULLs: %d, Num keys: %d\n", root_header->num_oids, root_header->num_nulls,
4585  root_header->num_keys);
4586  fprintf (fp, " Topclass_oid: (%d %d %d)\n", root_header->topclass_oid.volid, root_header->topclass_oid.pageid,
4587  root_header->topclass_oid.slotid);
4588  fprintf (fp, " Unique: ");
4589  if (BTREE_IS_UNIQUE (root_header->unique_pk))
4590  {
4591  fprintf (fp, "1");
4592  if (BTREE_IS_PRIMARY_KEY (root_header->unique_pk))
4593  {
4594  fprintf (fp, " (Primary Key)");
4595  }
4596  }
4597  else
4598  {
4599  assert (!BTREE_IS_PRIMARY_KEY (root_header->unique_pk));
4600  fprintf (fp, "0");
4601  }
4602  fprintf (fp, "\n");
4603  fprintf (fp, " OVFID: %d|%d\n", root_header->ovfid.fileid, root_header->ovfid.volid);
4604  fprintf (fp, " Btree Revision Level: %d\n", root_header->rev_level);
4605  fprintf (fp, " Reserved: %d\n", root_header->reverse_reserved); /* unused */
4606  fprintf (fp, "\n");
4607 }
4608 
4609 /*
4610  * btree_dump_key () -
4611  * return:
4612  * key(in):
4613  */
4614 void
4615 btree_dump_key (FILE * fp, const DB_VALUE * key)
4616 {
4617  fprintf (fp, " ");
4618  db_fprint_value (fp, key);
4619  fprintf (fp, " ");
4620 }
4621 
4622 /*
4623  * btree_dump_leaf_record () - Dump one leaf record (key, objects stored in the record, overflow OID pages)
4624  * return: nothing
 * thread_p(in): Thread entry
 * fp(in): Stream that receives the whole dump
4625  * btid(in): B+tree index identifier
4626  * rec(in): Pointer to a record in a leaf page of the tree
4627  * depth(in): Indentation level; the left margin is requested with btree_print_space (fp, depth * 4 + 1)
4628  *
4629  * Note: Dumps the content of a leaf record, namely key and the set of values for the key.
 *       If the record cannot be read, or an overflow page cannot be fixed, the function returns early and the
 *       dump is left incomplete.
 *
 * NOTE(review): this listing skips source lines 4646, 4742, 4779 and 4854. `mvcc_info` is used below but not
 *               declared in the visible text (presumably on 4646); the other gaps each follow an or_get_oid ()
 *               call. Check the upstream file before relying on this text.
4630  */
4631 static void
4632 btree_dump_leaf_record (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec, int depth)
4633 {
4634  OR_BUF buf;
4635  LEAF_REC leaf_record = { {NULL_PAGEID, NULL_VOLID}, 0 };
4636  int i, k, oid_cnt;
4637  OID class_oid;
4638  OID oid;
4639  int key_len, offset;
4640  VPID overflow_vpid;
4641  DB_VALUE key;
4642  bool clear_key;
4643  int oid_size;
4644  MVCCID mvccid;
4645  int error;
4647 
4648  btree_init_temp_key_value (&clear_key, &key);
4649 
 /* Size of the first object's fixed part: unique indexes use two OID-sized fields, the others one. */
4650  if (BTREE_IS_UNIQUE (btid->unique_pk))
4651  {
4652  oid_size = (2 * OR_OID_SIZE);
4653  }
4654  else
4655  {
4656  oid_size = OR_OID_SIZE;
4657  }
4658 
4659  /* output the leaf record structure content */
4660  btree_print_space (fp, depth * 4 + 1);
4661 
4662  error =
4663  btree_read_record_without_decompression (thread_p, btid, rec, &key, &leaf_record, BTREE_LEAF_NODE, &clear_key,
4664  &offset, PEEK_KEY_VALUE);
4665  if (error != NO_ERROR)
4666  {
4667  return;
4668  }
4669  key_len = btree_get_disk_size_of_key (&key);
4670 
4671  fprintf (fp, "Key_Len: %d Ovfl_Page: {%d , %d} ", key_len, leaf_record.ovfl.volid, leaf_record.ovfl.pageid);
4672 
4673  fprintf (fp, "Key: ");
4674  btree_dump_key (fp, &key);
4675 
 /* The key is not needed past this point. */
4676  btree_clear_key_value (&clear_key, &key);
4677 
4678  overflow_vpid = leaf_record.ovfl;
4679 
4680  /* output the values */
4681  fprintf (fp, " Values: ");
4682 
4683  oid_cnt = btree_record_get_num_oids (thread_p, btid, rec, offset, BTREE_LEAF_NODE);
4684  fprintf (fp, "Oid_Cnt: %d ", oid_cnt);
4685 
4686  /* output first oid */
4687  (void) btree_leaf_get_first_object (btid, rec, &oid, &class_oid, &mvcc_info);
4688  if (BTREE_IS_UNIQUE (btid->unique_pk))
4689  {
4690  fprintf (fp, " (%d %d %d : %d, %d, %d) ", class_oid.volid, class_oid.pageid, class_oid.slotid, oid.volid,
4691  oid.pageid, oid.slotid);
4692  }
4693  else
4694  {
4695  fprintf (fp, " (%d, %d, %d) ", oid.volid, oid.pageid, oid.slotid);
4696  }
4697 
4698  /* output MVCCIDs of first OID */
 /* MVCCIDs are printed with %llu, which takes an unsigned long long argument; cast accordingly. */
4699  if (mvcc_info.flags != 0)
4700  {
4701  fprintf (fp, " (");
4702  if (mvcc_info.flags & BTREE_OID_HAS_MVCC_INSID)
4703  {
4704  /* Get and print insert MVCCID */
4705  OR_GET_MVCCID (rec->data + oid_size, &mvccid);
4706  fprintf (fp, "insid=%llu", (unsigned long long) mvccid);
4707 
4708  if (mvcc_info.flags & BTREE_OID_HAS_MVCC_DELID)
4709  {
4710  /* Get and print delete MVCCID */
4711  OR_GET_MVCCID (rec->data + oid_size + OR_MVCCID_SIZE, &mvccid);
4712  fprintf (fp, ", delid=%llu", (unsigned long long) mvccid);
4713  }
4714  }
4715  else
4716  {
4717  /* Safe guard */
4718  assert (mvcc_info.flags & BTREE_OID_HAS_MVCC_DELID);
4719 
4720  /* Get and print delete MVCCID */
4721  OR_GET_MVCCID (rec->data + oid_size, &mvccid);
4722  fprintf (fp, "delid=%llu", (unsigned long long) mvccid);
4723  }
4724 
4725  fprintf (fp, ") ");
4726  }
4727 
4728  /* output remainder OIDs */
 /* The remaining objects are read sequentially from the part of the record that starts at offset. */
4729  or_init (&buf, rec->data + offset, rec->length - offset);
4730  if (BTREE_IS_UNIQUE (btid->unique_pk))
4731  {
4732  for (k = 1; k < oid_cnt; k++)
4733  {
4734  /* values stored within the record */
 /* two objects per output row */
4735  if (k % 2 == 0)
4736  {
4737  fprintf (fp, "\n");
4738  }
4739 
4740  /* Get OID */
4741  or_get_oid (&buf, &oid);
4743 
4744  /* Get class OID */
4745  or_get_oid (&buf, &class_oid);
4746 
4747  /* Print OID and class OID */
4748  fprintf (fp, " (%d %d %d : %d, %d, %d) ", class_oid.volid, class_oid.pageid, class_oid.slotid, oid.volid,
4749  oid.pageid, oid.slotid);
4750 
4751  /* Since objects are fixed size, they contain both insert and delete MVCCID's. */
4752  fprintf (fp, " (");
4753 
4754  /* Get and print insert MVCCID */
4755  (void) or_get_mvccid (&buf, &mvccid);
4756  fprintf (fp, "insid=%llu", (unsigned long long) mvccid);
4757 
4758  /* Get and print delete MVCCID */
4759  (void) or_get_mvccid (&buf, &mvccid);
4760  fprintf (fp, ", delid=%llu", (unsigned long long) mvccid);
4761 
4762  fprintf (fp, ") ");
4763  }
4764  }
4765  else
4766  {
4767  for (k = 1; k < oid_cnt; k++)
4768  {
4769  /* values stored within the record */
 /* four objects per output row */
4770  if (k % 4 == 0)
4771  {
4772  fprintf (fp, "\n");
4773  }
4774 
4775  /* Get MVCC flags */
 /* The flags are taken from the buffer position before the OID is consumed; they decide which MVCCIDs follow. */
4776  mvcc_info.flags = btree_record_object_get_mvcc_flags (buf.ptr);
4777 
4778  or_get_oid (&buf, &oid);
4780 
4781  fprintf (fp, " (%d, %d, %d) ", oid.volid, oid.pageid, oid.slotid);
4782 
4783  if (mvcc_info.flags != 0)
4784  {
4785  fprintf (fp, " (");
4786  if (mvcc_info.flags & BTREE_OID_HAS_MVCC_INSID)
4787  {
4788  /* Get and print insert MVCCID */
4789  (void) or_get_mvccid (&buf, &mvccid);
4790  fprintf (fp, "insid=%llu", (unsigned long long) mvccid);
4791 
4792  if (mvcc_info.flags & BTREE_OID_HAS_MVCC_DELID)
4793  {
4794  /* Get and print delete MVCCID */
4795  (void) or_get_mvccid (&buf, &mvccid);
4796  fprintf (fp, ", delid=%llu", (unsigned long long) mvccid);
4797  }
4798  }
4799  else
4800  {
4801  /* Safe guard */
4802  assert (mvcc_info.flags & BTREE_OID_HAS_MVCC_DELID);
4803 
4804  /* Get and print delete MVCCID */
4805  (void) or_get_mvccid (&buf, &mvccid);
4806  fprintf (fp, "delid=%llu", (unsigned long long) mvccid);
4807  }
4808  fprintf (fp, ") ");
4809  }
4810  }
4811  }
4812 
4813  if (!VPID_ISNULL (&overflow_vpid))
4814  {
4815  /* record has an overflow page continuation */
4816  RECDES overflow_rec;
4817  PAGE_PTR overflow_page_ptr = NULL;
 /* Extra BTREE_MAX_ALIGN bytes leave room to align the record data pointer inside the buffer. */
4818  char overflow_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
4819 
4820  overflow_rec.area_size = DB_PAGESIZE;
4821  overflow_rec.data = PTR_ALIGN (overflow_rec_buf, BTREE_MAX_ALIGN);
4822 
4823  fprintf (fp, "\n\n======= O V E R F L O W P A G E S =========\n");
4824  fflush (fp);
4825 
4826  /* get all the overflow pages and output their value content */
4827  while (!VPID_ISNULL (&overflow_vpid))
4828  {
4829  fprintf (fp, "\n ------ Overflow Page {%d , %d} \n", overflow_vpid.volid, overflow_vpid.pageid);
4830  overflow_page_ptr =
4831  pgbuf_fix (thread_p, &overflow_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
4832  if (overflow_page_ptr == NULL)
4833  {
4834  ASSERT_ERROR ();
4835  return;
4836  }
4837 
 /* Advance overflow_vpid to the next page of the chain before dumping the current one. */
4838  btree_get_next_overflow_vpid (thread_p, overflow_page_ptr, &overflow_vpid);
4839 
 /* NOTE(review): the result of spage_get_record is ignored; overflow_rec is used below regardless. */
4840  (void) spage_get_record (thread_p, overflow_page_ptr, 1, &overflow_rec, COPY);
4841 
4842  oid_cnt = btree_record_get_num_oids (thread_p, btid, &overflow_rec, 0, BTREE_OVERFLOW_NODE);
4843  or_init (&buf, overflow_rec.data, overflow_rec.length);
4844  fprintf (fp, "Oid_Cnt: %d ", oid_cnt);
4845 
4846  for (i = 0; i < oid_cnt; i++)
4847  {
4848  if (i % 4 == 0)
4849  {
 /* The row break must go to the same stream as the objects, not to stdout. */
4850  fprintf (fp, "\n");
4851  }
4852 
4853  or_get_oid (&buf, &oid);
4855 
4856  fprintf (fp, " (%d, %d, %d) ", oid.volid, oid.pageid, oid.slotid);
4857 
4858  if (BTREE_IS_UNIQUE (btid->unique_pk))
4859  {
4860  or_get_oid (&buf, &class_oid);
4861  fprintf (fp, " (%d, %d, %d) ", class_oid.volid, class_oid.pageid, class_oid.slotid);
4862  }
4863 
 /* Both MVCCIDs are read unconditionally for every object of an overflow record. */
4864  fprintf (fp, " (");
4865  (void) or_get_mvccid (&buf, &mvccid);
4866  fprintf (fp, "insid=%llu", (unsigned long long) mvccid);
4867  (void) or_get_mvccid (&buf, &mvccid);
4868  fprintf (fp, ", delid=%llu", (unsigned long long) mvccid);
4869  fprintf (fp, ") ");
4870  }
4871 
4872  pgbuf_unfix_and_init (thread_p, overflow_page_ptr);
4873  }
4874  }
4875 
4876  fprintf (fp, "\n");
4877  fflush (fp);
4878 }
4879 
4880 /*
4881  * btree_dump_non_leaf_record () -
4882  * return: void
4883  * btid(in):
4884  * rec(in): Pointer to a record in a non_leaf page
4885  * n(in): Indentation left margin (number of preceding blanks)
4886  * print_key(in):
4887  *
4888  * Note: Dumps the content of a nonleaf record, namely key and child page identifier.
4889  */
4890 static void
4891 btree_dump_non_leaf_record (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec, int depth, int print_key)
4892 {
4893  NON_LEAF_REC non_leaf_record;
4894  int key_len, offset;
4895  DB_VALUE key;
4896  bool clear_key;
4897  int error;
4898 
4899  VPID_SET_NULL (&(non_leaf_record.pnt));
4900 
4901  btree_init_temp_key_value (&clear_key, &key);
4902 
4903  /* output the non_leaf record structure content */
4904  error =
4905  btree_read_record_without_decompression (thread_p, btid, rec, &key, &non_leaf_record, BTREE_NON_LEAF_NODE,
4906  &clear_key, &offset, PEEK_KEY_VALUE);
4907  if (error != NO_ERROR)
4908  {
4909  return;
4910  }
4911 
4912  btree_print_space (fp, depth * 4);
4913  fprintf (fp, "Child_Page: {%d , %d} ", non_leaf_record.pnt.volid, non_leaf_record.pnt.pageid);
4914 
4915  if (print_key)
4916  {
4917  key_len = btree_get_disk_size_of_key (&key);
4918  fprintf (fp, "Key_Len: %d Key: ", key_len);
4919  btree_dump_key (fp, &key);
4920  }
4921  else
4922  {
4923  fprintf (fp, "No Key");
4924  }
4925 
4926  btree_clear_key_value (&clear_key, &key);
4927 
4928  fprintf (fp, "\n");
4929  fflush (fp);
4930 }
4931 
4932 /*
4933  * btree_get_new_page () - GET a NEW PAGE for the B+tree index
4934  * return: The pointer to a newly allocated page for the given B+tree or NULL.
4935  * The parameter vpid is set to the page identifier.
4936  * btid(in): B+tree index identifier
4937  * vpid(out): Set to the page identifier for the newly allocated page
4938  * near_vpid(in): A page identifier that may be used in a nearby page allocation. (It may be ignored.)
4939  *
4940  * Note: Allocates a new page for the B+tree
4941  */
4942 static PAGE_PTR
4943 btree_get_new_page (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * vpid, VPID * near_vpid)
4944 {
4945  PAGE_PTR page_ptr = NULL;
4946 
4947  if (file_alloc (thread_p, &btid->sys_btid->vfid, btree_initialize_new_page, NULL, vpid, &page_ptr) != NO_ERROR)
4948  {
4949  ASSERT_ERROR ();
4950  return NULL;
4951  }
4952  if (page_ptr == NULL)
4953  {
4954  assert_release (false);
4955  return NULL;
4956  }
4957  pgbuf_check_page_ptype (thread_p, page_ptr, PAGE_BTREE);
4958 
4959  return page_ptr;
4960 }
4961 
4962 /*
4963  * btree_initialize_new_page () - initialize a new b-tree page
4964  *
4965  * return : NO_ERROR
4966  * thread_p (in) : thread entry
4967  * page (in) : new b-tree page
4968  * args (in) : true or nil for undoredo logging; false for redo
 *             NOTE(review): no statement visible in this listing reads args - confirm whether it is still used.
 *
 * Note: Sets the page type to PAGE_BTREE, appends an RVBT_GET_NEWPAGE undo/redo log record for the page and marks
 *       the page dirty (DONT_FREE). btree_get_new_page () passes this function to file_alloc ().
4969  */
4970 int
4971 btree_initialize_new_page (THREAD_ENTRY * thread_p, PAGE_PTR page, void *args)
4972 {
4973  pgbuf_set_page_ptype (thread_p, page, PAGE_BTREE);
4974 
 /* NOTE(review): the listing jumps from line 4974 to 4976, so one source line is not shown here - check upstream. */
4976  log_append_undoredo_data2 (thread_p, RVBT_GET_NEWPAGE, NULL, page, -1, 0, 0, NULL, NULL);
4977  pgbuf_set_dirty (thread_p, page, DONT_FREE);
4978 
4979  return NO_ERROR;
4980 }
4981 
4982 /*
4983  * btree_search_nonleaf_page () - Find, in a non-leaf page, the child page to follow for a key
4984  * return: NO_ERROR on success, ER_FAILED otherwise
4985  * btid(in): B-tree info; btid->key_type is used when comparing keys
4986  * page_ptr(in): Pointer to the non_leaf page to be searched
4987  * key(in): Key to find
4988  * slot_id(out): Set to the record number that contains the key
4989  * child_vpid(out): Set to the child page identifier to be followed, or NULL_PAGEID
 * page_bounds(in/out): Optional (may be NULL). When given, one of its update_boundary_eq / update_boundary_lt /
 *                      update_boundary_gt_or_eq methods is called according to how the search ended.
4990  *
4991  * Note: Binary search the page to locate the record that contains the child page pointer to be followed to locate
4992  * the key, and return the page identifier for this child page.
 *
 * NOTE(review): this listing skips source lines 5007 and 5076. `non_leaf_rec` is used below but not declared in
 *               the visible text (presumably on 5007), and the block that sets start_col has no visible
 *               controlling statement (presumably on 5076). Check the upstream file before editing.
4993  */
4994 static int
4995 btree_search_nonleaf_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, DB_VALUE * key, INT16 * slot_id,
4996  VPID * child_vpid, page_key_boundary * page_bounds)
4997 {
4998  int key_cnt, offset;
4999  int c;
5000  bool clear_key;
5001  /* the start position of non-equal-value column */
5002  int start_col, left_start_col, right_start_col;
5003  INT16 left, right;
5004  INT16 middle = 0;
5005  DB_VALUE temp_key;
5006  RECDES rec;
5008 
5009  /* initialize child page identifier */
5010  VPID_SET_NULL (child_vpid);
5011 
5012  btree_init_temp_key_value (&clear_key, &temp_key);
5013 
 /* Argument validation is compiled only when NDEBUG is not defined. */
5014 #if !defined(NDEBUG)
5015  if (!page_ptr || !key || DB_IS_NULL (key))
5016  {
5017  btree_log_if_enabled ("btree_search_nonleaf_page: null page/key pointer. Operation Ignored.");
5018  return ER_FAILED;
5019  }
5020 #endif
5021 
5022  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
5023  assert (key_cnt > 0);
5024 
 /* Same condition as the assert above, kept as a run-time check for builds where assert is disabled. */
5025  if (key_cnt <= 0)
5026  { /* node record underflow */
5027  btree_log_if_enabled ("btree_search_nonleaf_page: node key count underflow: %d", key_cnt);
5028  return ER_FAILED;
5029  }
5030 
5031  if (key_cnt == 1)
5032  {
5033  /*
5034  * node has dummy neg-inf keys, but a child page pointer
5035  * So, follow this pointer
5036  */
5037  if (spage_get_record (thread_p, page_ptr, 1, &rec, PEEK) != S_SUCCESS)
5038  {
5039  return ER_FAILED;
5040  }
5041 
5042  btree_read_fixed_portion_of_non_leaf_record (&rec, &non_leaf_rec);
5043 
5044  *slot_id = 1;
5045  *child_vpid = non_leaf_rec.pnt;
5046 
 /* page_bounds is not updated on this path. */
5047  return NO_ERROR;
5048  }
5049 
5050  /* binary search the node to find the child page pointer to be followed */
5051  c = 0;
5052 
5053  /* for non-compressed midxkey; separator is not compressed */
5054  left_start_col = right_start_col = 0;
5055 
 /* key_cnt is at least 2 here, so the loop below runs at least once and leaves non_leaf_rec/middle set. */
5056  left = 2; /* Ignore dummy key (neg-inf or 1st key) */
5057  right = key_cnt;
5058 
5059  while (left <= right)
5060  {
 /* Drop the key read in the previous iteration before reading the next one. */
5061  btree_clear_key_value (&clear_key, &temp_key);
5062  middle = CEIL_PTVDIV ((left + right), 2); /* get the middle record */
5063 
5064  assert (middle > 0);
5065  if (spage_get_record (thread_p, page_ptr, middle, &rec, PEEK) != S_SUCCESS)
5066  {
5067  return ER_FAILED;
5068  }
5069 
5070  if (btree_read_record_without_decompression (thread_p, btid, &rec, &temp_key, &non_leaf_rec, BTREE_NON_LEAF_NODE,
5071  &clear_key, &offset, PEEK_KEY_VALUE) != NO_ERROR)
5072  {
5073  return ER_FAILED;
5074  }
5075 
 /* Comparison resumes from the smaller of the two column positions recorded for the current range ends. */
5077  {
5078  start_col = MIN (left_start_col, right_start_col);
5079  }
5080 
5081  c = btree_compare_key (key, &temp_key, btid->key_type, 1, 1, &start_col);
5082 
5083  if (c == DB_UNK)
5084  {
5085  btree_clear_key_value (&clear_key, &temp_key);
5086  return ER_FAILED;
5087  }
5088 
5089  if (c == 0)
5090  {
5091  /* child page to be followed has been found */
5092  *slot_id = middle;
5093  *child_vpid = non_leaf_rec.pnt;
5094 
5095  if (page_bounds != NULL)
5096  {
5097  if (page_bounds->update_boundary_eq (thread_p, btid, page_ptr, temp_key, clear_key, middle) != NO_ERROR)
5098  {
 /* NOTE(review): unlike the success path, this return does not call btree_clear_key_value; whether
  * update_boundary_eq takes over temp_key is not visible here - confirm. */
5099  return ER_FAILED;
5100  }
5101  }
5102 
5103  btree_clear_key_value (&clear_key, &temp_key);
5104  return NO_ERROR;
5105  }
5106  else if (c < 0)
5107  {
5108  right = middle - 1;
5109  right_start_col = start_col;
5110  }
5111  else
5112  {
5113  left = middle + 1;
5114  left_start_col = start_col;
5115  }
5116  }
5117 
 /* No separator equals the key; c and middle describe the last comparison made by the loop. */
5118  if (c < 0)
5119  {
5120  /* child page is the one pointed by the record left to the middle */
5121  assert (middle - 1 > 0);
5122  if (spage_get_record (thread_p, page_ptr, middle - 1, &rec, PEEK) != S_SUCCESS)
5123  {
5124  btree_clear_key_value (&clear_key, &temp_key);
5125  return ER_FAILED;
5126  }
5127 
5128  btree_read_fixed_portion_of_non_leaf_record (&rec, &non_leaf_rec);
5129 
5130  *slot_id = middle - 1;
5131  *child_vpid = non_leaf_rec.pnt;
5132 
 /* rec now refers to slot middle - 1, while temp_key still holds the key read from slot middle. */
5133  if (page_bounds != NULL)
5134  {
5135  if (page_bounds->update_boundary_lt (thread_p, btid, page_ptr, rec, temp_key, clear_key) != NO_ERROR)
5136  {
 /* NOTE(review): returns without btree_clear_key_value - confirm this cannot leak temp_key. */
5137  return ER_FAILED;
5138  }
5139  }
5140 
5141  }
5142  else
5143  {
5144  /* child page is the one pointed by the middle record */
 /* non_leaf_rec still holds the fixed portion read from slot middle inside the loop. */
5145  *slot_id = middle;
5146  *child_vpid = non_leaf_rec.pnt;
5147 
5148  if (page_bounds != NULL)
5149  {
5150  if (page_bounds->update_boundary_gt_or_eq (thread_p, btid, page_ptr, temp_key, clear_key, middle, key_cnt) !=
5151  NO_ERROR)
5152  {
 /* NOTE(review): returns without btree_clear_key_value - confirm this cannot leak temp_key. */
5153  return ER_FAILED;
5154  }
5155  }
5156  }
5157 
5158  btree_clear_key_value (&clear_key, &temp_key);
5159 
5160  return NO_ERROR;
5161 }
5162 
5163 /*
5164  * btree_leaf_is_key_between_min_max () - Output if key is between first and last key in page. Function is useful
5165  * to decide to resume with leaf page after it was unfixed.
5166  *
5167  * return : Error code.
5168  * thread_p (in) : Thread entry.
5169  * btid_int (in) : B-tree info.
5170  * leaf (in) : Leaf page.
5171  * key (in) : Searched key.
5172  * search_key (out) : Output result of search.
 *
 * Results stored in search_key by the visible code (result / slotid):
 *   - first key equal, record not a fence     : BTREE_KEY_FOUND / 1
 *   - first key equal, record is a fence      : BTREE_KEY_BETWEEN / -1
 *   - key lower than first key                : BTREE_KEY_SMALLER / -1
 *   - key greater than first, only one key    : BTREE_KEY_BIGGER / key_count + 1
 *   - last key equal, record not a fence      : BTREE_KEY_FOUND / key_count
 *   - last key equal, record is a fence       : BTREE_KEY_BIGGER / key_count + 1
 *   - key greater than last key               : BTREE_KEY_BIGGER / key_count + 1
 *   - key strictly between first and last key : BTREE_KEY_BETWEEN / -1
 *   - any other compare result, or no keys    : BTREE_KEY_NOTFOUND
 *
 * NOTE(review): this listing skips source lines 5175, 5185 and 5193: the first line of the signature (function
 *               name and leading parameters), a local declaration (`c` is used but not declared in the visible
 *               text) and, presumably, the condition controlling the first early return. Check the upstream
 *               file before editing.
5173  */
5174 static int
5176  BTREE_SEARCH_KEY_HELPER * search_key)
5177 {
5178  DB_VALUE border_key;
5179  RECDES border_record;
5180  BTREE_NODE_HEADER *node_header = NULL;
5181  LEAF_REC dummy_leaf_rec;
5182  int dummy_offset;
5183  bool clear_key = false;
5184  int error_code = NO_ERROR;
5186  int key_count = 0;
5187 
5188  /* Assert expected arguments */
5189  assert (btid_int != NULL);
5190  assert (leaf != NULL);
5191  assert (key != NULL && !DB_IS_NULL (key));
5192 
5194  {
5195  /* We don't need to do the early check. Output search key result BTREE_KEY_BETWEEN to force a normal search. */
5196  search_key->result = BTREE_KEY_BETWEEN;
5197  return NO_ERROR;
5198  }
5199 
5200  search_key->result = BTREE_KEY_NOTFOUND;
 /* In the visible code the node header is only checked for NULL; none of its fields are read. */
5201  node_header = btree_get_node_header (thread_p, leaf);
5202  if (node_header == NULL)
5203  {
5204  assert (false);
5205  return ER_FAILED;
5206  }
5207  key_count = btree_node_number_of_keys (thread_p, leaf);
5208  if (key_count < 1)
5209  {
5210  /* Too few keys to decide. */
 /* result stays BTREE_KEY_NOTFOUND; slotid is not assigned on this path. */
5211  return NO_ERROR;
5212  }
5213 
5214  /*
5215  * Compare with first key in page.
5216  */
5217  /* Read record and get key. */
5218  btree_init_temp_key_value (&clear_key, &border_key);
5219 
5220  if (spage_get_record (thread_p, leaf, 1, &border_record, PEEK) != S_SUCCESS)
5221  {
5222  assert_release (false);
5223  return ER_FAILED;
5224  }
5225  error_code =
5226  btree_read_record (thread_p, btid_int, leaf, &border_record, &border_key, &dummy_leaf_rec, BTREE_LEAF_NODE,
5227  &clear_key, &dummy_offset, PEEK_KEY_VALUE, NULL);
5228  if (error_code != NO_ERROR)
5229  {
5230  ASSERT_ERROR ();
5231  return error_code;
5232  }
5233 
5234  /* Compare with first key. */
5235  c = btree_compare_key (key, &border_key, btid_int->key_type, 1, 1, NULL);
 /* The key value is released right after the comparison; border_record is still used for the fence test. */
5236  btree_clear_key_value (&clear_key, &border_key);
5237  if (c == DB_EQ)
5238  {
5239  if (btree_leaf_is_flaged (&border_record, BTREE_LEAF_RECORD_FENCE))
5240  {
5241  /* Let btree_search_leaf_page find the key, if it exists. */
5242  search_key->result = BTREE_KEY_BETWEEN;
5243  /* Unknown slot */
5244  search_key->slotid = -1;
5245  }
5246  else
5247  {
5248  /* Key found. */
5249  search_key->result = BTREE_KEY_FOUND;
5250  search_key->slotid = 1;
5251  }
5252  return NO_ERROR;
5253  }
5254  else if (c != DB_GT)
5255  {
5256  /* Not bigger than first key. */
 /* Anything other than DB_LT here is reported as BTREE_KEY_NOTFOUND. */
5257  search_key->result = (c == DB_LT) ? BTREE_KEY_SMALLER : BTREE_KEY_NOTFOUND;
5258  /* Unknown slot */
5259  search_key->slotid = -1;
5260  return NO_ERROR;
5261  }
5262  else if (key_count == 1)
5263  {
 /* Key is greater than the only key in the page; there is no last key left to compare with. */
5264  search_key->result = BTREE_KEY_BIGGER;
5265  search_key->slotid = key_count + 1;
5266  return NO_ERROR;
5267  }
5268 
5269  /*
5270  * Compare with last key in page.
5271  */
5272  /* Read record and get key. */
5273  if (spage_get_record (thread_p, leaf, key_count, &border_record, PEEK) != S_SUCCESS)
5274  {
5275  assert_release (false);
5276  return ER_FAILED;
5277  }
5278  error_code =
5279  btree_read_record (thread_p, btid_int, leaf, &border_record, &border_key, &dummy_leaf_rec, BTREE_LEAF_NODE,
5280  &clear_key, &dummy_offset, PEEK_KEY_VALUE, NULL);
5281  if (error_code != NO_ERROR)
5282  {
5283  ASSERT_ERROR ();
5284  return error_code;
5285  }
5286  /* Compare with last key. */
5287  c = btree_compare_key (key, &border_key, btid_int->key_type, 1, 1, NULL);
5288  btree_clear_key_value (&clear_key, &border_key);
5289  if (c == DB_EQ)
5290  {
5291  if (btree_leaf_is_flaged (&border_record, BTREE_LEAF_RECORD_FENCE))
5292  {
 /* Equal to the last record, which is a fence: reported as bigger than all keys in the page. */
5293  search_key->result = BTREE_KEY_BIGGER;
5294  search_key->slotid = key_count + 1;
5295  }
5296  else
5297  {
5298  search_key->result = BTREE_KEY_FOUND;
5299  search_key->slotid = key_count;
5300  }
5301  return NO_ERROR;
5302  }
5303  else if (c != DB_LT)
5304  {
5305  /* Not smaller than last key. */
5306  search_key->result = (c == DB_GT) ? BTREE_KEY_BIGGER : BTREE_KEY_NOTFOUND;
5307  search_key->slotid = key_count + 1;
5308  return NO_ERROR;
5309  }
5310 
5311  /* Key is between first and last key in leaf page. */
5312  search_key->result = BTREE_KEY_BETWEEN;
5313  /* Unknown slot */
5314  search_key->slotid = -1;
5315  return NO_ERROR;
5316 }
5317 
5318 /*
5319  * btree_search_leaf_page () - Search key in page and return result.
5320  * return : Error code.
5321  * btid (in) : B-tree info.
5322  * page_ptr (in) : Leaf node page pointer.
5323  * key (in) : Search key.
5324  * search_key (out) : Output result:
5325  * - BTREE_KEY_FOUND and slotid of key.
5326  * - BTREE_KEY_NOTFOUND (unknown compare result).
5327  * - BTREE_KEY_SMALLER (smaller than any key in page, slotid = 0).
5328  * - BTREE_KEY_BIGGER (bigger than any key in page, slotid = key_cnt + 1).
5329  * - BTREE_KEY_BETWEEN (key is not found, but it would belong to this page if it existed.
5330  * slotid of next bigger key, where the searched key should be inserted).
5331  *
5332  * Note: Binary search the page to find the location of the key.
5333  *
5334  * NOTE: If this function is called after advancing through the index from
5335  * root and if page has upper fence key, under no circumstance can key
5336  * argument be equal to it. The advance algorithm should have pointed to next leaf node.
5337  * However this case is possible when accessing leaf node directly
5338  * (e.g. after unfixing-refixing leaf node). In this case, the key
5339  * is not considered equal to fence key, but rather bigger than all
5340  * keys in page. The caller should know to go to next page.
 *
 * NOTE(review): in the visible code BTREE_KEY_SMALLER is stored together with slotid = middle (which is 1 on
 *               that path), and the "equal to the last fence record" case stores BTREE_KEY_BIGGER with
 *               slotid = key_cnt. Both differ from the slot numbers listed above - confirm which is intended.
 *
 * NOTE(review): this listing skips source lines 5343, 5369, 5447, 5471, 5473, 5516 and 5538: the first line of
 *               the signature, and statements/conditions inside the body (two brace blocks below have no visible
 *               controlling statement, two others appear empty). Check the upstream file before editing.
5341  */
5342 static int
5344  BTREE_SEARCH_KEY_HELPER * search_key)
5345 {
5346  int key_cnt = 0, offset = 0;
5347  int c = 0, n_prefix = 0;
5348  bool clear_key = false;
5349  /* the start position of non-equal-value column */
5350  int start_col = 0, left_start_col = 0, right_start_col = 0;
5351  INT16 left = 0, right = 0, middle = 0;
5352  DB_VALUE temp_key;
5353  RECDES rec;
5354  bool is_record_read = false;
5355  LEAF_REC leaf_pnt;
5356  int error = NO_ERROR;
5357 
5358  /* Assert expected arguments. */
5359  assert (btid != NULL);
5360  assert (key != NULL && !DB_IS_NULL (key));
5361  assert (page_ptr != NULL);
5362  assert (search_key != NULL);
5363 
5364  btree_init_temp_key_value (&clear_key, &temp_key);
5365 
5366  /* Initialize search results. */
5367  search_key->result = BTREE_KEY_NOTFOUND;
5368  search_key->slotid = NULL_SLOTID;
5370 
5371  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
5372  if (key_cnt < 0)
5373  {
5374  assert (false);
5375  er_log_debug (ARG_FILE_LINE, "btree_search_leaf_page: node key count underflow: %d.", key_cnt);
5376  return ER_FAILED;
5377  }
5378 
5379  /* for compressed midxkey */
5380  n_prefix = btree_node_common_prefix (thread_p, btid, page_ptr);
5381  if (n_prefix < 0)
5382  {
5383  /* Error case? */
5384  return n_prefix;
5385  }
 /* Both range ends start at the page's common prefix as the column position to resume comparison from. */
5386  left_start_col = right_start_col = n_prefix;
5387 
5388 #if !defined (NDEBUG)
5389  if (key_cnt > 0 && DB_VALUE_DOMAIN_TYPE (key) == DB_TYPE_MIDXKEY && n_prefix > 0)
5390  {
5391  /* We need to make sure the key is between the values of fence keys. Otherwise, the optimized midxkey compare
5392  * that skips columns may be broken. */
5393  BTREE_SEARCH_KEY_HELPER debug_search_key;
5394  BTREE_NODE_HEADER *node_header = btree_get_node_header (thread_p, page_ptr);
5395  error = btree_leaf_is_key_between_min_max (thread_p, btid, page_ptr, key, &debug_search_key);
5396  if (error != NO_ERROR)
5397  {
5398  ASSERT_ERROR ();
5399  return error;
5400  }
5401 
 /* Smaller/bigger results are tolerated only when the page has no previous/next page, respectively. */
5402  assert (debug_search_key.result == BTREE_KEY_FOUND || debug_search_key.result == BTREE_KEY_BETWEEN
5403  || (debug_search_key.result == BTREE_KEY_SMALLER && VPID_ISNULL (&node_header->prev_vpid))
5404  || (debug_search_key.result == BTREE_KEY_BIGGER && VPID_ISNULL (&node_header->next_vpid)));
5405  }
5406 #endif /* !NDEBUG */
5407 
5408  /*
5409  * binary search the node to find if the key exists and in which record it
5410  * exists, or if it doesn't exist , the in which record it should have been
5411  * located to preserve the order of keys
5412  */
5413 
5414  /* Initialize binary search range to first and last key in page. */
 /* With key_cnt == 0 the loop is skipped (c and middle stay 0) and the final else branch reports
  * BTREE_KEY_BIGGER with slotid 1. */
5415  left = 1;
5416  right = key_cnt;
5417 
5418  /* Loop while binary search range has at least one key. */
5419  while (left <= right)
5420  {
5421  /* Get current range middle record */
5422  middle = CEIL_PTVDIV ((left + right), 2);
5423  /* Safe guard. */
5424  assert (middle > 0);
5425 
5426  /* Get current middle key. */
5427  if (spage_get_record (thread_p, page_ptr, middle, &rec, PEEK) != S_SUCCESS)
5428  {
5429  /* Unexpected error. */
5430  er_log_debug (ARG_FILE_LINE, "btree_search_leaf_page: sp_getrec fails for middle record.");
5431  assert (false);
5432  return ER_FAILED;
5433  }
5434 
5435  error =
5436  btree_read_record_without_decompression (thread_p, btid, &rec, &temp_key, &leaf_pnt, BTREE_LEAF_NODE,
5437  &clear_key, &offset, PEEK_KEY_VALUE);
5438  if (error != NO_ERROR)
5439  {
5440  /* Error! */
5441  ASSERT_ERROR ();
5442  return error;
5443  }
5444 
 /* From here on rec refers to a record that was actually read; the fence tests after the loop rely on it. */
5445  is_record_read = true;
5446 
 /* Comparison resumes from the smaller of the two column positions recorded for the current range ends. */
5448  {
5449  start_col = MIN (left_start_col, right_start_col);
5450  }
5451 
5452  /* Compare searched key with current middle key. */
5453  c = btree_compare_key (key, &temp_key, btid->key_type, 1, 1, &start_col);
5454 
5455  /* Clear current middle key. */
5456  btree_clear_key_value (&clear_key, &temp_key);
5457  if (c == DB_UNK)
5458  {
5459  /* Unknown compare result? */
5460  search_key->result = BTREE_KEY_NOTFOUND;
5461  search_key->slotid = NULL_SLOTID;
5462 
5463  /* Is this an error case? */
5464  ASSERT_ERROR_AND_SET (error);
5465  return error;
5466  }
5467 
5468  if (c == DB_EQ)
5469  {
5470  /* Current middle key is equal to searched key. */
 /* NOTE(review): the condition selecting this branch is on a skipped listing line; judging by the comment
  * below it tests whether the middle record is a fence - confirm. */
5472  {
5474  /* Fence key! */
5475  assert (middle == 1 || middle == key_cnt);
5476  if (middle == 1)
5477  {
5478  /* Set c = DB_GT and fall through to compare with next key. */
5479  c = DB_GT;
5480  }
5481  else if (middle == key_cnt)
5482  {
5483  /* Key is bigger than all in page?? I have to understand these fence keys better. */
5484  search_key->result = BTREE_KEY_BIGGER;
5485  search_key->slotid = key_cnt;
5486  return NO_ERROR;
5487  }
5488  }
5489  else
5490  {
5491  /* Key exists in page in current middle slot. */
5492  search_key->result = BTREE_KEY_FOUND;
5493  search_key->slotid = middle;
5494  return NO_ERROR;
5495  }
5496  }
5497  /* Key not found yet. Keep searching. */
5498  if (c < 0)
5499  {
5500  /* Continue search between left and current middle. */
5501  right = middle - 1;
5502  right_start_col = start_col;
5503  }
5504  else
5505  {
5506  /* Continue search between current middle and right. */
5507  left = middle + 1;
5508  left_start_col = start_col;
5509  }
5510  }
5511 
 /* The key was not found; c and middle describe the last comparison made by the loop. */
5512  if (c < 0)
5513  {
5514  if (is_record_read && btree_leaf_is_flaged (&rec, BTREE_LEAF_RECORD_FENCE))
5515  {
 /* NOTE(review): the body of this block is on a skipped listing line. */
5517  }
5518 
5519  /* Key doesn't exist and is smaller than current middle key. */
5520  if (middle == 1)
5521  {
5522  /* Key is smaller than all records in page. */
5523  search_key->result = BTREE_KEY_SMALLER;
5524  }
5525  else
5526  {
5527  /* Key doesn't exist but should belong to this page. */
5528  search_key->result = BTREE_KEY_BETWEEN;
5529  }
 /* The key would be inserted at the slot of the middle record it is smaller than. */
5530  search_key->slotid = middle;
5531 
5532  return NO_ERROR;
5533  }
5534  else
5535  {
5536  if (is_record_read && btree_leaf_is_flaged (&rec, BTREE_LEAF_RECORD_FENCE))
5537  {
 /* NOTE(review): the body of this block is on a skipped listing line. */
5539  }
5540 
5541  /* Key doesn't exist and is bigger than current middle key. */
5542  if (middle == key_cnt)
5543  {
5544  search_key->result = BTREE_KEY_BIGGER;
5545  }
5546  else
5547  {
5548  search_key->result = BTREE_KEY_BETWEEN;
5549  }
 /* The key would be inserted right after the middle record it is bigger than. */
5550  search_key->slotid = middle + 1;
5551 
5552  return NO_ERROR;
5553  }
5554 
5555  /* Impossible to reach. */
 /* Both branches above return, so the two statements below are only a safe guard. */
5556  assert (false);
5557  return ER_FAILED;
5558 }
5559 
5560 /*
5561  * xbtree_add_index () - ADD (create) a new B+tree INDEX
5562  * return: BTID * (btid on success and NULL on failure)
5563  * btid(out): Set to the created B+tree index identifier (Note: btid->vfid.volid should be set by the caller)
5564  * key_type(in): Key type of the index to be created.
5565  * class_oid(in): OID of the class for which the index is created
5566  * attr_id(in): Identifier of the attribute of the class for which the index is created.
5567  * unique_pk(in): Non-zero for a unique index; the value is stored as-is in the root header. Zero otherwise.
5568  * num_oids(in): Stored in the root header when unique_pk is non-zero; -1 is stored otherwise.
5569  * num_nulls(in): Stored in the root header when unique_pk is non-zero; -1 is stored otherwise.
5570  * num_keys(in): Stored in the root header when unique_pk is non-zero; -1 is stored otherwise.
5571  *
5572  * Note: Creates the B+tree index. A file identifier (index identifier)
5573  * is defined on the given volume. This identifier is used by
5574  * insertion, deletion and search routines, for the created
5575  * index. The routine allocates the root page of the tree and
5576  * initializes the root header information.
 *
 *       The work is done between log_sysop_start () and log_sysop_attach_to_outer (); on failure the system
 *       operation is aborted with log_sysop_abort (), btid->vfid is set to null and btid->root_pageid to
 *       NULL_PAGEID. A NULL or null-valued class_oid is rejected before the system operation is started.
 *
 * NOTE(review): this listing skips source lines 5659 and 5663 (after log_sysop_attach_to_outer () and inside the
 *               final `if (unique_pk)` block). Check the upstream file before editing.
5577  */
5578 BTID *
5579 xbtree_add_index (THREAD_ENTRY * thread_p, BTID * btid, TP_DOMAIN * key_type, OID * class_oid, int attr_id,
5580  int unique_pk, int num_oids, int num_nulls, int num_keys)
5581 {
5582  BTREE_ROOT_HEADER root_header_info, *root_header = NULL;
5583  VPID root_vpid;
5584  PAGE_PTR page_ptr = NULL;
5585 
 /* The header is assembled in a local structure and handed to btree_init_root_header () below. */
5586  root_header = &root_header_info;
5587 
5588  if (class_oid == NULL || OID_ISNULL (class_oid))
5589  {
5590  return NULL;
5591  }
5592 
5593  log_sysop_start (thread_p);
5594 
5595  /* create a file descriptor, allocate and initialize the root page */
5596  if (btree_create_file (thread_p, class_oid, attr_id, btid) != NO_ERROR)
5597  {
5598  ASSERT_ERROR ();
5599  goto error;
5600  }
5601  btree_get_root_vpid_from_btid (thread_p, btid, &root_vpid);
5602 
5603  page_ptr = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
5604  if (page_ptr == NULL)
5605  {
5606  ASSERT_ERROR ();
5607  goto error;
5608  }
5609  pgbuf_check_page_ptype (thread_p, page_ptr, PAGE_BTREE);
5610 
5611  /* form the root header information */
5612  root_header->node.split_info.pivot = 0.0f;
5613  root_header->node.split_info.index = 0;
5614  VPID_SET_NULL (&(root_header->node.prev_vpid));
5615  VPID_SET_NULL (&(root_header->node.next_vpid));
5616  root_header->node.node_level = 1;
5617  root_header->node.max_key_len = 0;
5618 
5619  if (unique_pk)
5620  {
5621  root_header->num_oids = num_oids;
5622  root_header->num_nulls = num_nulls;
5623  root_header->num_keys = num_keys;
5624  root_header->unique_pk = unique_pk;
5625 
5626  assert (BTREE_IS_UNIQUE (root_header->unique_pk));
 /* NOTE(review): the next assert has the form (A || !A) and can never fail. */
5627  assert (BTREE_IS_PRIMARY_KEY (root_header->unique_pk) || !BTREE_IS_PRIMARY_KEY (root_header->unique_pk));
5628  }
5629  else
5630  {
 /* Non-unique index: the three counters are set to -1. */
5631  root_header->num_oids = -1;
5632  root_header->num_nulls = -1;
5633  root_header->num_keys = -1;
5634  root_header->unique_pk = 0;
5635  }
5636 
5637  COPY_OID (&(root_header->topclass_oid), class_oid);
5638 
5639  VFID_SET_NULL (&(root_header->ovfid));
5640  root_header->rev_level = BTREE_CURRENT_REV_LEVEL;
5641 
5642  root_header->reverse_reserved = 0; /* unused */
5643 
5644 #if defined (SERVER_MODE)
5645  root_header->creator_mvccid = logtb_get_current_mvccid (thread_p);
5646 #else /* !SERVER_MODE */ /* SA_MODE */
5647  root_header->creator_mvccid = MVCCID_NULL;
5648 #endif /* SA_MODE */
5649 
5650  if (btree_init_root_header (thread_p, &btid->vfid, page_ptr, root_header, key_type) != NO_ERROR)
5651  {
5652  goto error;
5653  }
5654 
 /* page_ptr is reset to NULL after the page is handed over with FREE, so it is not touched again. */
5655  pgbuf_set_dirty (thread_p, page_ptr, FREE);
5656  page_ptr = NULL;
5657 
5658  log_sysop_attach_to_outer (thread_p);
5660  if (unique_pk)
5661  {
5662  /* drop statistics if aborted */
5664  }
5665 
5666  return btid;
5667 
5668 error:
 /* Failure path: release the root page if it is still fixed, invalidate btid and abort the system operation. */
5669  if (page_ptr)
5670  {
5671  pgbuf_unfix_and_init (thread_p, page_ptr);
5672  }
5673 
5674  VFID_SET_NULL (&btid->vfid);
5675  btid->root_pageid = NULL_PAGEID;
5676 
5677  log_sysop_abort (thread_p);
5678 
5679  return NULL;
5680 }
5681 
5682 /*
5683  * xbtree_delete_index () -
5684  * return: NO_ERROR, or an error code when the root page or its header record cannot be read
5685  * btid(in): B+tree index identifier
5686  *
5687  * Note: Removes the B+tree index. All pages associated with the index are removed. After the routine is called,
5688  * the index identifier is not valid any more.
 *
 * The index file and, when the root header holds a non-null ovfid, the overflow file are both handed to
 * file_postpone_destroy (). When the root header has a non-zero unique_pk, an RVBT_REMOVE_UNIQUE_STATS postpone
 * record carrying the BTID is appended before that.
 *
 * NOTE(review): this listing does not show the line with the function name and parameter list, nor the
 * statement that assigns P before the NULL check below - confirm both against the original file.
5689  */
5690 int
5692 {
5693  PAGE_PTR P = NULL;
5694  VPID P_vpid;
5695  BTREE_ROOT_HEADER *root_header = NULL;
5696  VFID ovfid;
5697  int unique_pk;
5698  int ret = NO_ERROR;
5699 
5700  P_vpid.volid = btid->vfid.volid; /* read the root page */
5701  P_vpid.pageid = btid->root_pageid;
5703  if (P == NULL)
5704  {
5705  ASSERT_ERROR_AND_SET (ret);
5706  return ret;
5707  }
5708 
5709  (void) pgbuf_check_page_ptype (thread_p, P, PAGE_BTREE);
5710 
5711  /* read the header record */
5712  root_header = btree_get_root_header (thread_p, P);
5713  if (root_header == NULL)
5714  {
 /* release the root page before reporting the failure; ER_FAILED is used when no error id is set */
5715  pgbuf_unfix_and_init (thread_p, P);
5716  return (((ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret);
5717  }
 /* copy the two header fields needed below so that the root page can be released right away */
5718  ovfid = root_header->ovfid;
5719  unique_pk = root_header->unique_pk;
5720  pgbuf_unfix_and_init (thread_p, P);
5721 
5723  if (unique_pk)
5724  {
 /* postpone record that carries only the BTID; no page address is attached to it */
5725  LOG_DATA_ADDR addr = { NULL, NULL, NULL_OFFSET };
5726  log_append_postpone (thread_p, RVBT_REMOVE_UNIQUE_STATS, &addr, sizeof (*btid), btid);
5727  }
 /* the overflow file exists only when the header recorded a non-null ovfid */
5728  if (!VFID_ISNULL (&ovfid))
5729  {
5730  file_postpone_destroy (thread_p, &ovfid);
5731  }
5732  file_postpone_destroy (thread_p, &btid->vfid);
5733 
5734 
 /* mark the caller's identifier as no longer pointing to a root page */
5735  btid->root_pageid = NULL_PAGEID;
5736 
5737  return NO_ERROR;
5738 }
5739 
5740 /*
5741  * btree_generate_prefix_domain () -
5742  * return: btid->key_type itself, or the domain returned by tp_domain_resolve () for the matching varying type
5743  * btid(in): supplies key_type, the domain to convert
5744  *
5745  * Note: This routine returns a varying domain of the same precision for fixed domains which are one of the string
5746  * types. For all other domains, it returns the same domain.
 *
 * The mapping is CHAR -> VARCHAR, NCHAR -> VARNCHAR and BIT -> VARBIT; class_mop, precision, scale, setdomain
 * and collation_id are taken over from the original domain.
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
5747  */
5748 TP_DOMAIN *
5750 {
5751  TP_DOMAIN *domain = btid->key_type;
5752  TP_DOMAIN *var_domain;
5753  DB_TYPE dbtype;
5754  DB_TYPE vartype;
5755 
5756  dbtype = TP_DOMAIN_TYPE (domain);
5757 
5758  /* varying domains did not come into use until btree revision level 1 */
5759  if (dbtype == DB_TYPE_CHAR || dbtype == DB_TYPE_NCHAR || dbtype == DB_TYPE_BIT)
5760  {
5761  switch (dbtype)
5762  {
5763  case DB_TYPE_CHAR:
5764  vartype = DB_TYPE_VARCHAR;
5765  break;
5766  case DB_TYPE_NCHAR:
5767  vartype = DB_TYPE_VARNCHAR;
5768  break;
5769  case DB_TYPE_BIT:
5770  vartype = DB_TYPE_VARBIT;
5771  break;
5772  default:
 /* not reachable: the enclosing condition only admits the three types handled above */
5773  return NULL;
5774  }
5775 
5776  var_domain =
5777  tp_domain_resolve (vartype, domain->class_mop, domain->precision, domain->scale, domain->setdomain,
5778  domain->collation_id);
5779  }
5780  else
5781  {
 /* every other type keeps its domain unchanged */
5782  var_domain = domain;
5783  }
5784 
5785  return var_domain;
5786 }
5787 
5788 /*
5789  * btree_glean_root_header_info () -
5790  * return: NO_ERROR (rc is never changed by the statements visible here)
5791  * root_header(in): root header record to read from
5792  * btid(out): structure that receives the values
5793  *
5794  * Note: This captures the interesting header info into the BTID_INT structure.
 *
 * Copied from the header: unique_pk, topclass_oid, ovfid and rev_level; key_type is unpacked from
 * packed_key_domain. part_key_desc, copy_buf and copy_buf_len are reset.
 *
 * NOTE(review): this listing does not show the line with the function name and parameter list, nor the
 * statement that follows the "must be discovered after the Btree key_type" comment - confirm both against the
 * original file.
5795  */
5796 int
5798 {
5799  int rc;
5800  OR_BUF buf;
5801 
5802  rc = NO_ERROR;
5803 
5804  btid->unique_pk = root_header->unique_pk;
5805 
 /* unpack the key domain stored in packed form inside the header */
5806  or_init (&buf, root_header->packed_key_domain, -1);
5807  btid->key_type = or_get_domain (&buf, NULL, NULL);
5808 
5809  COPY_OID (&btid->topclass_oid, &root_header->topclass_oid);
5810 
5811  btid->ovfid = root_header->ovfid; /* structure copy */
5812 
5813  /* check for the last element domain of partial-key is desc; for btree_range_search, part_key_desc is re-set at
5814  * btree_prepare_bts */
5815  btid->part_key_desc = 0;
5816 
5817  /* init index key copy_buf info */
5818  btid->copy_buf = NULL;
5819  btid->copy_buf_len = 0;
5820 
5821  /* this must be discovered after the Btree key_type */
5823 
5824  btid->rev_level = root_header->rev_level;
5825 
5826  return rc;
5827 }
5828 
5829 /*
5830  * xbtree_find_multi_uniques () - search a list of unique indexes for specified values
5831  * return : search return code
5832  * thread_p (in) : handler thread
5833  * class_oid (in) : class oid
5834  * pruning_type (in) : pruning type
5835  * btids (in) : indexes to search
5836  * values (in) : values to search for
5837  * count (in) : number of indexes
5838  * op_type (in) : operation for which this search is performed
5839  * oids (out) : found OIDs, in an array obtained from db_private_alloc (); NULL when nothing is found
5840  * oids_count (out): number of OIDs found
5841  *
5842  * Note: This function assumes that the indexes it searches are unique
5843  * indexes. If the operation is S_UPDATE, this function stops at the first
5844  * oid it finds in order to comply with the behavior of ON DUPLICATE KEY
5845  * UPDATE statement.
 *
 * On BTREE_KEY_FOUND the array stored in *oids is not released here. On BTREE_KEY_NOTFOUND and on the
 * error_return path it is released and *oids / *oids_count are set to NULL / 0. When the allocation of the
 * array itself fails, the function returns BTREE_ERROR_OCCURRED without writing to *oids or *oids_count.
 *
 * NOTE(review): this listing does not show the return-type line of the definition nor a few statements inside
 * the body (before the two final returns and in the partition mismatch branch) - confirm against the original
 * file.
5846  */
5848 xbtree_find_multi_uniques (THREAD_ENTRY * thread_p, OID * class_oid, int pruning_type, BTID * btids, DB_VALUE * values,
5849  int count, SCAN_OPERATION_TYPE op_type, OID ** oids, int *oids_count)
5850 {
5851  BTREE_SEARCH result = BTREE_KEY_FOUND;
5852  OID *found_oids = NULL;
5853  int i, idx, error = NO_ERROR;
5854  bool is_at_least_one = false;
5855  BTID pruned_btid;
5856  OID pruned_class_oid;
5857  HFID pruned_hfid;
5858  PRUNING_CONTEXT context;
5859  bool is_global_index = false;
5860 
5861  partition_init_pruning_context (&context);
5862 
 /* one slot per index: at most one OID is stored for each searched index */
5863  found_oids = (OID *) db_private_alloc (thread_p, count * sizeof (OID));
5864  if (found_oids == NULL)
5865  {
5866  return BTREE_ERROR_OCCURRED;
5867  }
5868 
5869  if (pruning_type != DB_NOT_PARTITIONED_CLASS)
5870  {
5871  error = partition_load_pruning_context (thread_p, class_oid, pruning_type, &context);
5872  if (error != NO_ERROR)
5873  {
5874  result = BTREE_ERROR_OCCURRED;
5875  goto error_return;
5876  }
5877  }
5878 
 /* idx counts the OIDs stored so far; i walks the btids/values pairs */
5879  idx = 0;
5880  for (i = 0; i < count; i++)
5881  {
5882  is_global_index = false;
5883  BTID_COPY (&pruned_btid, &btids[i]);
5884  COPY_OID (&pruned_class_oid, class_oid);
5885 
5886  if (pruning_type)
5887  {
5888  /* At this point, there's no way of knowing if btids[i] refers a global unique index or a local one. Perform
5889  * pruning and use the partition BTID, even if it is the same one as the BTID we received */
5890  error = partition_prune_unique_btid (&context, &values[i], &pruned_class_oid, &pruned_hfid, &pruned_btid);
5891  if (error != NO_ERROR)
5892  {
5893  result = BTREE_ERROR_OCCURRED;
5894  goto error_return;
5895  }
5896  }
5897 
 /* the OID, if any, is written straight into the next free slot of found_oids */
5898  result =
5899  xbtree_find_unique (thread_p, &pruned_btid, op_type, &values[i], &pruned_class_oid, &found_oids[idx],
5900  is_global_index);
5901 
5902  if (result == BTREE_KEY_FOUND)
5903  {
5904  if (pruning_type == DB_PARTITION_CLASS)
5905  {
5906  if (!OID_EQ (&pruned_class_oid, class_oid))
5907  {
5908  /* Found a constraint violation on a different partition: throw invalid partition */
5911  result = BTREE_ERROR_OCCURRED;
5912  goto error_return;
5913  }
5914  }
5915  is_at_least_one = true;
5916  idx++;
5917  if (op_type == S_UPDATE)
5918  {
5919  break;
5920  }
5921  }
5922  else if (result == BTREE_ERROR_OCCURRED)
5923  {
5924  goto error_return;
5925  }
5926  else
5927  {
5928  /* result == BTREE_KEY_NOTFOUND */
5929  ; /* just go to the next one */
5930  }
5931  }
5932 
5933  if (is_at_least_one)
5934  {
 /* hand the array over to the caller */
5935  *oids_count = idx;
5936  *oids = found_oids;
5937  result = BTREE_KEY_FOUND;
5938  }
5939  else
5940  {
5941  result = BTREE_KEY_NOTFOUND;
5942  db_private_free_and_init (thread_p, found_oids);
5943  *oids = NULL;
5944  *oids_count = 0;
5945  }
5947  return result;
5948 
5949 error_return:
5950  if (found_oids != NULL)
5951  {
5952  db_private_free_and_init (thread_p, found_oids);
5953  }
5954  *oids_count = 0;
5955  *oids = NULL;
5957  return BTREE_ERROR_OCCURRED;
5958 }
5959 
5960 /*
5961  * btree_find_foreign_key () - Find and lock any existing object in foreign key. Used to check that delete/update on
5962  * primary key is allowed.
5963  *
5964  * return : Error code.
5965  * thread_p (in) : Thread entry.
5966  * btid (in) : B-tree ID.
5967  * key (in) : Key value.
5968  * class_oid (in) : Class OID.
5969  * found_oid (out) : Outputs OID of found object. If no object is found outputs NULL.
 *
 * Note: found_oid is written whenever the scan has been prepared successfully, also when the scan itself
 * returns an error. It is left untouched when btree_prepare_bts () fails.
 *
 * NOTE(review): the declarations of btree_scan and find_fk_object are not present in this listing - confirm
 * their types against the original file.
5970  */
5971 int
5972 btree_find_foreign_key (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * found_oid)
5973 {
5975  int error_code = NO_ERROR;
5976  key_val_range kv_range;
5978 
5979  assert (btid != NULL);
5980  assert (key != NULL);
5981  assert (class_oid != NULL);
5982  assert (found_oid != NULL);
5983 
5984  /* Find if key has any objects. */
5985 
5986  /* Define range of scan. */
 /* both bounds share the same key and the range is GE_LE, so only this key value is in range */
5987  pr_share_value (key, &kv_range.key1);
5988  pr_share_value (key, &kv_range.key2);
5989  kv_range.range = GE_LE;
5990  kv_range.num_index_term = 0;
5991 
5992  /* Initialize not found. */
5993  OID_SET_NULL (&find_fk_object.found_oid);
5994 
5995 #if defined (SERVER_MODE)
5996  /* Use S_LOCK to block found object. */
5997  find_fk_object.lock_mode = S_LOCK;
5998 #endif /* SERVER_MODE */
5999  /* Prepare scan. */
6000  BTREE_INIT_SCAN (&btree_scan);
6001  error_code =
6002  btree_prepare_bts (thread_p, &btree_scan, btid, NULL, &kv_range, NULL, NULL, NULL, NULL, false, &find_fk_object);
6003  if (error_code != NO_ERROR)
6004  {
6005  ASSERT_ERROR ();
6006  return error_code;
6007  }
6008  /* Execute scan. */
6009  error_code = btree_range_scan (thread_p, &btree_scan, btree_range_scan_find_fk_any_object);
6010  assert (error_code == NO_ERROR || er_errid () != NO_ERROR);
6011 
6012  /* Output found object. */
6013  COPY_OID (found_oid, &find_fk_object.found_oid);
6014 
6015 #if defined (SERVER_MODE)
 /* a lock is kept only when the scan succeeded and an object was found */
6016  if (error_code != NO_ERROR || OID_ISNULL (&find_fk_object.found_oid))
6017  {
6018  /* Release lock on object if any. */
6019  if (!OID_ISNULL (&find_fk_object.locked_object))
6020  {
6021  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_fk_object.locked_object,
6022  &btree_scan.btid_int.topclass_oid, S_LOCK);
6023  }
6024  }
6025 #endif /* SERVER_MODE */
6026  return error_code;
6027 }
6028 
6029 /*
6030  * btree_scan_clear_key () - clear the current key kept in a scan structure
6031  * return: nothing
6032  * btree_scan(in/out): scan whose cur_key and clear_cur_key are passed to btree_clear_key_value ()
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
6033  */
6034 void
6036 {
6037  btree_clear_key_value (&btree_scan->clear_cur_key, &btree_scan->cur_key);
6038 }
6039 
6040 /*
6041  * btree_is_unique_type () -
6042  * return: Whether the given BTREE_TYPE corresponds to a unique index B+tree
6043  * type(in): value to test
 *
 * Note: true is returned for BTREE_UNIQUE, BTREE_REVERSE_UNIQUE and BTREE_PRIMARY_KEY, false for anything else.
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
6044  */
6045 bool
6047 {
6048  if (type == BTREE_UNIQUE || type == BTREE_REVERSE_UNIQUE || type == BTREE_PRIMARY_KEY)
6049  {
6050  return true;
6051  }
6052  return false;
6053 }
6054 
6055 /*
6056  * xbtree_class_test_unique () -
6057  * return: int
6058  * buf(in):
6059  * buf_size(in):
6060  *
6061  * Note: Return NO_ERROR if the btrees given are unique. Return ER_BTREE_UNIQUE_FAILED if one of unique tests failed.
6062  * This is used for interpreter and xasl batch checking of uniqueness.
6063  */
6064 int
6065 xbtree_class_test_unique (THREAD_ENTRY * thread_p, char *buf, int buf_size)
6066 {
6067  int status = NO_ERROR;
6068  char *bufp, *buf_endptr;
6069  BTID btid;
6070 
6071  bufp = buf;
6072  buf_endptr = (buf + buf_size);
6073 
6074  while ((bufp < buf_endptr) && (status == NO_ERROR))
6075  {
6076  /* unpack the BTID */
6077  bufp = or_unpack_btid (bufp, &btid);
6078 
6079  /* check if the btree is unique */
6080  if ((status == NO_ERROR) && (xbtree_test_unique (thread_p, &btid) != 1))
6081  {
6082  BTREE_SET_UNIQUE_VIOLATION_ERROR (thread_p, NULL, NULL, NULL, &btid, NULL);
6083  status = ER_BTREE_UNIQUE_FAILED;
6084  }
6085  }
6086 
6087  return status;
6088 }
6089 
6090 /*
6091  * xbtree_test_unique () -
6092  * return: int
6093  * btid(in): B+tree index identifier
6094  *
6095  * Note: Return 1 (true) if the index is unique, return 0 if the index is not unique, return -1 if the btree isn't
6096  * keeping track of unique statistics (a regular, plain jane btree).
 *
 * 0 is also returned when logtb_get_global_unique_stats () fails.
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
6097  */
6098 static int
6100 {
6101  INT32 num_oids, num_nulls, num_keys;
6102 
6103  if (logtb_get_global_unique_stats (thread_p, btid, &num_oids, &num_nulls, &num_keys) != NO_ERROR)
6104  {
6105  return 0;
6106  }
6107 
 /* a null count of -1 is what marks a btree without unique statistics */
6108  if (num_nulls == -1)
6109  {
6110  return -1;
6111  }
 /* unique means one object per key: null entries plus distinct keys must add up to the object count */
6112  else if ((num_nulls + num_keys) != num_oids)
6113  {
6114  assert (false);
6115  return 0;
6116  }
6117  else
6118  {
6119  return 1;
6120  }
6121 }
6122 
6123 /*
6124  * xbtree_get_unique_pk () - read the unique_pk field of the root header
6125  * return: unique_pk of the root header, or 0 when the root page or its header cannot be read
6126  * btid(in): B+tree index identifier; root_pageid and vfid.volid locate the root page
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
6127  */
6128 int
6130 {
6131  VPID root_vpid;
6132  PAGE_PTR root = NULL;
6133  BTREE_ROOT_HEADER *root_header = NULL;
6134  int unique_pk;
6135 
6136  root_vpid.pageid = btid->root_pageid;
6137  root_vpid.volid = btid->vfid.volid;
6138 
6139  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
6140  if (root == NULL)
6141  {
6142  return 0;
6143  }
6144 
6145  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
6146 
6147  root_header = btree_get_root_header (thread_p, root);
6148  if (root_header == NULL)
6149  {
 /* NOTE(review): root is still fixed on this return path; a pgbuf_unfix_and_init () call looks to be
  * missing here - confirm */
6150  return 0;
6151  }
6152 
 /* copy the value before the page is released */
6153  unique_pk = root_header->unique_pk;
6154 
6155  pgbuf_unfix_and_init (thread_p, root);
6156 
6157  return unique_pk;
6158 }
6159 
6160 /*
6161  * btree_get_unique_statistics_for_count () - gets unique statistics
6162  * return:
6163  * btid(in): B+tree index identifier
6164  * oid_cnt(in/out): no. of oids
6165  * null_cnt(in/out): no. of nulls
6166  * key_cnt(in/out): no. of keys
6167  *
6168  * Note: In MVCC the statistics are taken from memory structures. In non-mvcc from B-tree header
6169  */
6170 int
6171 btree_get_unique_statistics_for_count (THREAD_ENTRY * thread_p, BTID * btid, int *oid_cnt, int *null_cnt, int *key_cnt)
6172 {
6173  LOG_TRAN_BTID_UNIQUE_STATS *unique_stats = NULL;
6174 
6175  unique_stats = logtb_tran_find_btid_stats (thread_p, btid, true);
6176  if (unique_stats == NULL)
6177  {
6178  return ER_FAILED;
6179  }
6180  *oid_cnt = unique_stats->tran_stats.num_oids + unique_stats->global_stats.num_oids;
6181  *key_cnt = unique_stats->tran_stats.num_keys + unique_stats->global_stats.num_keys;
6182  *null_cnt = unique_stats->tran_stats.num_nulls + unique_stats->global_stats.num_nulls;
6183 
6184  return NO_ERROR;
6185 }
6186 
6187 /*
6188  * btree_get_unique_statistics () -
6189  * returns: NO_ERROR
6190  * btid(in):
6191  * oid_cnt(in):
6192  * null_cnt(in):
6193  * key_cnt(in):
6194  *
6195  * Note: Reads the unique btree statistics from the root header. If
6196  * the btree is not a unique btree, all the stats will be -1.
6197  */
6198 int
6199 btree_get_unique_statistics (THREAD_ENTRY * thread_p, BTID * btid, int *oid_cnt, int *null_cnt, int *key_cnt)
6200 {
6201  VPID root_vpid;
6202  PAGE_PTR root = NULL;
6203  BTREE_ROOT_HEADER *root_header = NULL;
6204  int ret;
6205 
6206  root_vpid.pageid = btid->root_pageid;
6207  root_vpid.volid = btid->vfid.volid;
6208 
6209  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
6210  if (root == NULL)
6211  {
6212  return (((ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret);
6213  }
6214 
6215  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
6216 
6217  root_header = btree_get_root_header (thread_p, root);
6218  if (root_header == NULL)
6219  {
6220  return (((ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret);
6221  }
6222 
6224 
6225  *oid_cnt = root_header->num_oids;
6226  *null_cnt = root_header->num_nulls;
6227  *key_cnt = root_header->num_keys;
6228 
6229  pgbuf_unfix_and_init (thread_p, root);
6230 
6231  return NO_ERROR;
6232 }
6233 
6234 #if defined(ENABLE_UNUSED_FUNCTION)
6235 /*
6236  * btree_get_subtree_stats () - accumulate statistics for the subtree rooted at a page
6237  * return: NO_ERROR, or an error code
6238  * btid(in): index information; key_type is read from it
6239  * page_ptr(in): page at the root of the subtree
6240  * stats_env(in/out): statistics environment; counters in stat_info and the saved values in pkeys are updated
 *
 * Note: For a page with node_level > 1 the function calls itself on every child page and then increments the
 * height. For a leaf page it counts the leaf and its keys, sets the height to 1 and, when stats_env->pkeys
 * is set, updates the partial-key counters. The page counter is incremented once per visited page.
 *
 * NOTE(review): the declarations of non_leaf_rec and leaf_rec are not present in this listing - confirm
 * against the original file.
6241  */
6242 static int
6243 btree_get_subtree_stats (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, BTREE_STATS_ENV * stats_env)
6244 {
6245  int key_cnt;
6246  int i, j;
6248  VPID page_vpid;
6249  PAGE_PTR page = NULL;
6250  RECDES rec;
6251  DB_DOMAIN *key_type;
6252  int ret = NO_ERROR;
6253  BTREE_NODE_HEADER *header = NULL;
6254 
6255  key_type = btid->key_type;
6256  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
6257 
6258  header = btree_get_node_header (thread_p, page_ptr);
6259  if (header == NULL)
6260  {
6261  er_log_debug (ARG_FILE_LINE, "btree_get_subtree_stats: get node header failure: %d", key_cnt);
6262  goto exit_on_error;
6263  }
6264 
6265  if (header->node_level > 1) /* BTREE_NON_LEAF_NODE */
6266  {
6267  if (key_cnt < 0)
6268  {
6269  er_log_debug (ARG_FILE_LINE, "btree_get_subtree_stats: node key count underflow: %d", key_cnt);
6270  goto exit_on_error;
6271  }
6272 
6273  /*
6274  * traverse all the subtrees of this non_leaf page and accumulate
6275  * the statistical data in the environment structure
6276  */
 /* slots 1 .. key_cnt + 1 are read; each record supplies the VPID of one child page */
6277  for (i = 1; i <= key_cnt + 1; i++)
6278  {
6279  if (spage_get_record (thread_p, page_ptr, i, &rec, PEEK) != S_SUCCESS)
6280  {
6281  goto exit_on_error;
6282  }
6283 
6284  btree_read_fixed_portion_of_non_leaf_record (&rec, &non_leaf_rec);
6285  page_vpid = non_leaf_rec.pnt;
6286 
6287  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
6288  if (page == NULL)
6289  {
6290  goto exit_on_error;
6291  }
6292 
6293  (void) pgbuf_check_page_ptype (thread_p, page, PAGE_BTREE);
6294 
 /* recursion: the child is kept fixed for the duration of the call */
6295  ret = btree_get_subtree_stats (thread_p, btid, page, stats_env);
6296  if (ret != NO_ERROR)
6297  {
6298  goto exit_on_error;
6299  }
6300 
6301  pgbuf_unfix_and_init (thread_p, page);
6302  }
6303 
 /* the leaf branch sets height to 1; every non-leaf level on the way back adds one */
6304  stats_env->stat_info->height++;
6305  }
6306  else
6307  {
6308  DB_VALUE key, elem;
6310  bool clear_key;
6311  int offset;
6312  int k;
6313  DB_MIDXKEY *midxkey;
6314  int prev_j_index, prev_k_index;
6315  char *prev_j_ptr, *prev_k_ptr;
6316 
6317  stats_env->stat_info->leafs++;
6318  stats_env->stat_info->keys += key_cnt;
6319  stats_env->stat_info->height = 1; /* init */
6320 
6321  btree_init_temp_key_value (&clear_key, &key);
6322 
6323  if (stats_env->pkeys)
6324  {
6325  if (TP_DOMAIN_TYPE (key_type) != DB_TYPE_MIDXKEY)
6326  {
6327  /* single column index */
6328  stats_env->stat_info->pkeys[0] += key_cnt;
6329  }
6330  else
6331  {
6332  for (i = 1; i <= key_cnt; i++)
6333  {
6334  if (spage_get_record (thread_p, page_ptr, i, &rec, PEEK) != S_SUCCESS)
6335  {
6336  goto exit_on_error;
6337  }
6338 
6339  /* read key-value */
6340  btree_read_record (thread_p, page_ptr, btid, &rec, &key, &leaf_rec, BTREE_LEAF_NODE, &clear_key,
6341  &offset, PEEK_KEY_VALUE, NULL);
6342 
6343  /* extract the sequence of the key-value */
6344  midxkey = db_get_midxkey (&key);
6345  if (midxkey == NULL)
6346  {
6347  goto exit_on_error;
6348  }
6349 
6350  prev_j_index = 0;
6351  prev_j_ptr = NULL;
6352 
 /* look for the first column whose value differs from the value saved for the previous key */
6354  for (j = 0; j < stats_env->stat_info->pkeys_size; j++)
6355  {
6356  /* extract the element of the midxkey */
6357  ret = pr_midxkey_get_element_nocopy (midxkey, j, &elem, &prev_j_index, &prev_j_ptr);
6358  if (ret != NO_ERROR)
6359  {
6360  goto exit_on_error;
6361  }
6362 
6363  if (tp_value_compare (&(stats_env->pkeys[j]), &elem, 0, 1) != DB_EQ)
6364  {
6365  /* found different value */
6366  stats_env->stat_info->pkeys[j] += 1;
6367  pr_clear_value (&(stats_env->pkeys[j])); /* clear saved */
6368  pr_clone_value (&elem, &(stats_env->pkeys[j])); /* save */
6369 
6370  /* propagate to the following partial key-values */
6371  prev_k_index = prev_j_index;
6372  prev_k_ptr = prev_j_ptr;
6373 
 /* every longer prefix is counted as new too, without comparing it */
6375  for (k = j + 1; k < stats_env->stat_info->pkeys_size; k++)
6376  {
6377  ret = pr_midxkey_get_element_nocopy (midxkey, k, &elem, &prev_k_index, &prev_k_ptr);
6378  if (ret != NO_ERROR)
6379  {
6380  goto exit_on_error;
6381  }
6382 
6383  stats_env->stat_info->pkeys[k]++;
6384  pr_clear_value (&(stats_env->pkeys[k]));
6385  pr_clone_value (&elem, &(stats_env->pkeys[k])); /* save */
6386  }
6387 
6388  /* go to the next key */
6389  break;
6390  }
6391  }
6392 
6393  btree_clear_key_value (&clear_key, &key);
6394  }
6395  }
6396  }
6397  }
6398 
6399  stats_env->stat_info->pages++;
6400 
6401  return ret;
6402 
6403 exit_on_error:
6404 
 /* only the child page fixed by this invocation is released here; page_ptr belongs to the caller */
6405  if (page)
6406  {
6407  pgbuf_unfix_and_init (thread_p, page);
6408  }
6409 
 /* keep an error already stored in ret, otherwise use er_errid (), otherwise ER_FAILED */
6410  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
6411 }
6412 #endif
6413 
6414 /*
6415  * btree_get_stats_midxkey () - update the partial-key counters with one multi-column key
6416  * return: NO_ERROR, or an error code
6417  * thread_p(in);
6418  * env(in/out): Structure to store and return the statistical information
6419  * midxkey(in); key to account for; a NULL value is treated as an error
 *
 * Note: The elements of the key are compared, in order, with the values saved in env->pkeys_val. At the first
 * element that differs, the counter of that position and of every following position is incremented and
 * the saved values of those positions are replaced with the elements of this key. Nothing changes when
 * all env->pkeys_val_num elements are equal to the saved ones.
 *
 * NOTE(review): the line with the function name and parameter list is not present in this listing - confirm
 * the signature against the original file.
6420  */
6421 static int
6423 {
6424  int i, k;
6425  int prev_i_index, prev_k_index;
6426  char *prev_i_ptr, *prev_k_ptr;
6427  DB_VALUE elem;
6428  int ret = NO_ERROR;
6429 
6430  if (midxkey == NULL)
6431  {
6432  assert_release (false);
6433  goto exit_on_error;
6434  }
6435 
6436  prev_i_index = 0;
6437  prev_i_ptr = NULL;
6438  for (i = 0; i < env->pkeys_val_num; i++)
6439  {
6440  /* extract the element of the key */
6441  ret = pr_midxkey_get_element_nocopy (midxkey, i, &elem, &prev_i_index, &prev_i_ptr);
6442  if (ret != NO_ERROR)
6443  {
6444  assert_release (false);
6445  goto exit_on_error;
6446  }
6447 
6448  if (tp_value_compare (&(env->pkeys_val[i]), &elem, 0, 1) != DB_EQ)
6449  {
6450  /* found different value */
6451  env->stat_info->pkeys[i]++;
6452  pr_clear_value (&(env->pkeys_val[i])); /* clear saved */
6453  pr_clone_value (&elem, &(env->pkeys_val[i])); /* save */
6454 
 /* the clone above is what is kept; the extracted element is released when it owns memory */
6455  if (elem.need_clear == true)
6456  {
6457  pr_clear_value (&elem);
6458  }
6459 
6460  /* propagate to the following partial key-values */
6461  prev_k_index = prev_i_index;
6462  prev_k_ptr = prev_i_ptr;
6463  for (k = i + 1; k < env->pkeys_val_num; k++)
6464  {
6465  ret = pr_midxkey_get_element_nocopy (midxkey, k, &elem, &prev_k_index, &prev_k_ptr);
6466  if (ret != NO_ERROR)
6467  {
6468  assert_release (false);
6469  goto exit_on_error;
6470  }
6471 
6472  env->stat_info->pkeys[k]++;
6473  pr_clear_value (&(env->pkeys_val[k])); /* clear saved */
6474  pr_clone_value (&elem, &(env->pkeys_val[k])); /* save */
6475 
6476  if (elem.need_clear == true)
6477  {
6478  pr_clear_value (&elem);
6479  }
6480  }
6481 
 /* all remaining positions were handled by the inner loop */
6482  break;
6483  }
6484  else if (elem.need_clear == true)
6485  {
6486  pr_clear_value (&elem);
6487  }
6488  }
6489 
6490  return ret;
6491 
6492 exit_on_error:
6493 
 /* keep an error already stored in ret, otherwise use er_errid (), otherwise ER_FAILED */
6494  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
6495 }
6496 
6497 /*
6498  * btree_get_stats_key () - account for the key at the current position of the scan in env
6499  * return: NO_ERROR, or an error code
6500  * thread_p(in);
6501  * env(in/out): Structure to store and return the statistical information
 *
 * Note: The record is taken from env->btree_scan (C_page and slot_id). When mvcc_snapshot is not NULL, a key
 * for which btree_get_num_visible_from_leaf_and_ovf () reports no visible object is not counted.
 * Otherwise the key counter is incremented and, depending on env->pkeys_val_num, the partial-key
 * counters are updated: not at all for <= 0, pkeys[0] only for 1, through btree_get_stats_midxkey ()
 * for larger values.
 *
 * NOTE(review): this listing does not show the line with the function name and parameter list, nor the
 * conditions of the three "filter out fence_key" tests. mvcc_snapshot is used below without a visible
 * declaration and is presumably a parameter - confirm against the original file.
6502  */
6503 static int
6505 {
6506  BTREE_SCAN *BTS;
6507  RECDES rec;
6508  DB_VALUE key_value;
6509  LEAF_REC leaf_pnt;
6510  bool clear_key = false;
6511  int offset;
6512  int ret = NO_ERROR;
6513 
6514  assert (env != NULL);
6515 
6516  btree_init_temp_key_value (&clear_key, &key_value);
6517 
6518  if (mvcc_snapshot != NULL)
6519  {
6520  int max_visible_oids = 1;
6521  int num_visible_oids = 0;
6522 
6523  BTS = &(env->btree_scan);
6524 
6525  if (BTS->C_page == NULL)
6526  {
6527  goto exit_on_error;
6528  }
6529 
6530  assert (BTS->slot_id > 0);
6531  if (spage_get_record (thread_p, BTS->C_page, BTS->slot_id, &rec, PEEK) != S_SUCCESS)
6532  {
6533  goto exit_on_error;
6534  }
6535 
6536  /* filter out fence_key */
6538  {
6539  goto count_keys;
6540  }
6541 
6542  /* read key-value */
6543  assert (clear_key == false);
6544 
6545  if (btree_read_record (thread_p, &BTS->btid_int, BTS->C_page, &rec, &key_value, (void *) &leaf_pnt,
6546  BTREE_LEAF_NODE, &clear_key, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
6547  {
6548  goto exit_on_error;
6549  }
6550 
6551  /* Is there any visible objects? */
 /* a limit of 1 is passed in: only "none" versus "at least one" matters here */
6552  max_visible_oids = 1;
6553  ret =
6554  btree_get_num_visible_from_leaf_and_ovf (thread_p, &BTS->btid_int, &rec, offset, &leaf_pnt, &max_visible_oids,
6555  mvcc_snapshot, &num_visible_oids);
6556  if (ret != NO_ERROR)
6557  {
6558  /* Error. */
6559  goto exit_on_error;
6560  }
6561  else if (num_visible_oids == 0)
6562  {
6563  /* No visible object. */
6564  goto end;
6565  }
6566  }
6567 
6568 count_keys:
6569  env->stat_info->keys++;
6570 
6571  if (env->pkeys_val_num <= 0)
6572  {
6573  ; /* do not request pkeys info; go ahead */
6574  }
6575  else if (env->pkeys_val_num == 1)
6576  {
6577  /* single column index */
6578  env->stat_info->pkeys[0]++;
6579  }
6580  else
6581  {
6582  /* multi column index */
6583 
6584  if (mvcc_snapshot != NULL)
6585  {
6586  /* filter out fence_key */
6588  {
6589  assert (ret == NO_ERROR);
6590 
 /* take back the increment done at count_keys */
6591  env->stat_info->keys--;
6592  assert (env->stat_info->keys >= 0);
6593 
6594  goto end;
6595  }
6596 
6597  /* key_value already computed */
6598  assert (!DB_IS_NULL (&key_value));
6599 
6600  /* get pkeys info */
6601  ret = btree_get_stats_midxkey (thread_p, env, db_get_midxkey (&key_value));
6602  if (ret != NO_ERROR)
6603  {
6604  goto exit_on_error;
6605  }
6606 
6607  goto end;
6608  }
6609 
 /* no snapshot: the record and the key have not been read yet */
6610  BTS = &(env->btree_scan);
6611 
6612  if (BTS->C_page == NULL)
6613  {
6614  goto exit_on_error;
6615  }
6616 
6617  assert (BTS->slot_id > 0);
6618  if (spage_get_record (thread_p, BTS->C_page, BTS->slot_id, &rec, PEEK) != S_SUCCESS)
6619  {
6620  goto exit_on_error;
6621  }
6622 
6623  /* filter out fence_key */
6625  {
6626  assert (ret == NO_ERROR);
6627 
 /* take back the increment done at count_keys */
6628  env->stat_info->keys--;
6629  assert (env->stat_info->keys >= 0);
6630 
6631  goto end;
6632  }
6633 
6634  /* read key-value */
6635 
6636  assert (clear_key == false);
6637 
6638  if (btree_read_record (thread_p, &BTS->btid_int, BTS->C_page, &rec, &key_value, (void *) &leaf_pnt,
6639  BTREE_LEAF_NODE, &clear_key, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
6640  {
6641  goto exit_on_error;
6642  }
6643 
6644  /* get pkeys info */
6645  ret = btree_get_stats_midxkey (thread_p, env, db_get_midxkey (&key_value));
6646  if (ret != NO_ERROR)
6647  {
6648  goto exit_on_error;
6649  }
6650  }
6651 
6652 end:
6653 
 /* single exit: the key value is released here on success and on error alike */
6654  if (clear_key)
6655  {
6656  pr_clear_value (&key_value);
6657  clear_key = false;
6658  }
6659 
6660  return ret;
6661 
6662 exit_on_error:
6663 
 /* keep an error already stored in ret, otherwise use er_errid (), otherwise ER_FAILED */
6664  ret = (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
6665 
6666  goto end;
6667 }
6668 
6669 /*
6670  * btree_get_stats_with_AR_sampling () - Do Acceptance/Rejection Sampling
6671  * return: NO_ERROR, or an error code
6672  * env(in/out): Structure to store and return the statistical information
 *
 * Note: Up to STATS_SAMPLING_THRESHOLD rounds are run. In each round btree_find_AR_sampling_leaf () supplies a
 * page and a found flag; when found is set and the page has keys, the leaf is counted and each of its
 * keys is passed to btree_get_stats_key () with a NULL snapshot. Afterwards, when at least one leaf was
 * counted, the leaf, key and partial-key counters are multiplied by pages / leafs (integer division).
 *
 * NOTE(review): this listing does not show the line with the function name and parameter list, nor the
 * condition of the first test inside the sampling loop - confirm against the original file.
6673  */
6674 static int
6676 {
6677  BTREE_SCAN *BTS;
6678  int n, i;
6679  bool found;
6680  int key_cnt;
6681  int exp_ratio;
6682  int ret = NO_ERROR;
6683 #if !defined(NDEBUG)
6684  BTREE_NODE_HEADER *header = NULL;
6685 #endif
6686 
6687  assert (env != NULL);
6688  assert (env->stat_info != NULL);
6689 
6690  BTS = &(env->btree_scan);
6691  BTS->use_desc_index = 0; /* init */
6692 
6693  for (n = 0; n < STATS_SAMPLING_THRESHOLD; n++)
6694  {
6696  {
6697  break; /* found all samples */
6698  }
6699 
6700  BTS->C_page =
6701  btree_find_AR_sampling_leaf (thread_p, BTS->btid_int.sys_btid, &BTS->C_vpid, env->stat_info, &found);
6702  if (BTS->C_page == NULL)
6703  {
6704  goto exit_on_error;
6705  }
6706 
6707  /* found sampling leaf page */
6708  if (found)
6709  {
6710  key_cnt = btree_node_number_of_keys (thread_p, BTS->C_page);
6711  assert_release (key_cnt >= 0);
6712 
6713 #if !defined(NDEBUG)
6714  header = btree_get_node_header (thread_p, BTS->C_page);
6715 
6716  assert (header != NULL);
6717  assert (header->node_level == 1); /* BTREE_LEAF_NODE */
6718 #endif
6719 
6720  if (key_cnt > 0)
6721  {
6722  env->stat_info->leafs++;
6723 
 /* start at the first slot of the sampled leaf */
6724  BTS->slot_id = 1;
6725  BTS->oid_pos = 0;
6726 
6727  assert_release (BTS->slot_id <= key_cnt);
6728 
6729  for (i = 0; i < key_cnt; i++)
6730  {
6731  ret = btree_get_stats_key (thread_p, env, NULL);
6732  if (ret != NO_ERROR)
6733  {
6734  goto exit_on_error;
6735  }
6736 
6737  /* get the next index record */
6738  ret = btree_find_next_index_record (thread_p, BTS);
6739  if (ret != NO_ERROR)
6740  {
6741  goto exit_on_error;
6742  }
6743  }
6744  }
6745  }
6746 
 /* release whatever the scan still holds before the next round */
6747  if (BTS->P_page != NULL)
6748  {
6749  pgbuf_unfix_and_init (thread_p, BTS->P_page);
6750  }
6751 
6752  if (BTS->C_page != NULL)
6753  {
6754  pgbuf_unfix_and_init (thread_p, BTS->C_page);
6755  }
6756 
6757  if (BTS->O_page != NULL)
6758  {
6759  pgbuf_unfix_and_init (thread_p, BTS->O_page);
6760  }
6761  } /* for (n = 0; ... ) */
6762 
6763  /* apply distributed expension */
6764  if (env->stat_info->leafs > 0)
6765  {
 /* integer division: the ratio is truncated */
6766  exp_ratio = env->stat_info->pages / env->stat_info->leafs;
6767 
 /* a negative product is taken as overflow and replaced with INT_MAX, for each counter below */
6768  env->stat_info->leafs *= exp_ratio;
6769  if (env->stat_info->leafs < 0)
6770  {
6771  env->stat_info->leafs = INT_MAX;
6772  }
6773 
6774  env->stat_info->keys *= exp_ratio;
6775  if (env->stat_info->keys < 0)
6776  {
6777  env->stat_info->keys = INT_MAX;
6778  }
6779 
6780  for (i = 0; i < env->pkeys_val_num; i++)
6781  {
6782  env->stat_info->pkeys[i] *= exp_ratio;
6783  if (env->stat_info->pkeys[i] < 0)
6784  { /* multiply-overflow defence */
6785  env->stat_info->pkeys[i] = INT_MAX;
6786  }
6787  }
6788  }
6789 
6790 end:
6791 
 /* common exit: release any page of the scan that is still fixed */
6792  if (BTS->P_page != NULL)
6793  {
6794  pgbuf_unfix_and_init (thread_p, BTS->P_page);
6795  }
6796 
6797  if (BTS->C_page != NULL)
6798  {
6799  pgbuf_unfix_and_init (thread_p, BTS->C_page);
6800  }
6801 
6802  if (BTS->O_page != NULL)
6803  {
6804  pgbuf_unfix_and_init (thread_p, BTS->O_page);
6805  }
6806 
6807  return ret;
6808 
6809 exit_on_error:
6810 
 /* keep an error already stored in ret, otherwise use er_errid (), otherwise ER_FAILED */
6811  ret = (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
6812 
6813  goto end;
6814 }
6815 
6816 /*
6817  * btree_get_stats_with_fullscan () - Do Full Scan
6818  * return: NO_ERROR, or an error code
6819  * env(in/out): Structure to store and return the statistical information
 *
 * Note: The scan is positioned with btree_find_lower_bound_leaf () and advanced record by record with
 * btree_find_next_index_record () until BTREE_END_OF_SCAN. Each record is passed to
 * btree_get_stats_key () together with the snapshot returned by logtb_get_mvcc_snapshot (). The leaf
 * counter is incremented each time the VPID of the current page changes.
 *
 * NOTE(review): this listing does not show the line with the function name and parameter list, nor the
 * declaration of mvcc_snapshot - confirm against the original file.
6820  */
6821 static int
6823 {
6824  BTREE_SCAN *BTS;
6825  VPID C_vpid; /* vpid of current leaf page */
6826  int ret = NO_ERROR;
6828 
6829  assert (env != NULL);
6830  assert (env->stat_info != NULL);
6831 
6832  mvcc_snapshot = logtb_get_mvcc_snapshot (thread_p);
6833  if (mvcc_snapshot == NULL)
6834  {
6835  ASSERT_ERROR_AND_SET (ret);
6836  goto exit_on_error;
6837  }
6838 
6839  BTS = &(env->btree_scan);
6840  BTS->use_desc_index = 0; /* get the left-most leaf page */
6841 
6842  ret = btree_find_lower_bound_leaf (thread_p, BTS, env->stat_info);
6843  if (ret != NO_ERROR)
6844  {
6845  ASSERT_ERROR ();
6846  goto exit_on_error;
6847  }
6848 
6849  VPID_SET_NULL (&C_vpid); /* init */
6850 
6851  while (!BTREE_END_OF_SCAN (BTS))
6852  {
6853  /* move on another leaf page */
6854  if (!VPID_EQ (&(BTS->C_vpid), &C_vpid))
6855  {
6856  VPID_COPY (&C_vpid, &(BTS->C_vpid)); /* keep current leaf vpid */
6857 
6858  env->stat_info->leafs++;
6859  }
6860 
6861  ret = btree_get_stats_key (thread_p, env, mvcc_snapshot);
6862  if (ret != NO_ERROR)
6863  {
6864  ASSERT_ERROR ();
6865  goto exit_on_error;
6866  }
6867 
6868  /* get the next index record */
6869  ret = btree_find_next_index_record (thread_p, BTS);
6870  if (ret != NO_ERROR)
6871  {
6872  ASSERT_ERROR ();
6873  goto exit_on_error;
6874  }
6875  }
6876 
6877 end:
6878 
 /* common exit: release any page of the scan that is still fixed */
6879  if (BTS->P_page != NULL)
6880  {
6881  pgbuf_unfix_and_init (thread_p, BTS->P_page);
6882  }
6883 
6884  if (BTS->C_page != NULL)
6885  {
6886  pgbuf_unfix_and_init (thread_p, BTS->C_page);
6887  }
6888 
6889  if (BTS->O_page != NULL)
6890  {
6891  pgbuf_unfix_and_init (thread_p, BTS->O_page);
6892  }
6893 
6894  return ret;
6895 
6896 exit_on_error:
6897 
 /* every jump to this label is expected to have stored an error code in ret already */
6898  assert_release (ret != NO_ERROR);
6899 
6900  goto end;
6901 }
6902 
6903 /*
6904  * btree_get_root_vpid_from_btid () -
6905  * return: pageid or NULL_PAGEID
6906  * btid(in): B+tree index identifier
6907  * first_vpid(out):
6908  *
6909  * Note: get the page identifier of the first allocated page of the given file.
6910  */
6911 void
6912 btree_get_root_vpid_from_btid (THREAD_ENTRY * thread_p, BTID * btid, VPID * root_vpid)
6913 {
6914  assert (btid != NULL);
6915  assert (root_vpid != NULL);
6916  assert (!VFID_ISNULL (&btid->vfid));
6917  root_vpid->volid = btid->vfid.volid;
6918  root_vpid->pageid = btid->root_pageid;
6919 }
6920 
6921 /*
6922  * btree_get_btid_from_file () - get btid for file (caller must make sure this is indeed a b-tree file
6923  *
6924  * return : error code
6925  * thread_p (in) : thread entry
6926  * vfid (in) : file identifier
6927  * btid_out (out) : b-tree identifier
6928  */
6929 int
6930 btree_get_btid_from_file (THREAD_ENTRY * thread_p, const VFID * vfid, BTID * btid_out)
6931 {
6932  VPID vpid_sticky;
6933 
6934  int error_code = NO_ERROR;
6935 
6936  error_code = file_get_sticky_first_page (thread_p, vfid, &vpid_sticky);
6937  if (error_code != NO_ERROR)
6938  {
6939  ASSERT_ERROR ();
6940  return error_code;
6941  }
6942  assert (!VPID_ISNULL (&vpid_sticky));
6943  assert (vfid->volid == vpid_sticky.volid);
6944  btid_out->vfid = *vfid;
6945  btid_out->root_pageid = vpid_sticky.pageid;
6946  return NO_ERROR;
6947 }
6948 
6949 /*
6950  * btree_get_stats () - Get Statistical Information about the B+tree index
6951  * return: NO_ERROR
6952  * stat_info_p(in/out): Structure to store and return the statistical information
6953  * with_fullscan(in): true iff WITH FULLSCAN
6954  *
6955  * Note: Computes and returns statistical information about B+tree which consist of the number of leaf pages,
6956  * total number of pages, number of keys and the height of the tree.
6957  */
6958 int
6959 btree_get_stats (THREAD_ENTRY * thread_p, BTREE_STATS * stat_info_p, bool with_fullscan)
6960 {
6961  int npages;
6962  BTREE_STATS_ENV stat_env, *env;
6963  VPID root_vpid;
6964  PAGE_PTR root_page_ptr = NULL;
6965  DB_TYPE dom_type;
6966  BTREE_ROOT_HEADER *root_header = NULL;
6967  int i;
6968  int ret = NO_ERROR;
6969 
6970  assert_release (stat_info_p != NULL);
6971  assert_release (!BTID_IS_NULL (&stat_info_p->btid));
6972 
6973  ret = file_get_num_user_pages (thread_p, &(stat_info_p->btid.vfid), &npages);
6974  if (ret != NO_ERROR)
6975  {
6976  ASSERT_ERROR ();
6977  return ret;
6978  }
6979  assert_release (npages >= 1);
6980 
6981  /* For the optimization of the sampling, if the btree file has currently the same pages as we gathered statistics, we
6982  * guess the btree file has not been modified; So, we take current stats as it is */
6983  if (!with_fullscan)
6984  {
6985  /* check if the stats has been gathered */
6986  if (stat_info_p->keys > 0)
6987  {
6988  /* guess the stats has not been modified */
6989  if (npages == stat_info_p->pages)
6990  {
6991  return NO_ERROR;
6992  }
6993  }
6994  }
6995 
6996  /* set environment variable */
6997  env = &stat_env;
6998  BTREE_INIT_SCAN (&(env->btree_scan));
6999  env->btree_scan.btid_int.sys_btid = &(stat_info_p->btid);
7000  env->stat_info = stat_info_p;
7001  env->pkeys_val_num = stat_info_p->pkeys_size;
7002 
7004  for (i = 0; i < env->pkeys_val_num; i++)
7005  {
7006  db_make_null (&(env->pkeys_val[i]));
7007  }
7008 
7009  root_vpid.pageid = env->stat_info->btid.root_pageid; /* read root page */
7010  root_vpid.volid = env->stat_info->btid.vfid.volid;
7011 
7012  root_page_ptr = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7013  if (root_page_ptr == NULL)
7014  {
7015  goto exit_on_error;
7016  }
7017 
7018  (void) pgbuf_check_page_ptype (thread_p, root_page_ptr, PAGE_BTREE);
7019 
7020  root_header = btree_get_root_header (thread_p, root_page_ptr);
7021  if (root_header == NULL)
7022  {
7023  goto exit_on_error;
7024  }
7025 
7026  ret = btree_glean_root_header_info (thread_p, root_header, &(env->btree_scan.btid_int));
7027  if (ret != NO_ERROR)
7028  {
7029  pgbuf_unfix_and_init (thread_p, root_page_ptr);
7030  goto exit_on_error;
7031  }
7032 
7033  pgbuf_unfix_and_init (thread_p, root_page_ptr);
7034 
7035  dom_type = TP_DOMAIN_TYPE (env->btree_scan.btid_int.key_type);
7036  if (env->pkeys_val_num <= 0)
7037  {
7038  /* do not request pkeys info; go ahead */
7039  if (!tp_valid_indextype (dom_type) && dom_type != DB_TYPE_MIDXKEY)
7040  {
7041  assert_release (false);
7042  goto exit_on_error;
7043  }
7044  }
7045  else if (env->pkeys_val_num == 1)
7046  {
7047  /* single column index */
7048  if (!tp_valid_indextype (dom_type))
7049  {
7050  assert_release (false);
7051  goto exit_on_error;
7052  }
7053  }
7054  else
7055  {
7056  /* multi column index */
7057  if (dom_type != DB_TYPE_MIDXKEY)
7058  {
7059  assert_release (false);
7060  goto exit_on_error;
7061  }
7062  }
7063 
7064  /* initialize environment stat_info structure */
7065  env->stat_info->pages = npages;
7066  env->stat_info->leafs = 0;
7067  env->stat_info->height = 0;
7068  env->stat_info->keys = 0;
7069 
7070  for (i = 0; i < env->pkeys_val_num; i++)
7071  {
7072  env->stat_info->pkeys[i] = 0; /* clear old stats */
7073  }
7074 
7075  if (with_fullscan || npages <= STATS_SAMPLING_THRESHOLD)
7076  {
7077  /* do fullscan at small table */
7078  ret = btree_get_stats_with_fullscan (thread_p, env);
7079  }
7080  else
7081  {
7082  ret = btree_get_stats_with_AR_sampling (thread_p, env);
7083  }
7084 
7085  if (ret != NO_ERROR)
7086  {
7087  goto exit_on_error;
7088  }
7089 
7090  /* check for emptiness */
7091  for (i = 0; i < env->pkeys_val_num; i++)
7092  {
7093  assert_release (env->stat_info->keys >= env->stat_info->pkeys[i]);
7094 
7095  if (env->stat_info->keys <= 0)
7096  {
7097  /* is empty */
7098  assert_release (env->stat_info->pkeys[i] == 0);
7099  env->stat_info->pkeys[i] = 0;
7100  }
7101  else
7102  {
7103  env->stat_info->pkeys[i] = MAX (env->stat_info->pkeys[i], 1);
7104  }
7105  }
7106 
7107  if (npages < env->stat_info->height)
7108  {
7109  // this is a corner case. if b-tree had only one page when npages was read, but its root was split immediately
7110  // after, we'd have this awkward situation.
7111  //
7112  // but we may read npages again, and this time it should be better (we rely also on the fact that one root is
7113  // split, it is never merged back to one page again).
7114  //
7115  ret = file_get_num_user_pages (thread_p, &(stat_info_p->btid.vfid), &npages);
7116  if (ret != NO_ERROR)
7117  {
7118  ASSERT_ERROR ();
7119  return ret;
7120  }
7121  assert_release (npages >= 1);
7122  assert_release (npages >= env->stat_info->height);
7123  }
7124 
7125  /* check for leaf pages */
7126  env->stat_info->leafs = MAX (1, env->stat_info->leafs);
7127  env->stat_info->leafs = MIN (env->stat_info->leafs, npages - (env->stat_info->height - 1));
7128 
7129  assert_release (env->stat_info->pages >= 1);
7130  assert_release (env->stat_info->leafs >= 1);
7131  assert_release (env->stat_info->height >= 1);
7132  assert_release (env->stat_info->keys >= 0);
7133 
7134 end:
7135 
7136  if (root_page_ptr)
7137  {
7138  pgbuf_unfix_and_init (thread_p, root_page_ptr);
7139  }
7140 
7141  /* clear partial key-values */
7142  for (i = 0; i < env->pkeys_val_num; i++)
7143  {
7144  pr_clear_value (&(env->pkeys_val[i]));
7145  }
7146 
7148 
7149  return ret;
7150 
7151 exit_on_error:
7152 
7153  ret = (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
7154 
7155  goto end;
7156 }
7157 
7158 /*
7159  * xbtree_get_key_type () - Obtains index key type.
7160  *
7161  * return : Error code.
7162  * thread_p (in) : Thread entry
7163  * btid (in) : B-tree identifier.
7164  * key_type (out) : Index key type.
7165  */
7166 int
7167 xbtree_get_key_type (THREAD_ENTRY * thread_p, BTID btid, TP_DOMAIN ** key_type)
7168 {
7169  VPID root_vpid;
7170  PAGE_PTR root_page;
7171  BTREE_ROOT_HEADER *root_header = NULL;
7172  OR_BUF buf;
7173 
7174  root_vpid.pageid = btid.root_pageid;
7175  root_vpid.volid = btid.vfid.volid;
7176 
7177  assert (key_type != NULL);
7178  *key_type = NULL;
7179 
7180  root_page = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7181  if (root_page == NULL)
7182  {
7183  return ER_FAILED;
7184  }
7185 
7186  root_header = btree_get_root_header (thread_p, root_page);
7187  or_init (&buf, root_header->packed_key_domain, -1);
7188  *key_type = or_get_domain (&buf, NULL, NULL);
7189 
7190  pgbuf_unfix (thread_p, root_page);
7191 
7192  if (*key_type == NULL)
7193  {
7194  return ER_FAILED;
7195  }
7196 
7197  return NO_ERROR;
7198 }
7199 
7200 /*
7201  * btree_check_page_key () - Check (verify) page
7202  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
7203  * btid(in):
7204  * page_ptr(in): Page pointer
7205  * page_vpid(in): Page identifier
7206  *
7207  * Note: Verifies the correctness of the specified page of the B+tree.
7208  * Tests include checking the order of the keys in the page,
7209  * checking the key count and maximum key length values stored page header.
7210  */
7211 static DISK_ISVALID
7212 btree_check_page_key (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID_INT * btid, const char *btname,
7213  PAGE_PTR page_ptr, VPID * page_vpid)
7214 {
7215  int key_cnt, offset;
7216  RECDES peek_rec1, peek_rec2;
7217  DB_VALUE key1, key2;
7218  BTREE_NODE_HEADER *header = NULL;
7219  BTREE_NODE_TYPE node_type;
7220  int k, overflow_key1 = 0, overflow_key2 = 0;
7221  bool clear_key1, clear_key2;
7222  LEAF_REC leaf_pnt;
7223  NON_LEAF_REC nleaf_pnt;
7224  DISK_ISVALID valid = DISK_ERROR;
7225  int c;
7226  char err_buf[LINE_MAX];
7227 
7228  /* initialize */
7229  leaf_pnt.key_len = 0;
7230  VPID_SET_NULL (&leaf_pnt.ovfl);
7231  nleaf_pnt.key_len = 0;
7232  VPID_SET_NULL (&nleaf_pnt.pnt);
7233 
7234  btree_init_temp_key_value (&clear_key1, &key1);
7235  btree_init_temp_key_value (&clear_key2, &key2);
7236 
7237  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
7238 
7239  header = btree_get_node_header (thread_p, page_ptr);
7240  if (header == NULL)
7241  {
7242  snprintf (err_buf, LINE_MAX, "btree_check_page_key: get node header failure: %d\n", key_cnt);
7244  valid = DISK_INVALID;
7245  goto error;
7246  }
7247 
7248  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
7249 
7250  if ((node_type == BTREE_NON_LEAF_NODE && key_cnt <= 0) || (node_type == BTREE_LEAF_NODE && key_cnt < 0))
7251  {
7252  snprintf (err_buf, LINE_MAX, "btree_check_page_key: node key count underflow: %d\n", key_cnt);
7254  valid = DISK_INVALID;
7255  goto error;
7256  }
7257 
7258  if (key_cnt == 0)
7259  {
7260  return DISK_VALID;
7261  }
7262 
7263  if (key_cnt == 1)
7264  {
7265  /* there is only one key, so no order check */
7266  if (spage_get_record (thread_p, page_ptr, 1, &peek_rec1, PEEK) != S_SUCCESS)
7267  {
7268  valid = DISK_ERROR;
7269  goto error;
7270  }
7271 
7272  return DISK_VALID;
7273  }
7274 
7275  for (k = 1; k < key_cnt; k++)
7276  {
7277  if (spage_get_record (thread_p, page_ptr, k, &peek_rec1, PEEK) != S_SUCCESS)
7278  {
7279  valid = DISK_ERROR;
7280  goto error;
7281  }
7282 
7284  {
7285  continue;
7286  }
7287 
7288  /* read the current record key */
7289  if (node_type == BTREE_LEAF_NODE)
7290  {
7291  if (btree_read_record (thread_p, btid, page_ptr, &peek_rec1, &key1, (void *) &leaf_pnt, BTREE_LEAF_NODE,
7292  &clear_key1, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
7293  {
7294  valid = DISK_ERROR;
7295  goto error;
7296  }
7297  overflow_key1 = (leaf_pnt.key_len < 0);
7298  }
7299  else
7300  {
7301  if (btree_read_record (thread_p, btid, page_ptr, &peek_rec1, &key1, (void *) &nleaf_pnt,
7302  BTREE_NON_LEAF_NODE, &clear_key1, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
7303  {
7304  valid = DISK_ERROR;
7305  goto error;
7306  }
7307  overflow_key1 = (nleaf_pnt.key_len < 0);
7308  }
7309 
7310  if ((!overflow_key1 && (btree_get_disk_size_of_key (&key1) > header->max_key_len))
7311  || (overflow_key1 && (DISK_VPID_SIZE > header->max_key_len)))
7312  {
7313  btree_dump_page (thread_p, stdout, class_oid_p, btid, btname, page_ptr, page_vpid, 2, 2);
7314 
7315  snprintf (err_buf, LINE_MAX,
7316  "btree_check_page_key: --- max key length test failed for page "
7317  "{%d , %d}. Check key_rec = %d\n", page_vpid->volid, page_vpid->pageid, k);
7319  valid = DISK_INVALID;
7320  goto error;
7321  }
7322 
7323  if (spage_get_record (thread_p, page_ptr, k + 1, &peek_rec2, PEEK) != S_SUCCESS)
7324  {
7325  valid = DISK_ERROR;
7326  goto error;
7327  }
7328 
7330  {
7331  btree_clear_key_value (&clear_key1, &key1);
7332  continue;
7333  }
7334 
7335  /* read the next record key */
7336  if (node_type == BTREE_LEAF_NODE)
7337  {
7338  if (btree_read_record (thread_p, btid, page_ptr, &peek_rec2, &key2, (void *) &leaf_pnt, BTREE_LEAF_NODE,
7339  &clear_key2, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
7340  {
7341  valid = DISK_ERROR;
7342  goto error;
7343  }
7344  overflow_key2 = (leaf_pnt.key_len < 0);
7345  }
7346  else
7347  {
7348  if (btree_read_record (thread_p, btid, page_ptr, &peek_rec2, &key2, (void *) &nleaf_pnt,
7349  BTREE_NON_LEAF_NODE, &clear_key2, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
7350  {
7351  valid = DISK_ERROR;
7352  goto error;
7353  }
7354  overflow_key2 = (nleaf_pnt.key_len < 0);
7355  }
7356 
7357  if ((!overflow_key2 && (btree_get_disk_size_of_key (&key2) > header->max_key_len))
7358  || (overflow_key2 && (DISK_VPID_SIZE > header->max_key_len)))
7359  {
7360  btree_dump_page (thread_p, stdout, class_oid_p, btid, btname, page_ptr, page_vpid, 2, 2);
7361 
7362  snprintf (err_buf, LINE_MAX,
7363  "btree_check_page_key: --- max key length test failed for page "
7364  "{%d , %d}. Check key_rec = %d\n", page_vpid->volid, page_vpid->pageid, k + 1);
7366  valid = DISK_INVALID;
7367  goto error;
7368  }
7369 
7370  if (k == 1 && node_type == BTREE_NON_LEAF_NODE)
7371  {
7372  c = DB_LT; /* TODO - may compare with neg-inf sep */
7373  }
7374  else
7375  {
7376  /* compare the keys for the order */
7377  c = btree_compare_key (&key1, &key2, btid->key_type, 1, 1, NULL);
7378  }
7379 
7380  if (c != DB_LT)
7381  {
7382  btree_dump_page (thread_p, stdout, class_oid_p, btid, btname, page_ptr, page_vpid, 2, 2);
7383 
7384  snprintf (err_buf, LINE_MAX,
7385  "btree_check_page_key:--- key order test failed for page"
7386  " {%d , %d}. Check key_recs = %d and %d\n", page_vpid->volid, page_vpid->pageid, k, k + 1);
7388  valid = DISK_INVALID;
7389  goto error;
7390  }
7391 
7392  btree_clear_key_value (&clear_key1, &key1);
7393  btree_clear_key_value (&clear_key2, &key2);
7394  }
7395 
7396  /* page check passed */
7397  return DISK_VALID;
7398 
7399 error:
7400 
7401  btree_clear_key_value (&clear_key1, &key1);
7402  btree_clear_key_value (&clear_key2, &key2);
7403 
7404  return valid;
7405 }
7406 
7407 /*
7408  * btree_verify_subtree () - Check (verify) a page and its subtrees
7409  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
7410  * btid(in): B+tree index identifier
7411  * pg_ptr(in): Page pointer for the subtree root page
7412  * pg_vpid(in): Page identifier for the subtree root page
7413  * INFO(in):
7414  *
7415  * Note: Verifies the correctness of the content of the given page together with its subtree
7416  */
7417 static DISK_ISVALID
7418 btree_verify_subtree (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID_INT * btid, const char *btname,
7419  PAGE_PTR pg_ptr, VPID * pg_vpid, BTREE_NODE_INFO * INFO)
7420 {
7421  int key_cnt;
7422  NON_LEAF_REC nleaf_ptr;
7423  VPID page_vpid;
7424  PAGE_PTR page = NULL;
7425  RECDES rec;
7426  DB_VALUE curr_key;
7427  int offset;
7428  bool clear_key = false;
7429  int i;
7430  DISK_ISVALID valid = DISK_ERROR;
7431  BTREE_NODE_INFO INFO2;
7432  BTREE_NODE_HEADER *header = NULL;
7433  BTREE_NODE_TYPE node_type;
7434  char err_buf[LINE_MAX];
7435 
7436  db_make_null (&INFO2.max_key);
7437  btree_init_temp_key_value (&clear_key, &curr_key);
7438 
7439  /* test the page for the order of the keys within the page and get the biggest key of this page */
7440  valid = btree_check_page_key (thread_p, class_oid_p, btid, btname, pg_ptr, pg_vpid);
7441  if (valid != DISK_VALID)
7442  {
7443  goto error;
7444  }
7445 
7446  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
7447 
7448  header = btree_get_node_header (thread_p, pg_ptr);
7449  if (header == NULL)
7450  {
7451  valid = DISK_INVALID;
7452  goto error;
7453  }
7454 
7455  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
7456 
7457  if ((node_type == BTREE_NON_LEAF_NODE && key_cnt <= 0) || (node_type == BTREE_LEAF_NODE && key_cnt < 0))
7458  {
7459  btree_dump_page (thread_p, stdout, class_oid_p, btid, btname, pg_ptr, pg_vpid, 2, 2);
7460  snprintf (err_buf, LINE_MAX, "btree_verify_subtree: node key count underflow: %d\n", key_cnt);
7462  valid = DISK_INVALID;
7463  goto error;
7464  }
7465 
7466  /* initialize INFO structure */
7467  INFO->max_key_len = header->max_key_len;
7468  INFO->height = 0;
7469  INFO->tot_key_cnt = 0;
7470  INFO->page_cnt = 0;
7471  INFO->leafpg_cnt = 0;
7472  INFO->nleafpg_cnt = 0;
7473  db_make_null (&INFO->max_key);
7474 
7475  if (node_type == BTREE_NON_LEAF_NODE)
7476  { /* a non-leaf page */
7477  if (key_cnt < 0)
7478  {
7479  btree_dump_page (thread_p, stdout, class_oid_p, btid, btname, pg_ptr, pg_vpid, 2, 2);
7480 
7481  snprintf (err_buf, LINE_MAX, "btree_verify_subtree: node key count underflow: %d\n", key_cnt);
7483  valid = DISK_INVALID;
7484  goto error;
7485  }
7486 
7487  INFO2.key_area_len = 0;
7488  db_make_null (&INFO2.max_key);
7489 
7490  /* traverse all the subtrees of this non_leaf page and accumulate the statistical data in the INFO structure */
7491  for (i = 1; i <= key_cnt; i++)
7492  {
7493  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
7494  {
7495  valid = DISK_ERROR;
7496  goto error;
7497  }
7498 
7499  if (btree_read_record (thread_p, btid, pg_ptr, &rec, &curr_key, &nleaf_ptr, BTREE_NON_LEAF_NODE,
7500  &clear_key, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
7501  {
7502  valid = DISK_ERROR;
7503  goto error;
7504  }
7505 
7506  page_vpid = nleaf_ptr.pnt;
7507 
7508  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7509  if (page == NULL)
7510  {
7511  valid = DISK_ERROR;
7512  goto error;
7513  }
7514 
7515  (void) pgbuf_check_page_ptype (thread_p, page, PAGE_BTREE);
7516 
7517  valid = btree_verify_subtree (thread_p, class_oid_p, btid, btname, page, &page_vpid, &INFO2);
7518  if (valid != DISK_VALID)
7519  {
7520  goto error;
7521  }
7522 
7523  /* accumulate results */
7524  INFO->height = INFO2.height + 1;
7525  INFO->tot_key_cnt += INFO2.tot_key_cnt;
7526  INFO->page_cnt += INFO2.page_cnt;
7527  INFO->leafpg_cnt += INFO2.leafpg_cnt;
7528  INFO->nleafpg_cnt += INFO2.nleafpg_cnt;
7529 
7530  pgbuf_unfix_and_init (thread_p, page);
7531  btree_clear_key_value (&clear_key, &curr_key);
7532  }
7533  INFO->page_cnt += 1;
7534  INFO->nleafpg_cnt += 1;
7535  }
7536  else
7537  { /* a leaf page */
7538  /* form the INFO structure from the header information */
7539  INFO->height = 1;
7540  INFO->tot_key_cnt = key_cnt;
7541  INFO->page_cnt = 1;
7542  INFO->leafpg_cnt = 1;
7543  INFO->nleafpg_cnt = 0;
7544  }
7545 
7546  return DISK_VALID;
7547 
7548 error:
7549  btree_clear_key_value (&clear_key, &curr_key);
7550 
7551  if (page)
7552  {
7553  pgbuf_unfix_and_init (thread_p, page);
7554  }
7555 
7556  return valid;
7557 }
7558 
7559 /*
7560  * btree_verify_tree () - Check (verify) tree
7561  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
7562  * btid_int(in): B+tree index identifier
7563  *
7564  * Note: Verifies the correctness of the B+tree index. During tree traversal, several tests are conducted,
7565  * such as checking the order of keys on a page or among pages that are in a father-child relationship.
7566  */
7568 btree_verify_tree (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID_INT * btid_int, const char *btname)
7569 {
7570  VPID p_vpid;
7571  PAGE_PTR root = NULL;
7573  DISK_ISVALID valid = DISK_ERROR;
7574 
7575  p_vpid.pageid = btid_int->sys_btid->root_pageid; /* read root page */
7576  p_vpid.volid = btid_int->sys_btid->vfid.volid;
7577  root = pgbuf_fix (thread_p, &p_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7578  if (root == NULL)
7579  {
7580  valid = DISK_ERROR;
7581  goto error;
7582  }
7583 
7584  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
7585 
7586  /* traverse the tree and store the statistical data in the INFO structure */
7587  valid = btree_verify_subtree (thread_p, class_oid_p, btid_int, btname, root, &p_vpid, &INFO);
7588  if (valid != DISK_VALID)
7589  {
7590  goto error;
7591  }
7592 
7593  pgbuf_unfix_and_init (thread_p, root);
7594 
7595  return DISK_VALID;
7596 
7597 error:
7598 
7599  if (root)
7600  {
7601  pgbuf_unfix_and_init (thread_p, root);
7602  }
7603 
7604  return valid;
7605 }
7606 
7607 /*
7608  * db_check consistency routines
7609  */
7610 
7611 /*
7612  * btree_check_pages () -
7613  * return: DISK_VALID, DISK_VALID or DISK_ERROR
7614  * btid(in): B+tree index identifier
7615  * pg_ptr(in): Page pointer
7616  * pg_vpid(in): Page identifier
7617  *
7618  * Note: Verify that given page and all its subpages are valid.
7619  */
7620 static DISK_ISVALID
7621 btree_check_pages (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, VPID * pg_vpid)
7622 {
7623  VPID page_vpid; /* Child page identifier */
7624  PAGE_PTR page = NULL; /* Child page pointer */
7625  RECDES rec; /* Record descriptor for page node records */
7626  DISK_ISVALID vld = DISK_ERROR; /* Validity return code from subtree */
7627  int key_cnt; /* Number of keys in the page */
7628  int i; /* Loop counter */
7629  NON_LEAF_REC nleaf;
7630  BTREE_NODE_HEADER *header = NULL;
7631  BTREE_NODE_TYPE node_type;
7632 
7633  /* Verify the given page */
7634  vld = file_check_vpid (thread_p, &btid->sys_btid->vfid, pg_vpid);
7635  if (vld != DISK_VALID)
7636  {
7637  goto error;
7638  }
7639 
7640 #ifdef SPAGE_DEBUG
7641  if (spage_check (thread_p, pg_ptr) != NO_ERROR)
7642  {
7643  vld = DISK_ERROR;
7644  goto error;
7645  }
7646 #endif /* SPAGE_DEBUG */
7647 
7648  /* Verify subtree child pages */
7649 
7650  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
7651 
7652  header = btree_get_node_header (thread_p, pg_ptr);
7653  if (header == NULL)
7654  {
7655  vld = DISK_ERROR;
7656  goto error;
7657  }
7658 
7659  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
7660 
7661  if (node_type == BTREE_NON_LEAF_NODE)
7662  { /* non-leaf page */
7663  for (i = 1; i <= key_cnt; i++)
7664  {
7665  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
7666  {
7667  vld = DISK_ERROR;
7668  goto error;
7669  }
7671  page_vpid = nleaf.pnt;
7672 
7673  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7674  if (page == NULL)
7675  {
7676  vld = DISK_ERROR;
7677  goto error;
7678  }
7679 
7680  (void) pgbuf_check_page_ptype (thread_p, page, PAGE_BTREE);
7681 
7682  vld = btree_check_pages (thread_p, btid, page, &page_vpid);
7683  if (vld != DISK_VALID)
7684  {
7685  goto error;
7686  }
7687  pgbuf_unfix_and_init (thread_p, page);
7688  }
7689  }
7690 
7691  return DISK_VALID;
7692 
7693 error:
7694 
7695  if (page)
7696  {
7697  pgbuf_unfix_and_init (thread_p, page);
7698  }
7699  return vld;
7700 
7701 }
7702 
7703 /*
7704  * btree_check_tree () -
7705  * return: DISK_VALID, DISK_INVALID or DISK_ERROR
7706  * btid(in): B+tree index identifier
7707  *
7708  * Note: Verify that all the pages of the specified index are valid.
7709  */
7711 btree_check_tree (THREAD_ENTRY * thread_p, const OID * class_oid_p, BTID * btid, const char *btname)
7712 {
7713  DISK_ISVALID valid = DISK_ERROR;
7714  VPID r_vpid; /* root page identifier */
7715  PAGE_PTR r_pgptr = NULL; /* root page pointer */
7717  BTREE_ROOT_HEADER *root_header = NULL;
7718 
7719  /* Fetch the root page */
7720  r_vpid.pageid = btid->root_pageid;
7721  r_vpid.volid = btid->vfid.volid;
7722  r_pgptr = pgbuf_fix (thread_p, &r_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
7723  if (r_pgptr == NULL)
7724  {
7725  valid = DISK_ERROR;
7726  goto error;
7727  }
7728 
7729  (void) pgbuf_check_page_ptype (thread_p, r_pgptr, PAGE_BTREE);
7730 
7731  root_header = btree_get_root_header (thread_p, r_pgptr);
7732  if (root_header == NULL)
7733  {
7734  valid = DISK_ERROR;
7735  goto error;
7736  }
7737 
7738  btid_int.sys_btid = btid;
7739  if (btree_glean_root_header_info (thread_p, root_header, &btid_int) != NO_ERROR)
7740  {
7741  valid = DISK_ERROR;
7742  goto error;
7743  }
7744 
7745  valid = btree_check_pages (thread_p, &btid_int, r_pgptr, &r_vpid);
7746  if (valid != DISK_VALID)
7747  {
7748  goto error;
7749  }
7750 
7751  pgbuf_unfix_and_init (thread_p, r_pgptr);
7752 
7753  /* Now check for the logical correctness of the tree */
7754  return btree_verify_tree (thread_p, class_oid_p, &btid_int, btname);
7755 
7756 error:
7757 
7758  if (r_pgptr)
7759  {
7760  pgbuf_unfix_and_init (thread_p, r_pgptr);
7761  }
7762  return valid;
7763 }
7764 
7765 /*
7766  * btree_check_by_btid () -
7767  * btid(in): B+tree index identifier
7768  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
7769  *
7770  * Note: Verify that all pages of a btree indices are valid.
7771  */
7774 {
7775  DISK_ISVALID valid = DISK_ERROR;
7776  char *btname;
7777  FILE_DESCRIPTORS fdes;
7778 
7779  assert (!VFID_ISNULL (&btid->vfid));
7780 
7781  if (file_descriptor_get (thread_p, &btid->vfid, &fdes) != NO_ERROR)
7782  {
7783  ASSERT_ERROR ();
7784  return DISK_ERROR;
7785  }
7786 
7787  if (btree_get_btid_from_file (thread_p, &btid->vfid, btid) != NO_ERROR)
7788  {
7789  ASSERT_ERROR ();
7790  return DISK_ERROR;
7791  }
7792 
7793  /* get the index name of the index key */
7794  if (heap_get_indexinfo_of_btid (thread_p, &fdes.btree.class_oid, btid, NULL, NULL, NULL, NULL, &btname, NULL) !=
7795  NO_ERROR)
7796  {
7797  if (er_errid () == NO_ERROR)
7798  {
7799  /* this is sometimes expected. I found a case when index was just loaded, but class object was not updated
7800  * yet. heap_get_indexinfo_of_btid is ambiguously handled, it does not set errors, but returns error code.
7801  * this crashes in ASSERT_ERROR safe-guards.
7802  *
7803  * this is, for now, a quick fix to avoid the safe-guard. I hope it won't hide other issues.
7804  */
7805  btree_log_if_enabled ("btree_check_by_btid on (%d, %d|%d) failed, because index info could not be "
7806  "fetched. it is possible that index is still loading... \n", BTID_AS_ARGS (btid));
7807  valid = DISK_VALID;
7808  }
7809  goto exit_on_end;
7810  }
7811 
7812  valid = btree_check_tree (thread_p, &fdes.btree.class_oid, btid, btname);
7813  if (valid == DISK_ERROR)
7814  {
7815  ASSERT_ERROR ();
7816  }
7817  else if (valid == DISK_INVALID)
7818  {
7819  assert (false);
7820  }
7821 
7822 exit_on_end:
7823  if (btname)
7824  {
7825  free_and_init (btname);
7826  }
7827  assert (valid != DISK_INVALID);
7828 
7829  return valid;
7830 }
7831 
7832 int
7833 btree_get_pkey_btid (THREAD_ENTRY * thread_p, OID * cls_oid, BTID * pkey_btid)
7834 {
7835  OR_CLASSREP *cls_repr = NULL;
7836  OR_INDEX *curr_idx;
7837  int cache_idx = -1;
7838  int i;
7839  int error = NO_ERROR;
7840 
7841  assert (pkey_btid != NULL);
7842 
7843  BTID_SET_NULL (pkey_btid);
7844 
7845  cls_repr = heap_classrepr_get (thread_p, cls_oid, NULL, NULL_REPRID, &cache_idx);
7846  if (cls_repr == NULL)
7847  {
7848  ASSERT_ERROR_AND_SET (error);
7849 
7850  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error, 0);
7851  return error;
7852  }
7853 
7854  for (i = 0, curr_idx = cls_repr->indexes; i < cls_repr->n_indexes; i++, curr_idx++)
7855  {
7856  if (curr_idx == NULL)
7857  {
7858  error = ER_UNEXPECTED;
7859  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error, 1, "Bad index information in class representation.");
7860  break;
7861  }
7862 
7863  if (curr_idx->type == BTREE_PRIMARY_KEY)
7864  {
7865  BTID_COPY (pkey_btid, &curr_idx->btid);
7866  break;
7867  }
7868  }
7869 
7870  if (cls_repr != NULL)
7871  {
7872  heap_classrepr_free_and_init (cls_repr, &cache_idx);
7873  }
7874 
7875  return error;
7876 }
7877 
7878 /*
7879  * btree_check_by_class_oid () -
7880  * cls_oid(in):
7881  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
7882  *
7883  */
7885 btree_check_by_class_oid (THREAD_ENTRY * thread_p, OID * cls_oid, BTID * idx_btid)
7886 {
7887  OR_CLASSREP *cls_repr = NULL;
7888  OR_INDEX *curr;
7889  BTID btid;
7890  int i;
7891  int cache_idx = -1;
7893 
7894  if (lock_object (thread_p, cls_oid, oid_Root_class_oid, IS_LOCK, LK_UNCOND_LOCK) != LK_GRANTED)
7895  {
7896  return DISK_ERROR;
7897  }
7898 
7899  cls_repr = heap_classrepr_get (thread_p, cls_oid, NULL, NULL_REPRID, &cache_idx);
7900  if (cls_repr == NULL)
7901  {
7902  lock_unlock_object (thread_p, cls_oid, oid_Root_class_oid, IS_LOCK, true);
7903 
7904  ASSERT_ERROR ();
7906  return DISK_ERROR;
7907  }
7908 
7909  for (i = 0, curr = cls_repr->indexes; i < cls_repr->n_indexes; i++, curr++)
7910  {
7911  if (curr == NULL)
7912  {
7913  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_UNEXPECTED, 1, "Bad index information in class representation.");
7914  rv = DISK_ERROR;
7915  break;
7916  }
7917 
7918  if (idx_btid != NULL && !BTID_IS_EQUAL (&curr->btid, idx_btid))
7919  {
7920  continue;
7921  }
7922 
7923  BTID_COPY (&btid, &curr->btid);
7924  if (btree_check_by_btid (thread_p, &btid) != DISK_VALID)
7925  {
7926  rv = DISK_ERROR;
7927  break;
7928  }
7929  }
7930 
7931  lock_unlock_object (thread_p, cls_oid, oid_Root_class_oid, IS_LOCK, true);
7932 
7933  if (cls_repr)
7934  {
7935  heap_classrepr_free_and_init (cls_repr, &cache_idx);
7936  }
7937 
7938  return rv;
7939 }
7940 
7941 /*
7942  * btree_repair_prev_link_by_btid () - Walk the leaf chain of one b-tree and check (or rewrite) each leaf's prev_vpid.
7943  * btid(in) : B-tree identifier; the root page is fixed from btid->vfid.volid and btid->root_pageid.
7944  * repair(in) : true latches pages for write and rewrites bad prev links; false latches for read and only reports.
7945  * index_name(in) : Index name; only used in the "repair index"/"check index" console message.
7946  * return: DISK_VALID if no bad link was seen, DISK_INVALID if one was (already rewritten when repair is true), DISK_ERROR on a page fix, header, record or link update failure, or after retry_max failed conditional latches.
7947  */
7948 static DISK_ISVALID
7949 btree_repair_prev_link_by_btid (THREAD_ENTRY * thread_p, BTID * btid, bool repair, char *index_name)
7950 {
7951  PAGE_PTR current_pgptr, next_pgptr, root_pgptr;
7952  VPID current_vpid, next_vpid;
7953  DISK_ISVALID valid = DISK_VALID;
7954  PGBUF_LATCH_MODE request_mode;
7955  int retry_count = 0;
7956  int retry_max = 20;
7957  char output[LINE_MAX];
7958  BTREE_NODE_HEADER *header = NULL;
7959  BTREE_NODE_TYPE node_type;
7960 
7961  VPID_SET_NULL (&next_vpid);
7962 
7963  snprintf (output, LINE_MAX, "%s - %s... ", repair ? "repair index" : "check index", index_name);
7964  xcallback_console_print (thread_p, output);
7965 
7966  current_pgptr = NULL;
7967  next_pgptr = NULL;
7968  root_pgptr = NULL;
7969 
7970  request_mode = repair ? PGBUF_LATCH_WRITE : PGBUF_LATCH_READ;
7971 
7972  /* root page */
7973  VPID_SET (&current_vpid, btid->vfid.volid, btid->root_pageid);
7974  root_pgptr = pgbuf_fix (thread_p, &current_vpid, OLD_PAGE, request_mode, PGBUF_UNCONDITIONAL_LATCH); /* kept fixed until exit_repair */
7975  if (root_pgptr == NULL)
7976  {
7977  valid = DISK_ERROR;
7978  goto exit_repair;
7979  }
7980 
7981  (void) pgbuf_check_page_ptype (thread_p, root_pgptr, PAGE_BTREE);
7982 
7983 retry_repair: /* re-entered after a failed conditional latch; current_vpid and next_vpid keep their values */
7984  if (retry_count >= retry_max)
7985  {
7986  valid = DISK_ERROR;
7987  goto exit_repair;
7988  }
7989 
7990 #if defined(SERVER_MODE)
7991  if (retry_count > 0)
7992  {
7993  thread_sleep (10);
7994  }
7995 #endif
7996 
7997  if (current_pgptr) /* drop whatever is still fixed from the previous attempt before fixing again */
7998  {
7999  pgbuf_unfix_and_init (thread_p, current_pgptr);
8000  }
8001  if (next_pgptr)
8002  {
8003  pgbuf_unfix_and_init (thread_p, next_pgptr);
8004  }
8005 
8006  while (!VPID_ISNULL (&current_vpid)) /* phase 1: follow the child pointer read from slot 1 of each non-leaf node until a leaf is fixed */
8007  {
8008  current_pgptr = pgbuf_fix (thread_p, &current_vpid, OLD_PAGE, request_mode, PGBUF_CONDITIONAL_LATCH);
8009  if (current_pgptr == NULL)
8010  {
8011  retry_count++;
8012  goto retry_repair;
8013  }
8014 
8015  (void) pgbuf_check_page_ptype (thread_p, current_pgptr, PAGE_BTREE);
8016 
8017  header = btree_get_node_header (thread_p, current_pgptr);
8018  if (header == NULL)
8019  {
8020  valid = DISK_ERROR;
8021  goto exit_repair;
8022  }
8023 
8024  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
8025 
8026  if (node_type == BTREE_LEAF_NODE)
8027  {
8028  next_vpid = header->next_vpid; /* leaf reached: it stays fixed as current_pgptr for phase 2 */
8029  break;
8030  }
8031  else
8032  {
8033  RECDES rec;
8035 
8036  if (spage_get_record (thread_p, current_pgptr, 1, &rec, PEEK) != S_SUCCESS)
8037  {
8038  valid = DISK_ERROR;
8039  goto exit_repair;
8040  }
8041  btree_read_fixed_portion_of_non_leaf_record (&rec, &non_leaf_rec);
8042  current_vpid = non_leaf_rec.pnt;
8043  pgbuf_unfix_and_init (thread_p, current_pgptr);
8044  }
8045  }
8046 
8047  assert (node_type == BTREE_LEAF_NODE);
8048  assert (header != NULL);
8049 
8050  while (!VPID_ISNULL (&next_vpid)) /* phase 2: walk the leaf chain through next_vpid, one (current, next) pair at a time */
8051  {
8052  next_pgptr = pgbuf_fix (thread_p, &next_vpid, OLD_PAGE, request_mode, PGBUF_CONDITIONAL_LATCH);
8053  if (next_pgptr == NULL)
8054  {
8055  retry_count++;
8056  goto retry_repair;
8057  }
8058 
8059  (void) pgbuf_check_page_ptype (thread_p, next_pgptr, PAGE_BTREE);
8060 
8061  header = btree_get_node_header (thread_p, next_pgptr);
8062  if (header == NULL)
8063  {
8064  valid = DISK_ERROR;
8065  goto exit_repair;
8066  }
8067 
8068  if (!VPID_EQ (&header->prev_vpid, &current_vpid)) /* header belongs to the next leaf here: its prev link must name the current leaf */
8069  {
8071  next_vpid.pageid);
8072 
8073  if (repair)
8074  {
8075  BTID_INT bint;
8076 
8077  log_sysop_start (thread_p);
8078  bint.sys_btid = btid;
8079  if (btree_set_vpid_previous_vpid (thread_p, &bint, next_pgptr, &current_vpid) != NO_ERROR)
8080  {
8081  valid = DISK_ERROR;
8082  log_sysop_abort (thread_p);
8083  goto exit_repair;
8084  }
8085  valid = DISK_INVALID; /* remembered so that "repaired" is printed at exit; the walk continues */
8086  log_sysop_commit (thread_p);
8088  next_vpid.volid, next_vpid.pageid);
8089  }
8090  else
8091  {
8092  valid = DISK_INVALID; /* check-only mode stops at the first bad link */
8093  goto exit_repair;
8094  }
8095  }
8096  pgbuf_unfix_and_init (thread_p, current_pgptr);
8097 
8098  /* move to next page */
8099  current_vpid = next_vpid;
8100  next_vpid = header->next_vpid; /* header still points into next_pgptr, which stays fixed as the new current page */
8101  current_pgptr = next_pgptr;
8102  next_pgptr = NULL;
8103  }
8104 
8105 exit_repair:
8106  if (root_pgptr)
8107  {
8108  pgbuf_unfix (thread_p, root_pgptr);
8109  }
8110  if (current_pgptr)
8111  {
8112  pgbuf_unfix (thread_p, current_pgptr);
8113  }
8114  if (next_pgptr)
8115  {
8116  pgbuf_unfix (thread_p, next_pgptr);
8117  }
8118 
8119  if (valid == DISK_ERROR)
8120  {
8121  xcallback_console_print (thread_p, (char *) "error\n");
8122  }
8123  else if (valid == DISK_VALID)
8124  {
8125  xcallback_console_print (thread_p, (char *) "pass\n");
8126  }
8127  else
8128  {
8129  if (repair)
8130  {
8131  xcallback_console_print (thread_p, (char *) "repaired\n");
8132  }
8133  else
8134  {
8135  xcallback_console_print (thread_p, (char *) "repair needed\n");
8136  }
8137  }
8138 
8139  return valid;
8140 }
8141 
8142 /*
8143  * btree_repair_prev_link_by_class_oid () - Check or repair the leaf prev links of the indexes of one class.
8144  * oid(in) : Class OID; its class representation supplies the index list. index_btid(in) : when not NULL, only the index whose BTID equals it is processed.
8145  * repair(in) : Forwarded to btree_repair_prev_link_by_btid ().
8146  * return: DISK_VALID, or the first non-DISK_VALID result of btree_repair_prev_link_by_btid () (the loop stops there); DISK_ERROR when set-up fails, e.g. the class representation cannot be read.
8147  */
8148 static DISK_ISVALID
8149 btree_repair_prev_link_by_class_oid (THREAD_ENTRY * thread_p, OID * oid, BTID * index_btid, bool repair)
8150 {
8151  OR_CLASSREP *cls_repr = NULL;
8152  OR_INDEX *curr;
8153  int i;
8154  int cache_idx = -1;
8155  DISK_ISVALID valid = DISK_VALID;
8156  char *index_name;
8157 
8159  {
8160  return DISK_ERROR;
8161  }
8162 
8163  cls_repr = heap_classrepr_get (thread_p, oid, NULL, NULL_REPRID, &cache_idx);
8164 
8165  if (cls_repr == NULL)
8166  {
8167  lock_unlock_object (thread_p, oid, oid_Root_class_oid, IS_LOCK, true); /* release the IS_LOCK on the class before bailing out */
8168  return DISK_ERROR;
8169  }
8170 
8171  for (i = 0, curr = cls_repr->indexes; i < cls_repr->n_indexes && curr && valid == DISK_VALID; i++, curr++)
8172  {
8173  if (index_btid != NULL && !BTID_IS_EQUAL (&curr->btid, index_btid))
8174  {
8175  continue;
8176  }
8177 
8178  heap_get_indexinfo_of_btid (thread_p, oid, &curr->btid, NULL, NULL, NULL, NULL, &index_name, NULL); /* NOTE(review): result ignored and index_name has no initializer - confirm the callee always sets it, even on failure */
8179  valid = btree_repair_prev_link_by_btid (thread_p, &curr->btid, repair, index_name);
8180  if (index_name) /* the name is freed here, i.e. the caller owns it */
8181  {
8182  free_and_init (index_name);
8183  }
8184  }
8185 
8186  lock_unlock_object (thread_p, oid, oid_Root_class_oid, IS_LOCK, true);
8187  if (cls_repr)
8188  {
8189  heap_classrepr_free_and_init (cls_repr, &cache_idx);
8190  }
8191 
8192  return valid;
8193 }
8194 
8195 /*
8196  * btree_repair_prev_link () -
8197  * oid(in) :
8198  * index_btid(in) :
8199  * repair(in) :
8200  * return:
8201  */
8203 btree_repair_prev_link (THREAD_ENTRY * thread_p, OID * oid, BTID * index_btid, bool repair)
8204 {
8205  BTID btid;
8206  DISK_ISVALID valid;
8207  char *index_name = NULL;
8208  OID class_oid = OID_INITIALIZER;
8209 
8210  if (oid != NULL && !OID_ISNULL (oid))
8211  {
8212  return btree_repair_prev_link_by_class_oid (thread_p, oid, index_btid, repair);
8213  }
8214 
8215  valid = DISK_VALID;
8216 
8217  /* Go to each file, check only the btree files */
8218  VFID_SET_NULL (&btid.vfid);
8219  while (true)
8220  {
8221  if (file_tracker_interruptable_iterate (thread_p, FILE_BTREE, &btid.vfid, &class_oid) != NO_ERROR)
8222  {
8223  ASSERT_ERROR ();
8224  valid = valid == DISK_VALID ? DISK_ERROR : valid;
8225  break;
8226  }
8227  if (VFID_ISNULL (&btid.vfid))
8228  {
8229  /* no more b-trees */
8230  break;
8231  }
8232 
8233  if (btree_get_btid_from_file (thread_p, &btid.vfid, &btid) != NO_ERROR)
8234  {
8235  ASSERT_ERROR ();
8236  valid = valid == DISK_VALID ? DISK_ERROR : valid;
8237  break;
8238  }
8239 
8240  /* get the index name of the index key */
8241  if (heap_get_indexinfo_of_btid (thread_p, &class_oid, &btid, NULL, NULL, NULL, NULL, &index_name, NULL)
8242  != NO_ERROR)
8243  {
8244  ASSERT_ERROR ();
8245  valid = valid == DISK_VALID ? DISK_ERROR : valid;
8246  break;
8247  }
8248 
8249  valid = btree_repair_prev_link_by_btid (thread_p, &btid, repair, index_name);
8250  if (valid == DISK_ERROR)
8251  {
8252  ASSERT_ERROR ();
8253  break;
8254  }
8255  }
8256  if (!OID_ISNULL (&class_oid))
8257  {
8258  lock_unlock_object (thread_p, &class_oid, oid_Root_class_oid, SCH_S_LOCK, true);
8259  }
8260 
8261  return valid;
8262 }
8263 
8264 /*
8265  * btree_check_all () - Run btree_check_by_btid () on every FILE_BTREE file returned by the file tracker iterator.
8266  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
8267  *
8268  * Note: Verify that all pages of all btree indices are valid. Iteration stops at the first DISK_ERROR; a DISK_INVALID result is recorded and the remaining b-trees are still checked.
8269  */
8272 {
8273  DISK_ISVALID valid, allvalid; /* Validation return code */
8274  BTID btid;
8275 
8276  OID class_oid = OID_INITIALIZER;
8277 
8278  int error_code = NO_ERROR;
8279 
8280  allvalid = DISK_VALID;
8281  /* Go to each file, check only the btree files */
8282  VFID_SET_NULL (&btid.vfid);
8283  while (true)
8284  {
8285  error_code = file_tracker_interruptable_iterate (thread_p, FILE_BTREE, &btid.vfid, &class_oid);
8286  if (error_code != NO_ERROR)
8287  {
8288  ASSERT_ERROR ();
8289  allvalid = (allvalid == DISK_VALID) ? DISK_ERROR : allvalid; /* an earlier DISK_INVALID is not downgraded to DISK_ERROR */
8290  break;
8291  }
8292  if (VFID_ISNULL (&btid.vfid))
8293  {
8294  /* no more b-tree files */
8295  break;
8296  }
8297  assert (!OID_ISNULL (&class_oid));
8298 
8299  /* Check BTree file */
8300  valid = btree_check_by_btid (thread_p, &btid); /* NOTE(review): only btid.vfid is set in this function - presumably the callee resolves the root page itself; confirm */
8301  if (valid == DISK_ERROR)
8302  {
8303  ASSERT_ERROR ();
8304  allvalid = (allvalid == DISK_VALID) ? DISK_ERROR : allvalid;
8305  break;
8306  }
8307  if (valid == DISK_INVALID)
8308  {
8309  assert_release (false);
8310  allvalid = valid; /* keep going: later b-trees are still checked */
8311  }
8312  }
8313 
8314  if (!OID_ISNULL (&class_oid)) /* class_oid is filled in by the iterator; unlock whatever class it left set */
8315  {
8316  lock_unlock_object (thread_p, &class_oid, oid_Root_class_oid, SCH_S_LOCK, true);
8317  }
8318  return allvalid;
8319 }
8320 
8321 /*
8322  * btree_keyoid_checkscan_start () - Prepare a key-oid check scan structure for one index.
8323  * return: NO_ERROR, or ER_OUT_OF_VIRTUAL_MEMORY when the OID buffer pointer is NULL after set-up
8324  * btid(in): B+tree index identifier; copied by value into btscan->btid
8325  * btscan(out): Set to key-oid check scan structure.
8326  *
8327  * Note: Start a <key-oid> check scan on the index. The OID buffer is released by btree_keyoid_checkscan_end ().
8328  */
8329 int
8331 {
8332  assert (btid != NULL);
8333 
8334  /* initialize scan structure */
8335  btscan->btid = *btid;
8336  BTREE_INIT_SCAN (&btscan->btree_scan);
8337 
8338  /* Initialize OID list. */
8340  if (btscan->oid_list.oidp == NULL)
8341  {
8343  return ER_OUT_OF_VIRTUAL_MEMORY;
8344  }
8345  btscan->oid_list.oid_cnt = 0;
8347  btscan->oid_list.max_oid_cnt = btscan->oid_list.capacity; /* the whole buffer may be used */
8348  btscan->oid_list.next_list = NULL;
8349 
8350  return NO_ERROR;
8351 }
8352 
8353 /*
8354  * btree_keyoid_checkscan_check () - Search the index for key and look for oid among the OIDs the search returns.
8355  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR (DISK_VALID: oid found; DISK_INVALID: not found, or no MVCC snapshot; DISK_ERROR: the search returned a negative count)
8356  * btscan(in): B+tree key-oid check scan structure.
8357  * cls_oid(in): Passed through to btree_keyval_search ().
8358  * key(in): Key pointer; used as both bounds of a GE_LE range. A DB_TYPE_MIDXKEY key with a NULL domain gets its domain set from the scan's key type.
8359  * oid(in): Object identifier for the key
8360  *
8361  * Note: Check if the given key-oid pair exists in the index.
8362  */
8365  OID * oid)
8366 {
8367  int k; /* Loop iteration variable */
8368  INDX_SCAN_ID isid;
8369  DISK_ISVALID status;
8370  key_val_range kv_range;
8372 
8373  mvcc_snapshot = logtb_get_mvcc_snapshot (thread_p);
8374  if (mvcc_snapshot == NULL)
8375  {
8376  ASSERT_ERROR ();
8377  return DISK_INVALID; /* NOTE(review): an error is expected to be set here, yet DISK_INVALID (not DISK_ERROR) is returned - confirm this is intended */
8378  }
8379 
8380  /* initialize scan structure */
8381  BTREE_INIT_SCAN (&btscan->btree_scan);
8382 
8383  scan_init_index_scan (&isid, &btscan->oid_list, mvcc_snapshot);
8384 
8386 
8387  pr_share_value (key, &kv_range.key1);
8388  pr_share_value (key, &kv_range.key2);
8389  kv_range.range = GE_LE; /* key1 == key2 == key, so the range selects exactly this key */
8390  kv_range.num_index_term = 0;
8391 
8392  do
8393  {
8394  /* search index */
8395  btscan->oid_list.oid_cnt =
8396  btree_keyval_search (thread_p, &btscan->btid, S_SELECT, &btscan->btree_scan, &kv_range, cls_oid, NULL,
8397  &isid, false);
8398  assert (btscan->oid_list.oid_cnt <= btscan->oid_list.capacity);
8399 
8400  if (DB_VALUE_DOMAIN_TYPE (key) == DB_TYPE_MIDXKEY && key->data.midxkey.domain == NULL)
8401  {
8402  /* set the appropriate domain, as it might be needed for printing if the given key-oid pair does not exist in
8403  * the index. */
8404  key->data.midxkey.domain = btscan->btree_scan.btid_int.key_type;
8405  }
8406 
8407  if (btscan->oid_list.oid_cnt < 0)
8408  {
8409  status = DISK_ERROR;
8410  goto end;
8411  }
8412 
8413  /* search current set of OIDs to see if given <key-oid> pair exists */
8414  for (k = 0; k < btscan->oid_list.oid_cnt; k++)
8415  {
8416  if (OID_EQ (&btscan->oid_list.oidp[k], oid))
8417  { /* <key-oid> pair found */
8418  status = DISK_VALID;
8419  goto end;
8420  }
8421  }
8422  }
8423  while (!BTREE_END_OF_SCAN (&btscan->btree_scan)); /* the search is repeated until the scan reports its end */
8424 
8425  /* indicate <key_oid> pair is not found */
8426  status = DISK_INVALID;
8427 
8428 end:
8429 
8430  btree_scan_clear_key (&btscan->btree_scan);
8431 
8432  /* do not use copy_buf for key-val scan, only use for key-range scan */
8433 
8434  return status;
8435 }
8436 
8437 /*
8438  * btree_keyoid_checkscan_end () - Release the OID buffer of a key-oid check scan.
8439  * return: nothing
8440  * btscan(in): B+tree key-oid check scan structure.
8441  *
8442  * Note: End the <key-oid> check scan on the index. Does nothing when oid_list.oidp is already NULL.
8443  */
8444 void
8446 {
8447  /* Deallocate allocated areas */
8448  if (btscan->oid_list.oidp)
8449  {
8450  os_free_and_init (btscan->oid_list.oidp);
8451  btscan->oid_list.capacity = 0; /* no buffer left, so no room for OIDs */
8452  btscan->oid_list.max_oid_cnt = 0;
8453  }
8454 }
8455 
8456 /*
8457  * b+tree space routines
8458  */
8459 
8460 /*
8461  * btree_get_subtree_capacity () - Recursively gather capacity statistics for the subtree rooted at pg_ptr.
8462  * return: NO_ERROR; on failure the first non-zero of ret, er_errid (), ER_FAILED
8463  * btid(in): B-tree info, passed to the record reading helpers
8464  * pg_ptr(in): Fixed page that is the root of the subtree; this function does not unfix it
8465  * cpc(out): Capacity structure; every field is reset first and then filled for this subtree
8466  */
8467 static int
8469 {
8470  RECDES rec; /* Page record descriptor */
8471  int free_space; /* Total free space of the Page */
8472  int key_cnt; /* Page key count */
8473  NON_LEAF_REC nleaf_ptr; /* NonLeaf Record pointer */
8474  VPID page_vpid; /* Child page identifier */
8475  PAGE_PTR page = NULL; /* Child page pointer */
8476  int i; /* Loop counter */
8477  int offset; /* Offset to the beginning of OID list */
8478  int oid_cnt; /* Number of OIDs */
8479  VPID ovfl_vpid; /* Overflow page identifier */
8480  RECDES orec; /* Overflow record descriptor */
8481  LEAF_REC leaf_pnt;
8482 
8483  bool clear_key = false;
8484  PAGE_PTR ovfp = NULL;
8485  DB_VALUE key1;
8486  int ret = NO_ERROR;
8487  BTREE_NODE_HEADER *header = NULL;
8488  BTREE_NODE_TYPE node_type;
8489 
8490  /* initialize */
8491  leaf_pnt.key_len = 0;
8492  VPID_SET_NULL (&leaf_pnt.ovfl);
8493 
8494  btree_init_temp_key_value (&clear_key, &key1);
8495 
8496  /* initialize capacity structure */
8497  cpc->dis_key_cnt = 0;
8498  cpc->tot_val_cnt = 0;
8499  cpc->avg_val_per_key = 0;
8500  cpc->leaf_pg_cnt = 0;
8501  cpc->nleaf_pg_cnt = 0;
8502  cpc->tot_pg_cnt = 0;
8503  cpc->height = 0;
8504  cpc->sum_rec_len = 0;
8505  cpc->sum_key_len = 0;
8506  cpc->avg_key_len = 0;
8507  cpc->avg_rec_len = 0;
8508  cpc->tot_free_space = 0;
8509  cpc->tot_space = 0;
8510  cpc->tot_used_space = 0;
8511  cpc->avg_pg_key_cnt = 0;
8512  cpc->avg_pg_free_sp = 0;
8513 
8514  free_space = spage_get_free_space (thread_p, pg_ptr);
8515 
8516  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
8517 
8518  header = btree_get_node_header (thread_p, pg_ptr);
8519  if (header == NULL)
8520  {
8521  goto exit_on_error;
8522  }
8523 
8524  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
8525 
8526  if (node_type == BTREE_NON_LEAF_NODE)
8527  { /* a non-leaf page */
8528  BTREE_CAPACITY cpc2; /* statistics of one child subtree */
8529 
8530  /* traverse all the subtrees of this non_leaf page and accumulate the statistical data in the cpc structure */
8531  for (i = 1; i <= key_cnt; i++)
8532  {
8533  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
8534  {
8535  goto exit_on_error;
8536  }
8538  page_vpid = nleaf_ptr.pnt;
8539  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
8540  if (page == NULL)
8541  {
8542  goto exit_on_error;
8543  }
8544 
8545  (void) pgbuf_check_page_ptype (thread_p, page, PAGE_BTREE);
8546 
8547  ret = btree_get_subtree_capacity (thread_p, btid, page, &cpc2);
8548  if (ret != NO_ERROR)
8549  {
8550  goto exit_on_error;
8551  }
8552 
8553  /* form the cpc structure for a non-leaf node page */
8554  cpc->dis_key_cnt += cpc2.dis_key_cnt;
8555  cpc->tot_val_cnt += cpc2.tot_val_cnt;
8556  cpc->leaf_pg_cnt += cpc2.leaf_pg_cnt;
8557  cpc->nleaf_pg_cnt += cpc2.nleaf_pg_cnt;
8558  cpc->tot_pg_cnt += cpc2.tot_pg_cnt;
8559  cpc->height = cpc2.height + 1; /* assigned, not accumulated: the value left is that of the last child visited */
8560  cpc->sum_rec_len += cpc2.sum_rec_len;
8561  cpc->sum_key_len += cpc2.sum_key_len;
8562  cpc->tot_free_space += cpc2.tot_free_space;
8563  cpc->tot_space += cpc2.tot_space;
8564  cpc->tot_used_space += cpc2.tot_used_space;
8565  pgbuf_unfix_and_init (thread_p, page);
8566  } /* for */
8567  cpc->avg_val_per_key = ((cpc->dis_key_cnt > 0) ? (cpc->tot_val_cnt / cpc->dis_key_cnt) : 0);
8568  cpc->nleaf_pg_cnt += 1; /* from here on: add this non-leaf page itself to the totals */
8569  cpc->tot_pg_cnt += 1;
8570  cpc->tot_free_space += free_space;
8571  cpc->tot_space += DB_PAGESIZE;
8572  cpc->tot_used_space += (DB_PAGESIZE - free_space);
8573  cpc->avg_key_len = ((cpc->dis_key_cnt > 0) ? ((int) (cpc->sum_key_len / cpc->dis_key_cnt)) : 0);
8574  cpc->avg_rec_len = ((cpc->dis_key_cnt > 0) ? ((int) (cpc->sum_rec_len / cpc->dis_key_cnt)) : 0);
8575  cpc->avg_pg_key_cnt = ((cpc->leaf_pg_cnt > 0) ? ((int) (cpc->dis_key_cnt / cpc->leaf_pg_cnt)) : 0);
8576  cpc->avg_pg_free_sp = ((cpc->tot_pg_cnt > 0) ? (cpc->tot_free_space / cpc->tot_pg_cnt) : 0);
8577  }
8578  else
8579  { /* a leaf page */
8580 
8581  /* form the cpc structure for a leaf node page */
8582  cpc->dis_key_cnt = key_cnt;
8583  cpc->leaf_pg_cnt = 1;
8584  cpc->nleaf_pg_cnt = 0;
8585  cpc->tot_pg_cnt = 1;
8586  cpc->height = 1;
8587  for (i = 1; i <= cpc->dis_key_cnt; i++)
8588  {
8589  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
8590  {
8591  goto exit_on_error;
8592  }
8593  cpc->sum_rec_len += rec.length;
8594 
8595  /* read the current record key */
8596  if (btree_read_record (thread_p, btid, pg_ptr, &rec, &key1, &leaf_pnt, BTREE_LEAF_NODE, &clear_key, &offset,
8598  {
8599  goto exit_on_error;
8600  }
8601  cpc->sum_key_len += btree_get_disk_size_of_key (&key1);
8602  btree_clear_key_value (&clear_key, &key1);
8603 
8604  /* find the value (OID) count for the record */
8605  oid_cnt = btree_record_get_num_oids (thread_p, btid, &rec, offset, BTREE_LEAF_NODE);
8606 
8607  ovfl_vpid = leaf_pnt.ovfl;
8608  if (!VPID_ISNULL (&ovfl_vpid))
8609  { /* overflow pages exist */
8610  do
8611  {
8612  ovfp = pgbuf_fix (thread_p, &ovfl_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
8613  if (ovfp == NULL)
8614  {
8615  goto exit_on_error;
8616  }
8617 
8618  (void) pgbuf_check_page_ptype (thread_p, ovfp, PAGE_BTREE);
8619 
8620  btree_get_next_overflow_vpid (thread_p, ovfp, &ovfl_vpid); /* advance ovfl_vpid before the page is unfixed; NOTE(review): any result of this call is ignored */
8621 
8622  if (spage_get_record (thread_p, ovfp, 1, &orec, PEEK) != S_SUCCESS)
8623  {
8624  goto exit_on_error;
8625  }
8626 
8627  oid_cnt += btree_record_get_num_oids (thread_p, btid, &orec, 0, BTREE_OVERFLOW_NODE);
8628  pgbuf_unfix_and_init (thread_p, ovfp);
8629  }
8630  while (!VPID_ISNULL (&ovfl_vpid));
8631  } /* if */
8632  cpc->tot_val_cnt += oid_cnt;
8633 
8634  } /* for */
8635  cpc->avg_val_per_key = ((cpc->dis_key_cnt > 0) ? (cpc->tot_val_cnt / cpc->dis_key_cnt) : 0);
8636  cpc->avg_key_len = ((cpc->dis_key_cnt > 0) ? ((int) (cpc->sum_key_len / cpc->dis_key_cnt)) : 0);
8637  cpc->avg_rec_len = ((cpc->dis_key_cnt > 0) ? ((int) (cpc->sum_rec_len / cpc->dis_key_cnt)) : 0);
8638  cpc->tot_free_space = (float) free_space;
8639  cpc->tot_space = DB_PAGESIZE;
8640  cpc->tot_used_space = (cpc->tot_space - cpc->tot_free_space);
8641  cpc->avg_pg_key_cnt = ((cpc->leaf_pg_cnt > 0) ? (cpc->dis_key_cnt / cpc->leaf_pg_cnt) : 0);
8642  cpc->avg_pg_free_sp = ((cpc->tot_pg_cnt > 0) ? (cpc->tot_free_space / cpc->tot_pg_cnt) : 0);
8643 
8644  } /* if-else */
8645 
8646  return ret;
8647 
8648 exit_on_error:
8649 
8650  if (page) /* child page still fixed when the failure happened */
8651  {
8652  pgbuf_unfix_and_init (thread_p, page);
8653  }
8654  if (ovfp)
8655  {
8656  pgbuf_unfix_and_init (thread_p, ovfp);
8657  }
8658 
8659  btree_clear_key_value (&clear_key, &key1);
8660 
8661  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret; /* never returns NO_ERROR from the error path */
8662 }
8663 
8664 /*
8665  * btree_index_capacity () - Fix the root page of an index and collect capacity statistics for the whole tree.
8666  * return: NO_ERROR; on failure the first non-zero of ret, er_errid (), ER_FAILED
8667  * btid(in): B+tree index identifier
8668  * cpc(out): Set to contain index capacity information
8669  *
8670  * Note: Form and return index capacity/space related information. The root page is unfixed on every path.
8671  */
8672 int
8674 {
8675  VPID root_vpid; /* root page identifier */
8676  PAGE_PTR root = NULL; /* root page pointer */
8678  BTREE_ROOT_HEADER *root_header = NULL;
8679  int ret = NO_ERROR;
8680 
8681  /* read root page */
8682  root_vpid.pageid = btid->root_pageid;
8683  root_vpid.volid = btid->vfid.volid;
8684  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
8685  if (root == NULL)
8686  {
8687  goto exit_on_error;
8688  }
8689 
8690  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
8691 
8692  root_header = btree_get_root_header (thread_p, root);
8693  if (root_header == NULL)
8694  {
8695  goto exit_on_error;
8696  }
8697 
8698  btid_int.sys_btid = btid;
8699  ret = btree_glean_root_header_info (thread_p, root_header, &btid_int); /* fills the rest of btid_int from the root header */
8700  if (ret != NO_ERROR)
8701  {
8702  goto exit_on_error;
8703  }
8704 
8705  /* traverse the tree and store the capacity info */
8706  ret = btree_get_subtree_capacity (thread_p, &btid_int, root, cpc);
8707  if (ret != NO_ERROR)
8708  {
8709  goto exit_on_error;
8710  }
8711 
8712  pgbuf_unfix_and_init (thread_p, root);
8713 
8714  return ret;
8715 
8716 exit_on_error:
8717 
8718  if (root)
8719  {
8720  pgbuf_unfix_and_init (thread_p, root);
8721  }
8722 
8723  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret; /* never returns NO_ERROR from the error path */
8724 }
8725 
8726 /*
8727  * btree_dump_capacity () -
8728  * return: NO_ERROR
8729  * btid(in): B+tree index identifier
8730  *
8731  * Note: Dump index capacity/space information.
8732  */
8733 int
8734 btree_dump_capacity (THREAD_ENTRY * thread_p, FILE * fp, BTID * btid)
8735 {
8736  BTREE_CAPACITY cpc;
8737  int ret = NO_ERROR;
8738  char *index_name = NULL;
8739  char *class_name = NULL;
8740  FILE_DESCRIPTORS fdes;
8741 
8742  assert (fp != NULL && btid != NULL);
8743 
8744  /* get index capacity information */
8745  ret = btree_index_capacity (thread_p, btid, &cpc);
8746  if (ret != NO_ERROR)
8747  {
8748  ASSERT_ERROR ();
8749  goto exit;
8750  }
8751 
8752  ret = file_descriptor_get (thread_p, &btid->vfid, &fdes);
8753  if (ret != NO_ERROR)
8754  {
8755  ASSERT_ERROR ();
8756  goto exit;
8757  }
8758 
8759  if (heap_get_class_name (thread_p, &fdes.btree.class_oid, &class_name) != NO_ERROR)
8760  {
8761  ASSERT_ERROR_AND_SET (ret);
8762  goto exit;
8763  }
8764 
8765  /* get index name */
8766  ret = heap_get_indexinfo_of_btid (thread_p, &fdes.btree.class_oid, btid, NULL, NULL, NULL, NULL, &index_name, NULL);
8767  if (ret != NO_ERROR)
8768  {
8769  ASSERT_ERROR ();
8770  goto exit;
8771  }
8772 
8773  fprintf (fp, "\n-------------------------------------------------------------\n");
8774  fprintf (fp, "BTID: {{%d, %d}, %d}, %s ON %s, CAPACITY INFORMATION:\n", btid->vfid.volid, btid->vfid.fileid,
8775  btid->root_pageid, (index_name == NULL) ? "*UNKOWN_INDEX*" : index_name,
8776  (class_name == NULL) ? "*UNKNOWN_CLASS*" : class_name);
8777 
8778  /* dump the capacity information */
8779  fprintf (fp, "\nDistinct Key Count: %d\n", cpc.dis_key_cnt);
8780  fprintf (fp, "Total Value Count: %d\n", cpc.tot_val_cnt);
8781  fprintf (fp, "Average Value Count Per Key: %d\n", cpc.avg_val_per_key);
8782  fprintf (fp, "Total Page Count: %d\n", cpc.tot_pg_cnt);
8783  fprintf (fp, "Leaf Page Count: %d\n", cpc.leaf_pg_cnt);
8784  fprintf (fp, "NonLeaf Page Count: %d\n", cpc.nleaf_pg_cnt);
8785  fprintf (fp, "Height: %d\n", cpc.height);
8786  fprintf (fp, "Average Key Length: %d\n", cpc.avg_key_len);
8787  fprintf (fp, "Average Record Length: %d\n", cpc.avg_rec_len);
8788  fprintf (fp, "Total Index Space: %.0f bytes\n", cpc.tot_space);
8789  fprintf (fp, "Used Index Space: %.0f bytes\n", cpc.tot_used_space);
8790  fprintf (fp, "Free Index Space: %.0f bytes\n", cpc.tot_free_space);
8791  fprintf (fp, "Average Page Free Space: %.0f bytes\n", cpc.avg_pg_free_sp);
8792  fprintf (fp, "Average Page Key Count: %d\n", cpc.avg_pg_key_cnt);
8793  fprintf (fp, "-------------------------------------------------------------\n");
8794 
8795 exit:
8796 
8797  if (class_name != NULL)
8798  {
8799  free_and_init (class_name);
8800  }
8801 
8802  if (index_name != NULL)
8803  {
8804  free_and_init (index_name);
8805  }
8806 
8807  return ret;
8808 }
8809 
8810 /*
8811  * b+tree dump routines
8812  */
8813 
/*
 * btree_print_space () - Write n blank characters to fp.
 *   return: nothing
 *   fp(in): output stream
 *   n(in): number of blanks to print; nothing is printed when n <= 0
 */
static void
btree_print_space (FILE * fp, int n)
{
  if (n <= 0)
    {
      /* A negative count used to keep the countdown loop running until the counter overflowed. */
      return;
    }

  /* "%*s" pads the empty string to a field width of n, i.e. exactly n blanks. */
  fprintf (fp, "%*s", n, "");
}
8829 
8830 /*
8831  * btree_dump_page () - Print one page: a summary line, an optional index/class banner and, when level > 1, every record.
8832  * return: nothing
8833  * fp(in): Output stream. class_oid_p(in), btname(in): when class_oid_p is neither NULL nor a null OID, a banner with the index name (btname, may be NULL) and the class name is printed.
8834  * btid(in): B+tree index identifier, passed to the record dump routines
8835  * page_ptr(in): Page pointer. pg_vpid(in): Page identifier; read from page_ptr when NULL
8836  * depth(in): Indentation depth; output lines are preceded by depth * 4 blanks
8837  * level(in): Records are dumped only when level > 1
8838  *
8839  * Note: Dumps the content of the given page of the tree.
8840  */
8841 static void
8842 btree_dump_page (THREAD_ENTRY * thread_p, FILE * fp, const OID * class_oid_p, BTID_INT * btid, const char *btname,
8843  PAGE_PTR page_ptr, VPID * pg_vpid, int depth, int level)
8844 {
8845  int key_cnt;
8846  int i;
8847  RECDES rec;
8848  BTREE_NODE_HEADER *header = NULL;
8849  BTREE_NODE_TYPE node_type;
8850  VPID vpid;
8851 
8852  if (pg_vpid == NULL)
8853  {
8854  pgbuf_get_vpid (page_ptr, &vpid);
8855  pg_vpid = &vpid;
8856  }
8857 
8858  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
8859 
8860  /* get the header record */
8861  header = btree_get_node_header (thread_p, page_ptr);
8862  if (header == NULL)
8863  {
8864  btree_print_space (fp, depth * 4);
8865  fprintf (fp, "btree_dump_page: get node header failure: %d\n", key_cnt);
8866  return;
8867  }
8868 
8869  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
8870 
8871  btree_print_space (fp, depth * 4);
8872  fprintf (fp,
8873  "[%s PAGE {%d, %d}, level: %d, depth: %d, keys: %d, Prev: {%d, %d}, Next: {%d, %d}, Max key len: %d]\n",
8874  node_type_to_string (node_type), pg_vpid->volid, pg_vpid->pageid, header->node_level, depth, key_cnt,
8875  header->prev_vpid.volid, header->prev_vpid.pageid, header->next_vpid.volid, header->next_vpid.pageid,
8876  header->max_key_len);
8877 
8878  if (class_oid_p && !OID_ISNULL (class_oid_p))
8879  {
8880  char *class_name_p = NULL;
8881  if (heap_get_class_name (thread_p, class_oid_p, &class_name_p) != NO_ERROR)
8882  {
8883  ASSERT_ERROR ();
8884  return; /* the summary line above has already been printed; the records are skipped */
8885  }
8886 
8887  btree_print_space (fp, depth * 4);
8888  fprintf (fp, "INDEX %s ON CLASS %s (CLASS_OID:%2d|%4d|%2d) \n\n", (btname) ? btname : "*UNKNOWN-INDEX*",
8889  (class_name_p) ? class_name_p : "*UNKNOWN-CLASS*", class_oid_p->volid, class_oid_p->pageid,
8890  class_oid_p->slotid);
8891  if (class_name_p)
8892  {
8893  free_and_init (class_name_p);
8894  }
8895  }
8896 
8897  fflush (fp);
8898 
8899  if (key_cnt < 0)
8900  {
8901  btree_print_space (fp, depth * 4);
8902  fprintf (fp, "btree_dump_page: node key count underflow: %d\n", key_cnt);
8903  return;
8904  }
8905 
8906  if (level > 1)
8907  {
8908  /* output the content of each record */
8909  for (i = 1; i <= key_cnt; i++)
8910  {
8911  (void) spage_get_record (thread_p, page_ptr, i, &rec, PEEK); /* NOTE(review): result deliberately ignored; rec is used below even if the read failed */
8912  if (node_type == BTREE_LEAF_NODE)
8913  {
8915  {
8916  fprintf (fp, "(F)");
8917  }
8918  else
8919  {
8920  fprintf (fp, " ");
8921  }
8922 
8923  btree_dump_leaf_record (thread_p, fp, btid, &rec, depth);
8924  }
8925  else
8926  {
8927  btree_dump_non_leaf_record (thread_p, fp, btid, &rec, depth, 1);
8928  }
8929  /* fprintf (fp, "\n"); */
8930  }
8931  }
8932 
8933  fprintf (fp, "\n");
8934 }
8935 
8936 /*
8937  * btree_dump_page_with_subtree () - Dump a page and then, recursively, each of its child subtrees.
8938  * return: nothing
8939  * btid(in): B+tree index identifier
8940  * pg_ptr(in): Page pointer; stays fixed by the caller, only child pages are fixed and unfixed here
8941  * pg_vpid(in): Page identifier
8942  * depth(in): Indentation depth of this page; children are dumped with depth + 1. fp(in): Output stream
8943  * level(in): Passed unchanged to btree_dump_page () and to the recursive calls
8944  *
8945  * Note: Dumps the content of the given page together with its subtrees. The page itself is dumped before its children.
8946  */
8947 static void
8948 btree_dump_page_with_subtree (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, PAGE_PTR pg_ptr, VPID * pg_vpid,
8949  int depth, int level)
8950 {
8951  int key_cnt;
8952  int i;
8953  NON_LEAF_REC nleaf_ptr;
8954  VPID page_vpid;
8955  PAGE_PTR page = NULL;
8956  RECDES rec;
8957  BTREE_NODE_HEADER *header = NULL;
8958  BTREE_NODE_TYPE node_type;
8959 
8960  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
8961 
8962  btree_dump_page (thread_p, fp, NULL, btid, NULL, pg_ptr, pg_vpid, depth, level); /* dump current page */
8963 
8964  /* get the header record */
8965  header = btree_get_node_header (thread_p, pg_ptr);
8966  if (header == NULL)
8967  {
8968  fprintf (fp, "btree_dump_page_with_subtree: get node header failure: %d.\n", key_cnt);
8969  return;
8970  }
8971 
8972  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
8973 
8974  if (node_type == BTREE_NON_LEAF_NODE)
8975  { /* page is non_leaf */
8976 #if !defined(NDEBUG)
8977  if (key_cnt < 0)
8978  {
8979  fprintf (fp, "btree_dump_page_with_subtree: node key count underflow: %d.\n", key_cnt);
8980  return;
8981  }
8982 #endif
8983 
8984  /* for each child page pointer in this non_leaf page, dump the corresponding subtree */
8985  for (i = 1; i <= key_cnt; i++)
8986  {
8987  (void) spage_get_record (thread_p, pg_ptr, i, &rec, PEEK); /* NOTE(review): result ignored */
8989  page_vpid = nleaf_ptr.pnt;
8990  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
8991  if (page == NULL)
8992  {
8993  return; /* the remaining children are silently skipped */
8994  }
8995  btree_dump_page_with_subtree (thread_p, fp, btid, page, &page_vpid, depth + 1, level);
8996  pgbuf_unfix_and_init (thread_p, page);
8997  }
8998  }
8999 
9000  return;
9001 }
9002 
9003 /*
9004  * btree_dump () -
9005  * return: nothing
9006  * btid(in): B+tree index identifier
9007  * level(in):
9008  *
9009  * Note: Dumps the content of the each page in the B+tree by
9010  * traversing the tree in an "inorder" manner. The header
9011  * information, as well as the content of each record in a page
9012  * are dumped. The header information for a non_leaf page
9013  * contains the key count and maximum key length information.
9014  * Maximum key length refers to the longest key in the page and
9015  * in its subtrees. The header information for a leaf page
9016  * contains also the next_page information, which is the page
9017  * identifier of the next sibling page, and the overflow page
9018  * count information. root header information contains
9019  * statistical data for the whole tree. These consist of total
9020  * key count of the tree, total page count, leaf page count,
9021  * non_leaf page count, total overflow page count and the height
9022  * of the tree. Total key count refers only to those keys that
9023  * are stored in the leaf pages of the tree. The index key type
9024  * is also stored in the root header.
9025  *
9026  * NOTE: never used
9027  */
9028 void
9029 btree_dump (THREAD_ENTRY * thread_p, FILE * fp, BTID * btid, int level)
9030 {
9031  VPID p_vpid;
9032  PAGE_PTR root = NULL;
9034  BTREE_ROOT_HEADER *root_header = NULL;
9035 
9036  p_vpid.pageid = btid->root_pageid; /* read root page */
9037  p_vpid.volid = btid->vfid.volid;
9038  root = pgbuf_fix (thread_p, &p_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
9039  if (root == NULL)
9040  {
9041  return;
9042  }
9043 
9044  root_header = btree_get_root_header (thread_p, root);
9045  if (root_header == NULL)
9046  {
9047  goto end; /* do nothing */
9048  }
9049 
9050  btid_int.sys_btid = btid;
9051  if (btree_glean_root_header_info (thread_p, root_header, &btid_int) != NO_ERROR)
9052  {
9053  goto end; /* do nothing */
9054  }
9055 
9056  fprintf (fp, "\n------------ The B+Tree Index Dump Start ---------------------\n\n\n");
9057  btree_dump_root_header (thread_p, fp, root); /* output root header information */
9058 
9059  if (level != 0)
9060  {
9061  btree_dump_page_with_subtree (thread_p, fp, &btid_int, root, &p_vpid, 0, level);
9062  }
9063 
9064  fprintf (fp, "\n------------ The B+Tree Index Dump End ---------------------\n\n\n");
9065 
9066 end:
9067  pgbuf_unfix_and_init (thread_p, root);
9068 
9069  return;
9070 }
9071 
9072 
9073 /*
9074  * btree_read_key_type () - Read the key domain stored in the root header of an index.
9075  * return: Domain unpacked from root_header->packed_key_domain; NULL when the root page cannot be fixed or its header cannot be read
9076  * btid(in): B+tree index identifier; the root page is fixed from btid->vfid.volid and btid->root_pageid
9077  */
9078 TP_DOMAIN *
9080 {
9081  VPID p_vpid;
9082  PAGE_PTR root = NULL;
9083  TP_DOMAIN *key_type = NULL;
9084  BTREE_ROOT_HEADER *root_header = NULL;
9085 
9086  p_vpid.pageid = btid->root_pageid; /* read root page */
9087  p_vpid.volid = btid->vfid.volid;
9088  root = pgbuf_fix (thread_p, &p_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
9089  if (root == NULL)
9090  {
9091  return NULL;
9092  }
9093 
9094  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
9095 
9096  root_header = btree_get_root_header (thread_p, root);
9097  if (root_header == NULL)
9098  {
9099  pgbuf_unfix_and_init (thread_p, root);
9100  return NULL;
9101  }
9102 
9103  (void) or_unpack_domain (root_header->packed_key_domain, &key_type, 0); /* result ignored; key_type keeps its NULL initializer unless the call sets it */
9104 
9105  pgbuf_unfix_and_init (thread_p, root);
9106 
9107  return key_type;
9108 }
9109 
9110 /*
9111  * btree_delete_key_from_leaf () - Delete a b-tree key completely.
9112  *
9113  * return : NO_ERROR on success; otherwise the error code reported by the failing step (ER_FAILED when a page
 *          access fails unexpectedly).
9114  * thread_p (in) : Thread entry.
9115  * btid (in) : B-tree info.
9116  * leaf_pg (in) : Leaf page.
9117  * leafrec_pnt (in) : Leaf record info. A negative key_len is treated as an overflow key.
9118  * delete_helper (in/out) : B-tree delete helper. Must enter with is_system_op_started == false; the flag is set
 *                          here when a system operation is opened and cleared again if that operation is
 *                          aborted on error.
9119  * search_key (in) : Search key result; slotid is the slot removed from leaf_pg.
 *
 * The slot is removed with spage_delete and the removal is logged through btree_rv_log_delete_object. For an
 * overflow key a system operation is started, the overflow key is deleted first and the leaf record is copied
 * before the slot is deleted so that its data can be handed to the logging call.
9120  */
9121 static int
9122 btree_delete_key_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR leaf_pg, LEAF_REC * leafrec_pnt,
9123  BTREE_DELETE_HELPER * delete_helper, BTREE_SEARCH_KEY_HELPER * search_key)
9124 {
9125  int ret = NO_ERROR; /* Error code. */
9126  int key_cnt; /* Node key count. */
9127  BTREE_NODE_HEADER *header = NULL; /* Node header. */
9128  LOG_LSA prev_lsa;
9129  char leaf_record_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
9130  RECDES leaf_record = RECDES_INITIALIZER;
9131
9132  assert (delete_helper->is_system_op_started == false);
9133  assert (delete_helper->purpose != BTREE_OP_INSERT_MVCC_DELID
9134  && delete_helper->purpose != BTREE_OP_DELETE_UNDO_INSERT_UNQ_MULTIUPD);
9135
9136  /* Is this an overflow key? Should we delete it too? */
9137  /* If this is undo of inserted object, overflow key deletion will be handled automatically. Don't delete here. */
9138  if (leafrec_pnt->key_len < 0)
9139  {
  /* The overflow key deletion and the slot removal below are grouped in one system operation. */
9141  log_sysop_start (thread_p);
9142  delete_helper->is_system_op_started = true;
9143
9144  /* Delete overflow key. */
9145  ret = btree_delete_overflow_key (thread_p, btid, leaf_pg, search_key->slotid, BTREE_LEAF_NODE);
9146  if (ret != NO_ERROR)
9147  {
9148  ASSERT_ERROR ();
9149  goto exit_on_error;
9150  }
9151  /* Overflow key deleted. */
9152  }
9153
9155
9156  header = btree_get_node_header (thread_p, leaf_pg);
9157  if (header == NULL)
9158  {
9159  assert_release (false);
9160  ret = ER_FAILED;
9161  goto exit_on_error;
9162  }
9163
9164  if (delete_helper->is_system_op_started)
9165  {
9166  /* Before deleting the slot, we will need the record data for undo logging. */
  /* The record is copied into the local aligned buffer. When no system operation was started, leaf_record keeps
   * the values given by RECDES_INITIALIZER and those are what the logging call below receives. */
9167  leaf_record.area_size = DB_PAGESIZE;
9168  leaf_record.data = PTR_ALIGN (leaf_record_buffer, BTREE_MAX_ALIGN);
9169  if (spage_get_record (thread_p, leaf_pg, search_key->slotid, &leaf_record, COPY) != S_SUCCESS)
9170  {
9171  assert_release (false);
9172  ret = ER_FAILED;
9173  goto exit_on_error;
9174  }
9175  }
9176
9177  /* now delete the btree slot */
9178  assert (search_key->slotid > 0);
9179  if (spage_delete (thread_p, leaf_pg, search_key->slotid) != search_key->slotid)
9180  {
9181  ASSERT_ERROR_AND_SET (ret);
9182  goto exit_on_error;
9183  }
9184
9186
9187  /* key deleted, update node header */
  /* Only the empty-node case is handled here: max_key_len is reset once no keys remain. */
9188  key_cnt = btree_node_number_of_keys (thread_p, leaf_pg);
9189  if (key_cnt == 0)
9190  {
9191  header->max_key_len = 0;
9192  }
9193
9195
9196  /* We need to log previous lsa. */
9197  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_pg));
9198
9199  /* Save redo logging. */
9200  /* Flag recovery that key is being removed completely. */
  /* NOTE(review): no statement follows the comment above in this copy of the file - confirm against upstream. */
9202  /* Since this is completely removed, no debugging info is required. */
9203  assert (!BTREE_RV_HAS_DEBUG_INFO (delete_helper->leaf_addr.offset));
9204
9205  /* Add logging. */
9206  btree_rv_log_delete_object (thread_p, *delete_helper, delete_helper->leaf_addr, leaf_record.length, 0,
9207  leaf_record.data, NULL);
9208
9209  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("removed key"),
9210  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, leaf_pg, &prev_lsa, true, search_key->slotid, 0,
9211  btid->sys_btid));
9212
  /* Close the system operation opened for the overflow key, if any. */
9213  if (delete_helper->is_system_op_started)
9214  {
9215  btree_delete_sysop_end (thread_p, delete_helper);
9216  }
9217
9218 #if !defined(NDEBUG)
9219  (void) btree_verify_node (thread_p, btid, leaf_pg);
9220 #endif
9221
9222  return ret;
9223
9224 exit_on_error:
9225
  /* Abort the system operation started in this function and clear the helper flag accordingly. */
9226  if (delete_helper->is_system_op_started)
9227  {
9228  log_sysop_abort (thread_p);
9229  delete_helper->is_system_op_started = false;
9230  }
9231
9232  assert_release (ret != NO_ERROR);
9233  return ret;
9234 }
9235 
9236 /*
9237  * btree_replace_first_oid_with_ovfl_oid () - Replace the object in leaf page with an object from the first overflow page.
9238  *
9239  * return : NO_ERROR on success, otherwise the error code of the failing step.
9240  * thread_p (in) : Thread entry.
9241  * btid (in) : B-tree info.
9242  * key (in) : Key value; forwarded to btree_overflow_remove_object.
9243  * delete_helper (in/out) : B-tree delete helper. object_info is refreshed with the first leaf object, the redo
 *                          buffer pointer is reset and is_system_op_started is toggled around the swap.
9244  * leaf_page (in) : Leaf page.
9245  * search_key (in) : Search key result; slotid identifies the leaf record.
9246  * leaf_rec (in/out) : Key leaf record; its first object is rewritten and the record is stored back in the page.
9247  * ovfl_vpid (in) : VPID of first overflow page.
 *
 * The work is done in two steps: (1) under a system operation that is committed here, the first object of the
 * leaf record is exchanged with the last object of the first overflow page; (2) the object to delete, now in
 * the overflow record, is removed through btree_overflow_remove_object.
 *
 * NOTE(review): the line holding the function name and its first parameters is absent after "static int" in
 * this copy of the file - restore it from the upstream source.
9248  */
9249 static int
9251  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page,
9252  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_rec, VPID * ovfl_vpid)
9253 {
9254  int ret = NO_ERROR; /* Error code. */
9255  BTREE_OBJECT_INFO last_ovf_object;
9256  PAGE_PTR ovfl_page = NULL; /* First overflow page. */
9257  RECDES ovfl_copy_rec; /* Overflow record. */
9258  /* Buffer to store overflow record data. */
9259  char ovfl_copy_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
9260  int offset_to_ovfl_object = 0; /* Offset to last object. */
  /* True only while the system operation started by this function is open. */
9261  bool is_sytem_op_started = false;
  /* Caller's value of delete_helper->is_system_op_started, restored after the swap and on error. */
9262  bool save_system_op_started = false;
9263
9264  LOG_LSA prev_lsa;
9265
9266  /* Recovery data. */
9267  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
9268  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
9269  char *rv_undo_data_ptr = NULL;
9270  int rv_undo_data_length = 0;
9271  int rv_redo_data_length = 0;
9272
9273 #if !defined (NDEBUG)
9274  OID save_oid;
9275 #endif /* !NDEBUG */
9276
9277  /* Assert expected arguments. */
9278  assert (btid != NULL);
9279  assert (leaf_page != NULL);
9280  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND && search_key->slotid > 0);
9281  assert (delete_helper != NULL);
9282  assert (leaf_rec != NULL);
9283  assert (ovfl_vpid != NULL);
9284  assert (btree_is_delete_object_purpose (delete_helper->purpose));
9285  assert (delete_helper->rv_redo_data != NULL);
9286  assert (delete_helper->leaf_addr.offset != 0 && delete_helper->leaf_addr.pgptr == leaf_page);
9287
9288  /* Since we cannot leave the leaf record without any objects, we will need to replace it with an overflow object.
9289  * Logging physical delete, because it has logical undo, cannot work well under system operation. For this reason,
9290  * the operation will be split into two sub-operations: 1. Swap first object in leaf record with last object in first
9291  * overflow page. This is done under a system operation that is committed at the end. 2. Remove relocated object from
9292  * overflow record. */
9293
9294  /* Get overflow record. */
9295  ovfl_copy_rec.area_size = DB_PAGESIZE;
9296  ovfl_copy_rec.data = PTR_ALIGN (ovfl_copy_rec_buf, BTREE_MAX_ALIGN);
9297  ovfl_page = pgbuf_fix (thread_p, ovfl_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
9298  if (ovfl_page == NULL)
9299  {
9300  ASSERT_ERROR_AND_SET (ret);
9301  goto exit_on_error;
9302  }
9303
9304 #if !defined (NDEBUG)
9305  (void) pgbuf_check_page_ptype (thread_p, ovfl_page, PAGE_BTREE);
9306 #endif /* !NDEBUG */
9307
  /* The overflow record is read from slot 1 of the overflow page into the local copy buffer. */
9308  if (spage_get_record (thread_p, ovfl_page, 1, &ovfl_copy_rec, COPY) != S_SUCCESS)
9309  {
9310  assert_release (false);
9311  ret = ER_FAILED;
9312  goto exit_on_error;
9313  }
9314
9315 #if !defined (NDEBUG)
9316  (void) btree_check_valid_record (thread_p, btid, &ovfl_copy_rec, BTREE_OVERFLOW_NODE, NULL);
9317 #endif /* !NDEBUG */
9318  /* Get last object. */
9319  ret = btree_record_get_last_object (thread_p, btid, &ovfl_copy_rec, BTREE_OVERFLOW_NODE, 0, &last_ovf_object.oid,
9320  &last_ovf_object.class_oid, &last_ovf_object.mvcc_info, &offset_to_ovfl_object);
9321  if (ret != NO_ERROR)
9322  {
9323  ASSERT_ERROR ();
9324  goto exit_on_error;
9325  }
9326  /* Last object obtained. */
9327
9328  /* Swap operation must use system op. */
  /* Remember the caller's flag first so that it can be put back once the swap is committed or aborted. */
9329  save_system_op_started = delete_helper->is_system_op_started;
9330  log_sysop_start (thread_p);
9331  is_sytem_op_started = true;
9332  delete_helper->is_system_op_started = true;
9333
9334  /* So let's swap last object with leaf record. */
9335
9336  /* Get first object complete information. */
9337 #if !defined (NDEBUG)
9338  COPY_OID (&save_oid, &delete_helper->object_info.oid);
9339 #endif /* !NDEBUG */
  /* NOTE(review): ret is assigned here but not tested before the next step; only the debug assert below checks
   * that the OID read back matches the one the helper already held. */
9340  ret =
9341  btree_leaf_get_first_object (btid, leaf_rec, &delete_helper->object_info.oid, &delete_helper->object_info.class_oid,
9342  &delete_helper->object_info.mvcc_info);
9343  assert (OID_EQ (&save_oid, &delete_helper->object_info.oid));
9344
9345  /* Replace first object with overflow object. */
  /* Both recovery cursors restart at the beginning of their buffers for this log record. */
9346  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
9347  rv_undo_data_ptr = rv_undo_data;
9348 #if !defined (NDEBUG)
9349  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&delete_helper->leaf_addr, delete_helper->rv_redo_data_ptr, rv_undo_data_ptr, btid,
9351 #endif /* !NDEBUG */
9353  btree_leaf_change_first_object (thread_p, leaf_rec, btid, &last_ovf_object.oid, &last_ovf_object.class_oid,
9354  &last_ovf_object.mvcc_info, NULL, &rv_undo_data_ptr,
9355  &delete_helper->rv_redo_data_ptr);
9356  if (spage_update (thread_p, leaf_page, search_key->slotid, leaf_rec) != SP_SUCCESS)
9357  {
9358  assert_release (false);
9359  ret = ER_FAILED;
9360  goto exit_on_error;
9361  }
9362
  /* Capture the page LSA before appending the log record; it is only used by the btree_delete_log trace. */
9363  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
9364  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
9365  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
9366  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &delete_helper->leaf_addr, rv_undo_data_length,
9367  rv_redo_data_length, rv_undo_data, delete_helper->rv_redo_data);
9368  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
9369
9370  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("remove first object by replacing with an overflow object")
9371  "\t" BTREE_OBJINFO_MSG ("overflow object"),
9372  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, leaf_page, &prev_lsa, true, search_key->slotid,
9373  leaf_rec->length, btid->sys_btid),
9374  BTREE_OBJINFO_AS_ARGS (&last_ovf_object));
9375
9376  /* Replace object in overflow with object we want to delete. */
9377  ret =
9378  btree_overflow_record_replace_object (thread_p, btid, delete_helper, ovfl_page, &ovfl_copy_rec,
9379  &offset_to_ovfl_object, &delete_helper->object_info);
9380  if (ret != NO_ERROR)
9381  {
9382  ASSERT_ERROR ();
9383  goto exit_on_error;
9384  }
9385
9386  /* Swap executed successfully. Commit it. */
9387  /* Commit swap. */
9388  /* todo: with new system op we can get rid of this swapping hack and use logical undo */
9389  log_sysop_commit (thread_p);
9390  is_sytem_op_started = false;
9391  delete_helper->is_system_op_started = save_system_op_started;
9392
9393  /* Now remove relocated object from overflow page. */
9394  /* Reset logging for btree_overflow_remove_object. */
9395  delete_helper->leaf_addr.offset = search_key->slotid;
9396  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
  /* ovfl_page is passed by address and is re-checked for NULL afterwards before being unfixed. */
9397  ret =
9398  btree_overflow_remove_object (thread_p, key, btid, delete_helper, &ovfl_page, leaf_page, leaf_page, leaf_rec,
9399  search_key, offset_to_ovfl_object);
9400  if (ret != NO_ERROR)
9401  {
9402  ASSERT_ERROR ();
9403  goto exit_on_error;
9404  }
9405
9406  if (ovfl_page != NULL)
9407  {
9408  pgbuf_unfix_and_init (thread_p, ovfl_page);
9409  }
9410
9411  return NO_ERROR;
9412
9413 exit_on_error:
9414
  /* NOTE(review): the remainder of the condition below is absent in this copy of the file - restore it from the
   * upstream source. */
9415  if (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT
9417  {
9418  assert_release (false);
9419  }
  /* Abort the swap only if its system operation is still open. */
9420  if (is_sytem_op_started)
9421  {
9422  log_sysop_abort (thread_p);
9423  }
  /* NOTE(review): when the failure happened before the caller's flag was saved, save_system_op_started still
   * holds its initial false and that value is written into the helper here - confirm this is intended. */
9424  delete_helper->is_system_op_started = save_system_op_started;
9425  if (ovfl_page != NULL)
9426  {
9427  pgbuf_unfix_and_init (thread_p, ovfl_page);
9428  }
9429
9430  assert_release (ret != NO_ERROR);
9431  return ret;
9432 }
9433 
9434 /*
9435  * btree_modify_leaf_ovfl_vpid () - Modify the link to first overflow page in leaf record.
9436  *
9437  * return : NO_ERROR on success, ER_FAILED if the leaf record cannot be updated in the page.
9438  * thread_p (in) : Thread entry.
9439  * btid_int (in) : B-tree info.
9440  * delete_helper (in/out) : B-tree delete helper. Must already be inside a system operation; its redo buffer is
 *                          appended to and its leaf_addr is used as the log address.
9441  * leaf_page (in) : Leaf page.
9442  * leaf_record (in/out) : Leaf record; its overflow link is rewritten and the record is stored back in the page.
9443  * search_key (in) : Search key result; slotid identifies the leaf record.
9444  * next_ovfl_vpid (in) : New link to first overflow page.
 *
 * NOTE(review): the line holding the function name and its first parameters is absent after "static int" in
 * this copy of the file - restore it from the upstream source.
9445  */
9446 static int
9448  PAGE_PTR leaf_page, RECDES * leaf_record, BTREE_SEARCH_KEY_HELPER * search_key,
9449  VPID * next_ovfl_vpid)
9450 {
9451  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
9452  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
9453  char *rv_undo_data_ptr = NULL;
9454  int rv_undo_data_length = 0;
9455  int rv_redo_data_length = 0;
9456
9457  LOG_LSA prev_lsa;
9458
9459  /* Assert expected arguments. */
9460  assert (btid_int != NULL);
9461  assert (delete_helper != NULL);
9462  assert (leaf_page != NULL);
9463  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND && search_key->slotid > 0);
9464  assert (leaf_record != NULL);
9465  assert (next_ovfl_vpid != NULL);
9466  assert (delete_helper->rv_redo_data != NULL && delete_helper->rv_redo_data_ptr != NULL);
9467  assert (delete_helper->is_system_op_started);
9468
9469  /* We need undoredo logging. */
  /* The undo cursor starts at the beginning of the local buffer; the redo cursor is the helper's own and is not
   * reset here. */
9470  rv_undo_data_ptr = rv_undo_data;
9471
9472 #if !defined (NDEBUG)
9473  /* For debugging recovery. */
9474  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&delete_helper->leaf_addr, delete_helper->rv_redo_data_ptr, rv_undo_data_ptr,
9475  btid_int, BTREE_RV_DEBUG_ID_OVF_LINK);
9476 #endif /* !NDEBUG */
9478
  /* Rewrite the link inside the record; both recovery cursors are passed by address to this call. */
9479  btree_leaf_record_change_overflow_link (thread_p, btid_int, leaf_record, next_ovfl_vpid, &rv_undo_data_ptr,
9480  &delete_helper->rv_redo_data_ptr);
9481
9482  if (spage_update (thread_p, leaf_page, search_key->slotid, leaf_record) != SP_SUCCESS)
9483  {
9484  /* Unexpected. */
9485  assert_release (false);
9486  return ER_FAILED;
9487  }
9488
9489  /* We need to log previous lsa. */
9490  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
9491
9492  /* Add logging. */
9493  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
9494  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
9495  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &delete_helper->leaf_addr, rv_undo_data_length,
9496  rv_redo_data_length, rv_undo_data, delete_helper->rv_redo_data);
9497
9499
9500  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
9501
9502  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("remove object and first overflow page (unknown vpid).")
9503  "\t" "new link vpid = %d|%d",
9504  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, leaf_page, &prev_lsa, true, search_key->slotid,
9505  leaf_record->length, btid_int->sys_btid), VPID_AS_ARGS (next_ovfl_vpid));
9506
9507  /* Success. */
9508  return NO_ERROR;
9509 }
9510 
9511 /*
9512  * btree_modify_overflow_link () - Modify next overflow link in overflow page
9513  * header.
9514  *
9515  * return : NO_ERROR on success, ER_FAILED if the header record cannot be read or updated.
9516  * thread_p (in) : Thread entry.
9517  * btid_int (in) : B-tree info; sys_btid->vfid is used for the log address.
9518  * delete_helper (in/out) : B-tree delete helper. Must already be inside a system operation; the new header is
 *                          appended to its redo buffer.
9519  * ovfl_page (in) : Overflow page.
9520  * next_ovfl_vpid (in) : New link to next overflow.
 *
 * The current header record is copied into the undo buffer, a new header carrying next_ovfl_vpid replaces it in
 * the HEADER slot, and the change is logged as RVBT_RECORD_MODIFY_UNDOREDO.
 *
 * NOTE(review): the line holding the function name and its first parameters is absent after "static int" in
 * this copy of the file - restore it from the upstream source.
9521  */
9522 static int
9524  PAGE_PTR ovfl_page, VPID * next_ovfl_vpid)
9525 {
9526  LOG_DATA_ADDR ovf_addr;
9527  BTREE_OVERFLOW_HEADER ovf_header_info;
9528  RECDES overflow_header_record;
9529
9530  LOG_LSA prev_lsa;
9531
9532  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
9533  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
9534  int rv_undo_data_length = 0;
9535  int rv_redo_data_length;
9536
9537  /* Assert expected arguments. */
9538  assert (btid_int != NULL);
9539  assert (delete_helper != NULL);
9540  assert (ovfl_page != NULL);
9541  assert (next_ovfl_vpid != NULL);
9542  assert (delete_helper->rv_redo_data != NULL && delete_helper->rv_redo_data_ptr != NULL);
9543  assert (delete_helper->is_system_op_started);
9544
9545  /* Create record with new overflow header info and update page. */
9546
9547  /* We need undoredo logging. */
  /* The existing header is copied straight into the undo buffer, so the undo data is the old header itself. */
9548  overflow_header_record.area_size = sizeof (BTREE_OVERFLOW_HEADER);
9549  overflow_header_record.data = rv_undo_data;
9550  if (spage_get_record (thread_p, ovfl_page, HEADER, &overflow_header_record, COPY) != S_SUCCESS)
9551  {
9552  assert_release (false);
9553  return ER_FAILED;
9554  }
9555  rv_undo_data_length = sizeof (BTREE_OVERFLOW_HEADER);
9556
9557  /* Update overflow header info. */
  /* Only next_vpid is assigned in the local header before it is written to the page. */
9558  VPID_COPY (&ovf_header_info.next_vpid, next_ovfl_vpid);
9559
9560  /* Create record with new overflow header info and update page. */
  /* The record descriptor is re-pointed from the undo buffer to the new local header. */
9561  overflow_header_record.data = (char *) &ovf_header_info;
9562  overflow_header_record.length = sizeof (BTREE_OVERFLOW_HEADER);
9563
9564  if (spage_update (thread_p, ovfl_page, HEADER, &overflow_header_record) != SP_SUCCESS)
9565  {
9566  /* Unexpected. */
9567  assert_release (false);
9568  return ER_FAILED;
9569  }
9570
9571  /* We need to log previous lsa. */
9572  LSA_COPY (&prev_lsa, pgbuf_get_lsa (ovfl_page));
9573
9574  /* Log the change. */
9575  ovf_addr.offset = HEADER;
9576  ovf_addr.pgptr = ovfl_page;
9577  ovf_addr.vfid = &btid_int->sys_btid->vfid;
9578
9579  /* Redo logging. */
9580  BTREE_RV_SET_OVERFLOW_NODE (&ovf_addr);
9581  /* Update entire record. */
  /* NOTE(review): no statement follows the comment above in this copy of the file - confirm against upstream. */
9583  /* Pack record data. */
9584  memcpy (delete_helper->rv_redo_data_ptr, overflow_header_record.data, overflow_header_record.length);
9585  delete_helper->rv_redo_data_ptr += overflow_header_record.length;
9586
9587  /* Add logging. */
9588  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
9589  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &ovf_addr, rv_undo_data_length,
9590  rv_redo_data_length, rv_undo_data, delete_helper->rv_redo_data);
9591
9593
9594  pgbuf_set_dirty (thread_p, ovfl_page, DONT_FREE);
9595
9596  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("remove object and non-first overflow page (unknown vpid)")
9597  "\t" "new link vpid = %d|%d",
9598  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, ovfl_page, &prev_lsa, false, HEADER,
9599  overflow_header_record.length, btid_int->sys_btid),
9600  VPID_AS_ARGS (next_ovfl_vpid));
9601
9602  return NO_ERROR;
9603 }
9604 
9605 /*
9606  * btree_delete_meta_record - delete record for merge
9607  *
9608  * return: NO_ERROR on success; on failure the error already held in ret, else er_errid (), else ER_FAILED.
9609  * thread_p(in): Thread entry.
9610  * btid(in): B+tree index identifier
9611  * page_ptr(in): Page holding the record; asserted to be a non-leaf node (node_level > 1).
9612  * slot_id(in): Slot of the record to delete; must be greater than 0.
9613  *
 * The record image is written to a recovery buffer, an overflow key (negative key_len) is deleted through
 * btree_delete_overflow_key, the slot is removed and the removal is logged as RVBT_NDRECORD_DEL.
9614  *
9615  */
9616 static int
9617 btree_delete_meta_record (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, int slot_id)
9618 {
9619  int ret = NO_ERROR;
9620  RECDES rec;
9621  int dummy_offset;
9622  bool dummy_clear_key;
9623  NON_LEAF_REC nleaf_pnt = { {NULL_PAGEID, NULL_VOLID}, 0 };
9624  LEAF_REC leaf_pnt = { {NULL_PAGEID, NULL_VOLID}, 0 };
9625  char *recset_data;
9626  int recset_data_length;
9627  PGLENGTH log_addr_offset;
9628  char recset_data_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
9629  BTREE_NODE_HEADER *header = NULL;
9630  BTREE_NODE_TYPE node_type;
9631
9632  /* init */
9633  recset_data = PTR_ALIGN (recset_data_buf, BTREE_MAX_ALIGN);
9634
9635  assert (slot_id > 0);
9636  if (spage_get_record (thread_p, page_ptr, slot_id, &rec, PEEK) != S_SUCCESS)
9637  {
9638  goto exit_on_error;
9639  }
9640
9641  header = btree_get_node_header (thread_p, page_ptr);
9642  if (header == NULL)
9643  {
9644  goto exit_on_error;
9645  }
9646
  /* Levels above 1 are non-leaf nodes. */
9647  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
9648
  /* Callers are expected to pass non-leaf pages only; the leaf branch below is reachable only when this assert
   * is compiled out. */
9649  assert (node_type == BTREE_NON_LEAF_NODE);
9650
9651  if (node_type == BTREE_NON_LEAF_NODE)
9652  {
  /* NOTE(review): nleaf_pnt.key_len is tested below, but no visible statement fills nleaf_pnt from rec in this
   * copy of the file - a line appears to be missing here; confirm against upstream. */
9654
9655  /* prepare undo log record */
9656  btree_rv_write_log_record (recset_data, &recset_data_length, &rec, BTREE_NON_LEAF_NODE);
9657
9658  if (nleaf_pnt.key_len < 0)
9659  { /* overflow key */
9660  /* get the overflow manager to delete the key */
9661  ret = btree_delete_overflow_key (thread_p, btid, page_ptr, slot_id, BTREE_NON_LEAF_NODE);
9662  if (ret != NO_ERROR)
9663  {
9664  goto exit_on_error;
9665  }
9666  }
9667  }
9668  else
9669  {
9670  ret =
9671  btree_read_record (thread_p, btid, page_ptr, &rec, NULL, &leaf_pnt, BTREE_LEAF_NODE, &dummy_clear_key,
9672  &dummy_offset, PEEK_KEY_VALUE, NULL);
9673  if (ret != NO_ERROR)
9674  {
9675  goto exit_on_error;
9676  }
9677
9678  /* prepare undo log record */
9679  btree_rv_write_log_record (recset_data, &recset_data_length, &rec, BTREE_LEAF_NODE);
9680
9681  if (leaf_pnt.key_len < 0)
9682  { /* overflow key */
9683  /* get the overflow manager to delete the key */
9684  ret = btree_delete_overflow_key (thread_p, btid, page_ptr, slot_id, BTREE_LEAF_NODE);
9685  if (ret != NO_ERROR)
9686  {
9687  goto exit_on_error;
9688  }
9689  }
9690  }
9691
9692  assert (slot_id > 0);
9693  if (spage_delete (thread_p, page_ptr, slot_id) != slot_id)
9694  {
9695  goto exit_on_error;
9696  }
9697
9698  /* log the deleted slot_id for undo/redo purposes */
  /* Undo data is the saved record image; redo data is just the slot id. */
9699  log_addr_offset = slot_id;
9700  log_append_undoredo_data2 (thread_p, RVBT_NDRECORD_DEL, &btid->sys_btid->vfid, page_ptr, log_addr_offset,
9701  recset_data_length, sizeof (log_addr_offset), recset_data, &log_addr_offset);
9702
9703  return ret;
9704
9705 exit_on_error:
9706
  /* Never return NO_ERROR from the error path: keep ret if it is already an error, otherwise take er_errid (),
   * and fall back to ER_FAILED when that is NO_ERROR as well. */
9707  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
9708 }
9709 
9710 /*
9711  * btree_write_default_split_info() - Store default values into a split info structure.
9712  * return: nothing
9713  * info(in/out): Split info to initialize; must not be NULL. Its index field is set to 1.
 *
 * NOTE(review): the declarator line and one statement before the index assignment are absent in this copy of
 * the file - restore them from the upstream source.
9714  */
9715 static void
9717 {
9718  assert (info != NULL);
9719
9721  info->index = 1;
9722 }
9723 
9724 /*
9725  * btree_merge_root () - Merge the two children of the root into the root page.
9726  * return: NO_ERROR on success; on failure the error already held in ret, else er_errid (), else ER_FAILED.
 * thread_p(in): Thread entry.
9727  * btid(in): B+tree index identifier
9728  * P(in): Page pointer for the root to be merged
9729  * Q(in): Page pointer for the root child page to be merged
9730  * R(in): Page pointer for the root child page to be merged
9731  * P_vpid(in): Page identifier for page P
9732  * Q_vpid(in): Page identifier for page Q
9733  * R_vpid(in): Page identifier for page R
 *
 * NOTE(review): the declarator line is absent after "static int" in this copy of the file. The body only uses
 * P_vpid, Q_vpid and R_vpid as locals of the debug block, so confirm against upstream whether they are still
 * parameters.
9734  *
9735  * Note: When the root page has only two children (non_leaf)
9736  * that can be merged together, then they are merged through
9737  * this specific root merge operation. The main distinction of
9738  * this routine from the regular merge operation is that in this
9739  * the content of the two child pages are moved to the root, in
9740  * order not to change the original root page. The root can also
9741  * be a specific non-leaf page, that is, it may have only one key
9742  * and one child page pointer. In this case, R_id, the page
9743  * identifier for the page R is NULL_PAGEID. In both cases, the
9744  * height of the tree is reduced by one, after the merge
9745  * operation. The two (one) child pages are not deallocated by
9746  * this routine. Deallocation of these pages are left to the
9747  * calling routine.
9748  *
9749  * Note: Apart from the node level in their headers, which is set to -1 here to mark them as deallocated, the
9750  * contents of pages Q and R are left as they are, since these pages will be deallocated by the calling routine.
9751  */
9752 static int
9754 {
9755  int left_cnt, right_cnt;
9756  RECDES peek_rec;
9757  int i, j;
9758  char *recset_data; /* for recovery purposes */
9759  int recset_length; /* for recovery purposes */
9760  RECSET_HEADER recset_header; /* for recovery purposes */
9761  int ret = NO_ERROR;
9762  char recset_data_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
9763  LOG_DATA_ADDR addr;
9764  BTREE_ROOT_HEADER *root_header = NULL;
  /* Q_end: last slot of Q that is copied; R_start: first slot of R that is copied. */
9765  int Q_end, R_start;
9766 #if !defined(NDEBUG)
9767  int p_level, q_level, r_level;
9768 #endif
9769  BTREE_NODE_HEADER *q_header = NULL, *r_header = NULL;
9770
9771 #if !defined(NDEBUG)
  /* NOTE(review): the condition guarding the debug print block below is absent in this copy of the file. */
9773  {
9774  VPID *P_vpid = pgbuf_get_vpid_ptr (P);
9775  VPID *Q_vpid = pgbuf_get_vpid_ptr (Q);
9776  VPID *R_vpid = pgbuf_get_vpid_ptr (R);
9777
9778  printf ("btree_merge_root: P{%d, %d}, Q{%d, %d}, R{%d, %d}\n", P_vpid->volid, P_vpid->pageid, Q_vpid->volid,
9779  Q_vpid->pageid, R_vpid->volid, R_vpid->pageid);
9780  }
9781
  /* Debug-only sanity checks: both children are non-leaf nodes on the same level, one level below the root. */
9782  p_level = btree_get_node_level (thread_p, P);
9783  assert (p_level > 2);
9784
9785  q_level = btree_get_node_level (thread_p, Q);
9786  assert (q_level > 1);
9787
9788  r_level = btree_get_node_level (thread_p, R);
9789  assert (r_level > 1);
9790
9791  assert (q_level == r_level);
9792  assert (p_level == q_level + 1);
9793  assert (p_level == r_level + 1);
9794
9795  btree_verify_node (thread_p, btid, P);
9796  btree_verify_node (thread_p, btid, Q);
9797  btree_verify_node (thread_p, btid, R);
9798 #endif
9799
9800  /* initializations */
9801  recset_data = NULL;
9802
9803  /* log the P record contents for undo purposes, if a crash happens the records of root page P will be inserted back.
9804  * There is no need for undo logs for pages Q and R, since they are not changed by this routine, because they will be
9805  * deallocated after a successful merge operation. There is also no need for redo logs for pages Q and R, since these
9806  * pages will be deallocated by the caller routine. */
9807
9808  /* for recovery purposes */
9809  recset_data = PTR_ALIGN (recset_data_buf, BTREE_MAX_ALIGN);
9810  assert (recset_data != NULL);
9811
9812  /* remove fence key for merge */
9813  left_cnt = btree_node_number_of_keys (thread_p, Q);
9814  right_cnt = btree_node_number_of_keys (thread_p, R);
9815
9816  Q_end = left_cnt;
9817  if (left_cnt > 0)
9818  {
9819  /* read the last record to check upper fence_key */
9820  if (spage_get_record (thread_p, Q, left_cnt, &peek_rec, PEEK) != S_SUCCESS)
9821  {
9822  goto exit_on_error;
9823  }
9824
9825  /* delete left page upper fence_key before merge */
  /* NOTE(review): the condition for the block below is absent in this copy of the file. */
9827  {
9828  assert_release (left_cnt >= 1);
9830  Q_end--;
9831  }
9832  }
9833
9834  left_cnt = Q_end;
9835
9836  R_start = 1;
9837  if (right_cnt > 0)
9838  {
9839  /* read the first record to check lower fence_key */
9840  if (spage_get_record (thread_p, R, 1, &peek_rec, PEEK) != S_SUCCESS)
9841  {
9842  goto exit_on_error;
9843  }
9844
9845  /* delete right page lower fence_key before merge */
  /* NOTE(review): the condition for the block below is absent in this copy of the file. */
9847  {
9848  assert_release (right_cnt >= 1);
9850  R_start++;
9851  }
9852  }
9853
  /* NOTE(review): the value computed here is not read; right_cnt is reassigned from
   * btree_node_number_of_keys (R) before its next use further down. */
9854  right_cnt = right_cnt - (R_start + 1);
9855
9856  /* delete all records in P (should be just 2) */
9857
9858  /* delete second record */
9859  ret = btree_delete_meta_record (thread_p, btid, P, 2);
9860  if (ret != NO_ERROR)
9861  {
9862  goto exit_on_error;
9863  }
9864
9865  /* delete first record */
9866  ret = btree_delete_meta_record (thread_p, btid, P, 1);
9867  if (ret != NO_ERROR)
9868  {
9869  goto exit_on_error;
9870  }
9871
9872  /* Log the page Q records for undo/redo purposes on page P. */
9873  recset_header.rec_cnt = left_cnt;
9874  recset_header.first_slotid = 1;
9875  ret = btree_rv_util_save_page_records (thread_p, Q, 1, Q_end, 1, recset_data, &recset_length);
9876  if (ret != NO_ERROR)
9877  {
9878  goto exit_on_error;
9879  }
9880
9881  /* move content of the left page to the root page */
9882  for (i = 1; i <= Q_end; i++)
9883  {
9884  if (spage_get_record (thread_p, Q, i, &peek_rec, PEEK) != S_SUCCESS
9885  || spage_insert_at (thread_p, P, i, &peek_rec) != SP_SUCCESS)
9886  {
  /* Records 1 .. i-1 were already inserted into P; log them for undo before bailing out. */
9887  if (i > 1)
9888  {
9889  recset_header.rec_cnt = i - 1;
9890  recset_header.first_slotid = 1;
9891  log_append_undo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, P, -1, sizeof (RECSET_HEADER),
9892  &recset_header);
9893  }
9894  goto exit_on_error;
9895  }
9896  } /* for */
9897
9898  log_append_undoredo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, P, -1, sizeof (RECSET_HEADER),
9899  recset_length, &recset_header, recset_data);
9900
9901  /* Mark page as deallocated by setting its level to -1. This should cover cases when leaf page is re-fixed and used
9902  * before its deallocation. See BTREE_IS_PAGE_VALID_LEAF. */
  /* The previous level is logged as undo data before it is overwritten. */
9903  addr.vfid = NULL;
9904  addr.pgptr = Q;
9905  addr.offset = 0;
9906  q_header = btree_get_node_header (thread_p, Q);
9907  assert (q_header != NULL);
9908  log_append_undo_data (thread_p, RVBT_MARK_DEALLOC_PAGE, &addr, sizeof (q_header->node_level), &q_header->node_level);
9909  q_header->node_level = -1;
9910  pgbuf_set_dirty (thread_p, Q, DONT_FREE);
9911
9912  /* Log the page R records for undo purposes on page P. */
  /* right_cnt is recomputed here as the full key count of R. */
9913  right_cnt = btree_node_number_of_keys (thread_p, R);
9914
9915  recset_header.rec_cnt = right_cnt;
9916  recset_header.first_slotid = left_cnt + 1;
9917
9918  ret = btree_rv_util_save_page_records (thread_p, R, R_start, right_cnt, left_cnt + 1, recset_data, &recset_length);
9919  if (ret != NO_ERROR)
9920  {
9921  goto exit_on_error;
9922  }
9923
9924  /* move content of the right page to the root page */
  /* i walks the source slots of R starting at R_start; j counts copied records, which are placed in P right
   * after the left_cnt records that came from Q. */
9925  assert (R_start > 0);
9926  for (i = R_start, j = 1; j <= right_cnt; i++, j++)
9927  {
9928  assert (left_cnt + j > 0);
9929  if (spage_get_record (thread_p, R, i, &peek_rec, PEEK) != S_SUCCESS
9930  || spage_insert_at (thread_p, P, left_cnt + j, &peek_rec) != SP_SUCCESS)
9931  {
  /* Records already copied from R (j - 1 of them) are logged for undo before bailing out. */
9932  if (j > 1)
9933  {
9934  recset_header.rec_cnt = j - 1;
9935  recset_header.first_slotid = left_cnt + 1;
9936  log_append_undo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, P, -1, sizeof (RECSET_HEADER),
9937  &recset_header);
9938  }
9939  goto exit_on_error;
9940  }
9941  } /* for */
9942
9943  log_append_undoredo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, P, -1, sizeof (RECSET_HEADER),
9944  recset_length, &recset_header, recset_data);
9945
9946  /* Mark page as deallocated by setting its level to -1. This should cover cases when leaf page is re-fixed and used
9947  * before its deallocation. See BTREE_IS_PAGE_VALID_LEAF. */
9948  addr.vfid = NULL;
9949  addr.pgptr = R;
9950  addr.offset = 0;
9951  r_header = btree_get_node_header (thread_p, R);
9952  assert (r_header != NULL);
9953  log_append_undo_data (thread_p, RVBT_MARK_DEALLOC_PAGE, &addr, sizeof (r_header->node_level), &r_header->node_level);
9954  r_header->node_level = -1;
9955  pgbuf_set_dirty (thread_p, R, DONT_FREE);
9956
9957  /* update root page */
9958  root_header = btree_get_root_header (thread_p, P);
9959  if (root_header == NULL)
9960  {
9961  goto exit_on_error;
9962  }
9963
  /* The header change is bracketed by an undo log before and a redo log after the modification. */
9964  btree_node_header_undo_log (thread_p, &btid->sys_btid->vfid, P);
9965
9966  VPID_SET_NULL (&root_header->node.prev_vpid);
9967  VPID_SET_NULL (&root_header->node.next_vpid);
  /* The tree loses one level. */
9969  root_header->node.node_level--;
9970  assert_release (root_header->node.node_level > 1);
9971
9972  btree_node_header_redo_log (thread_p, &btid->sys_btid->vfid, P);
9973
9974 #if !defined(NDEBUG)
9975  {
9976  BTREE_NODE_HEADER *qheader = NULL, *rheader = NULL;
9977
9978  qheader = btree_get_node_header (thread_p, Q);
9979  assert (qheader != NULL);
9980
9981  rheader = btree_get_node_header (thread_p, R);
9982  assert (rheader != NULL);
9983
9984  assert (root_header->node.max_key_len == MAX (qheader->max_key_len, rheader->max_key_len));
9985  }
9986 #endif
9987
9988  pgbuf_set_dirty (thread_p, P, DONT_FREE);
9989
9991
9992 #if !defined(NDEBUG)
9993  btree_verify_node (thread_p, btid, P);
9994 #endif
9995
9996  return ret;
9997
9998 exit_on_error:
9999
  /* Never return NO_ERROR from the error path: keep ret if it is already an error, otherwise take er_errid (),
   * and fall back to ER_FAILED when that is NO_ERROR as well. */
10000  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
10001 }
10002 
10003 /*
10004  * btree_merge_node () -
10005  * return: NO_ERROR
10006  * btid(in): The B+tree index identifier
10007  * P(in): Page pointer for the parent page of page Q
10008  * Q(in): Page pointer for the child page of P that will be merged
10009  * R(in): Page pointer for the left or right sibling of page Q
10010  * next_page(in):
10011  * P_vpid(in): Page identifier for page P
10012  * Q_vpid(in): Page identifier for page Q
10013  * R_vpid(in): Page identifier for page R
10014  * p_slot_id(in): The slot of parent page P which points page to be merged (right page)
10015  * child_vpid(in): Child page identifier to be followed, Q or R.
10016  *
10017  * Note: Page Q is merged with page R which may be its left or right sibling. Depending on the efficiency of
10018  * the merge operation, it may take place on page Q or on page R to reduce the size of the data
10019  * that will be moved. After the merge operation either page Q or page R becomes ready for deallocation.
10020  * Deallocation is left to the calling routine.
10021  *
10022  * Note: The page which will be deallocated by the caller after a successful merge operation is not changed
10023  * by this routine.
10024  */
10025 static int
10026 btree_merge_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR left_pg, PAGE_PTR right_pg,
10027  INT16 p_slot_id, VPID * child_vpid, BTREE_MERGE_STATUS status)
10028 {
10029  int left_cnt, right_cnt;
10030  int i, ret = NO_ERROR;
10031  VPID *left_vpid = pgbuf_get_vpid_ptr (left_pg);
10032 
10033  /* record decoding */
10034  RECDES peek_rec;
10035  NON_LEAF_REC nleaf_pnt;
10036  LEAF_REC leaf_pnt;
10037  int offset;
10038 
10039  /* recovery */
10040  LOG_DATA_ADDR addr;
10041  char *recset_data; /* for recovery purposes */
10042  int recset_length; /* for recovery purposes */
10043  char recset_data_buf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
10044 
10045  /* merge & recompress buff */
10046  int left_prefix, right_prefix;
10047  int left_used, right_used, total_size;
10048  RECDES rec[MAX_LEAF_REC_NUM];
10049  int rec_idx;
10050  char merge_buf[(IO_MAX_PAGE_SIZE * 4) + MAX_ALIGNMENT];
10051  char *merge_buf_ptr = merge_buf;
10052  int merge_buf_size = sizeof (merge_buf);
10053  int merge_idx = 0;
10054 
10055  PGSLOTID sp_id;
10056 
10057  PAGE_PTR next_page_after_merged = NULL;
10058 
10059  BTREE_NODE_HEADER *left_header = NULL;
10060  BTREE_NODE_HEADER *right_header = NULL;
10061 
10062  /* Remember first and last slot id's for non-fence records in both left and right nodes. */
10063  int left_first_non_fence_slotid = NULL_SLOTID;
10064  int left_last_non_fence_slotid = NULL_SLOTID;
10065  int right_first_non_fence_slotid = NULL_SLOTID;
10066  int right_last_non_fence_slotid = NULL_SLOTID;
10067 
10068  DB_VALUE left_fence_key;
10069  DB_VALUE right_fence_key;
10070  bool left_fence_key_clear = false;
10071  bool right_fence_key_clear = false;
10072 
10073  bool merged_has_lower_fence = false;
10074  bool merged_has_upper_fence = false;
10075  RECDES merged_upper_fence_record;
10076  char merged_upper_fence_record_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
10077  int merged_prefix = 0;
10078 
10079 #if !defined(NDEBUG)
10081  {
10082  VPID *P_vpid = pgbuf_get_vpid_ptr (P);
10083  VPID *right_vpid = pgbuf_get_vpid_ptr (right_pg);
10084 
10085  printf ("btree_merge_node: P{%d, %d}, Q{%d, %d}, R{%d, %d}\n", P_vpid->volid, P_vpid->pageid, left_vpid->volid,
10086  left_vpid->pageid, right_vpid->volid, right_vpid->pageid);
10087  }
10088 #endif
10089 
10090 #if !defined(NDEBUG)
10091  btree_verify_node (thread_p, btid, P);
10092  btree_verify_node (thread_p, btid, left_pg);
10093 #endif
10094 
10095  /***********************************************************
10096  *** Merging two b-tree nodes (leaf or non-leaf).
10097  *** All records from right page must be moved to the left page.
10098  *** Next link in merged page and previous link in next page must be
10099  *** updated.
10100  *** If leaf nodes are merged and if they have fences, records may need
10101  *** to be decompressed and compressed again.
10102  ***********************************************************/
10103 
10104  /***********************************************************
10105  *** STEP 0: initializations, save undo image of left
10106  *** calculate uncompress size & memory alloc
10107  ***********************************************************/
10108  /* initializations */
10109  VPID_SET_NULL (child_vpid);
10110  recset_data = PTR_ALIGN (recset_data_buf, MAX_ALIGNMENT);
10111 
10112  left_cnt = btree_node_number_of_keys (thread_p, left_pg);
10113  right_cnt = btree_node_number_of_keys (thread_p, right_pg);
10114 
10115  left_header = btree_get_node_header (thread_p, left_pg);
10116  right_header = btree_get_node_header (thread_p, right_pg);
10117  assert (left_header != NULL && right_header != NULL);
10118 
10119  btree_init_temp_key_value (&left_fence_key_clear, &left_fence_key);
10120  btree_init_temp_key_value (&right_fence_key_clear, &right_fence_key);
10121 
10122  left_used = btree_node_size_uncompressed (thread_p, btid, left_pg);
10123  if (left_used < 0)
10124  {
10125  ASSERT_ERROR ();
10126  return left_used;
10127  }
10128  left_prefix = btree_node_common_prefix (thread_p, btid, left_pg);
10129  if (left_prefix < 0)
10130  {
10131  ASSERT_ERROR ();
10132  return left_prefix;
10133  }
10134 
10135  right_used = btree_node_size_uncompressed (thread_p, btid, right_pg);
10136  if (right_used < 0)
10137  {
10138  ASSERT_ERROR ();
10139  return right_used;
10140  }
10141  right_prefix = btree_node_common_prefix (thread_p, btid, right_pg);
10142  if (right_prefix < 0)
10143  {
10144  ASSERT_ERROR ();
10145  return right_prefix;
10146  }
10147 
10148  total_size = left_used + right_used + MAX_MERGE_ALIGN_WASTE;
10149  if (total_size > (int) sizeof (merge_buf))
10150  {
10151  merge_buf_size = total_size;
10152  merge_buf_ptr = (char *) db_private_alloc (thread_p, merge_buf_size);
10153  }
10154 
10155  /***********************************************************
10156  *** STEP 1: check current fences and new fences
10157  ***********************************************************/
10158  /* Handle left page fences. NOTE: If left page has only one record and that is fence, it is considered upper fence. */
10159  /* Left lower fence. */
10160  if (left_cnt >= 2 && btree_is_fence_key (left_pg, 1))
10161  {
10162  assert (left_header->node_level == 1);
10163  assert (!VPID_ISNULL (&left_header->prev_vpid));
10164 
10165  /* Non-fence records start from index 2. */
10166  left_first_non_fence_slotid = 2;
10167 
10168  /* This is used as lower fence for merged page. */
10169  merged_has_lower_fence = true;
10170 
10171  /* Lower fence will just be kept in left page. */
10172  /* Read lower fence value. */
10173  if (spage_get_record (thread_p, left_pg, 1, &peek_rec, PEEK) != S_SUCCESS)
10174  {
10175  assert_release (false);
10176  ret = ER_FAILED;
10177  goto exit_on_error;
10178  }
10179  ret =
10180  btree_read_record_without_decompression (thread_p, btid, &peek_rec, &left_fence_key, &leaf_pnt, BTREE_LEAF_NODE,
10181  &left_fence_key_clear, &offset, PEEK_KEY_VALUE);
10182  if (ret != NO_ERROR)
10183  {
10184  ASSERT_ERROR ();
10185  goto exit_on_error;
10186  }
10187  }
10188  else
10189  {
10190  /* No left lower fence. */
10191  left_first_non_fence_slotid = 1;
10192  }
10193 
10194  /* Left upper fence - it must be discarded */
10195  left_last_non_fence_slotid = (left_cnt >= 1 && btree_is_fence_key (left_pg, left_cnt)) ? (left_cnt - 1) : left_cnt;
10196 
10197  /* Handle right page fences. NOTE: If right page has only one record and that is fence, it is considered lower fence. */
10198  /* Right lower fence - it must be discarded */
10199  right_first_non_fence_slotid = (right_cnt >= 1 && btree_is_fence_key (right_pg, 1)) ? 2 : 1;
10200 
10201  /* Right upper fence. */
10202  if (right_cnt >= 2 && btree_is_fence_key (right_pg, right_cnt))
10203  {
10204  assert (right_header->node_level == 1);
10205  assert (!VPID_ISNULL (&right_header->next_vpid));
10206 
10207  /* Non-fence records stop before this. */
10208  right_last_non_fence_slotid = right_cnt - 1;
10209 
10210  /* This will be upper fence for merged page too. */
10211  merged_has_upper_fence = true;
10212 
10213  /* Copy fence record from right page (to be later moved to left page). */
10214  merged_upper_fence_record.area_size = DB_PAGESIZE;
10215  merged_upper_fence_record.data = PTR_ALIGN (merged_upper_fence_record_buffer, BTREE_MAX_ALIGN);
10216  if (spage_get_record (thread_p, right_pg, right_cnt, &merged_upper_fence_record, COPY) != S_SUCCESS)
10217  {
10218  assert_release (false);
10219  ret = ER_FAILED;
10220  goto exit_on_error;
10221  }
10222  ret =
10223  btree_read_record_without_decompression (thread_p, btid, &merged_upper_fence_record, &right_fence_key,
10224  &leaf_pnt, BTREE_LEAF_NODE, &right_fence_key_clear, &offset,
10225  PEEK_KEY_VALUE);
10226  if (ret != NO_ERROR)
10227  {
10228  ASSERT_ERROR ();
10229  goto exit_on_error;
10230  }
10231  }
10232  else
10233  {
10234  right_last_non_fence_slotid = right_cnt;
10235  }
10236 
10237  /* Merged page prefix. */
10238  /* Prefix after merge cannot be better than left or right page prefix. */
10239  if (TP_DOMAIN_TYPE (btid->key_type) == DB_TYPE_MIDXKEY && merged_has_lower_fence && merged_has_upper_fence)
10240  {
10241  /* Get common prefix of left_fence_key and right_fence_key. */
10242  assert (!DB_IS_NULL (&left_fence_key));
10243  assert (!DB_IS_NULL (&right_fence_key));
10244  merged_prefix = pr_midxkey_common_prefix (&left_fence_key, &right_fence_key);
10245  }
10246 
10247  /***********************************************************
10248  *** STEP 2: copy (or keep) left.
10249  *** Copy if common prefix has changed (to compress records
10250  *** again).
10251  *** Keep records as they are if common prefix didn't change.
10252  ***********************************************************/
10253  rec_idx = -1;
10254  merge_idx = 0;
10255 
10256 /* NEXT_MERGE_RECORD - advance in rec array and prepare new record descriptor
10257  */
10258 #define NEXT_MERGE_RECORD() \
10259  do \
10260  { \
10261  /* If not first record, add last record length to merge_idx. */ \
10262  merge_idx += (rec_idx >= 0) ? rec[rec_idx].length : 0; \
10263  /* Increment record index. */ \
10264  rec_idx++; \
10265  /* Set aligned record data pointer. */ \
10266  rec[rec_idx].data = \
10267  PTR_ALIGN (&merge_buf_ptr[merge_idx], BTREE_MAX_ALIGN); \
10268  /* Update merge_idx. */ \
10269  merge_idx = CAST_BUFLEN (rec[rec_idx].data - merge_buf_ptr); \
10270  /* Set area size. */ \
10271  rec[rec_idx].area_size = merge_buf_size - merge_idx; \
10272  rec[rec_idx].type = REC_HOME; \
10273  } \
10274  while (false)
10275 
10276  if (merged_prefix != left_prefix)
10277  {
10278  /* Left page records must be recompressed. */
10279  /* Copy current records in rec array, update them considering new fences and we will later add them back in page. */
10280  for (i = left_first_non_fence_slotid; i <= left_last_non_fence_slotid; i++)
10281  {
10282  NEXT_MERGE_RECORD ();
10283 
10284  assert (!btree_is_fence_key (left_pg, i));
10285  if (spage_get_record (thread_p, left_pg, i, &rec[rec_idx], COPY) != S_SUCCESS)
10286  {
10287  assert_release (false);
10288  ret = ER_FAILED;
10289  goto exit_on_error;
10290  }
10291  ret = btree_recompress_record (thread_p, btid, &rec[rec_idx], &left_fence_key, left_prefix, merged_prefix);
10292  if (ret != NO_ERROR)
10293  {
10294  ASSERT_ERROR ();
10295  goto exit_on_error;
10296  }
10297  }
10298  }
10299 
10300  /* Left fence key no longer required. */
10301  btree_clear_key_value (&left_fence_key_clear, &left_fence_key);
10302 
10303  /***********************************************************
10304  *** STEP 3: copy right page.
10305  ***********************************************************/
10306  for (i = right_first_non_fence_slotid; i <= right_last_non_fence_slotid; i++)
10307  {
10308  NEXT_MERGE_RECORD ();
10309 
10310  assert (!btree_is_fence_key (right_pg, i));
10311  if (spage_get_record (thread_p, right_pg, i, &rec[rec_idx], COPY) != S_SUCCESS)
10312  {
10313  assert_release (false);
10314  ret = ER_FAILED;
10315  goto exit_on_error;
10316  }
10317  ret = btree_recompress_record (thread_p, btid, &rec[rec_idx], &right_fence_key, right_prefix, merged_prefix);
10318  if (ret != NO_ERROR)
10319  {
10320  ASSERT_ERROR ();
10321  goto exit_on_error;
10322  }
10323  }
10324 
10325  /* Right fence key no longer required. */
10326  btree_clear_key_value (&right_fence_key_clear, &right_fence_key);
10327 
10328  /* Increment rec_idx one last time. */
10329  rec_idx++;
10330 
10331 /* NEXT_MERGE_RECORD definition no longer required. */
10332 #undef NEXT_MERGE_RECORD
10333 
10334 
10335  /***********************************************************
10336  *** STEP 4: Save left page (undo logging) before changing it.
10337  ***********************************************************/
10338  /* add undo logging for left_pg */
10339  log_append_undo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, left_pg, -1, DB_PAGESIZE, left_pg);
10340 
10341  /***********************************************************
10342  *** STEP 5: remove records from left page.
10343  ***********************************************************/
10344  /* Remove upper fence (if there is one). */
10345  if (left_cnt > left_last_non_fence_slotid)
10346  {
10347  assert (left_cnt == left_last_non_fence_slotid + 1);
10348  assert (btree_is_fence_key (left_pg, left_cnt));
10349  if (spage_delete (thread_p, left_pg, left_cnt) != left_cnt)
10350  {
10351  assert_release (false);
10352  ret = ER_FAILED;
10353  goto exit_on_error;
10354  }
10355  }
10356  /* Remove non-fence records. */
10357  if (merged_prefix != left_prefix)
10358  {
10359  /* All records from left have changed. Remove existing records. Keep lower fence key. */
10360  for (i = left_last_non_fence_slotid; i >= left_first_non_fence_slotid; i--)
10361  {
10362  assert (i > 0);
10363  assert (!btree_is_fence_key (left_pg, i));
10364  if (spage_delete (thread_p, left_pg, i) != i)
10365  {
10366  assert_release (false);
10367  ret = ER_FAILED;
10368  goto exit_on_error;
10369  }
10370  }
10371  }
10372  else
10373  {
10374  /* Records need no changes. They have not been saved in rec array. They are kept as they are. */
10375  }
10376 
10377  /***********************************************************
10378  *** STEP 6: append rec array.
10379  ***********************************************************/
10380  for (i = 0; i < rec_idx; i++)
10381  {
10382  if (spage_insert (thread_p, left_pg, &rec[i], &sp_id) != SP_SUCCESS)
10383  {
10384  assert_release (false);
10385  ret = ER_FAILED;
10386  goto exit_on_error;
10387  }
10388 
10389  assert (sp_id > 0);
10390  }
10391 
10392  /***********************************************************
10393  *** STEP 7: append upper fence.
10394  ***********************************************************/
10395  if (merged_has_upper_fence)
10396  {
10397  if (spage_insert (thread_p, left_pg, &merged_upper_fence_record, &sp_id) != SP_SUCCESS)
10398  {
10399  assert_release (false);
10400  ret = ER_FAILED;
10401  goto exit_on_error;
10402  }
10403  }
10404 
10405  /***********************************************************
10406  *** STEP 8: update child link of page P
10407  ***********************************************************/
10408  /* get and log the old node record to be deleted for undo purposes */
10409  assert (p_slot_id > 0);
10410  if (spage_get_record (thread_p, P, p_slot_id, &peek_rec, PEEK) != S_SUCCESS)
10411  {
10412  assert_release (false);
10413  ret = ER_FAILED;
10414  goto exit_on_error;
10415  }
10416 
10417  btree_read_fixed_portion_of_non_leaf_record (&peek_rec, &nleaf_pnt);
10418 
10419  if (nleaf_pnt.key_len < 0)
10420  { /* overflow key */
10421  /* get the overflow manager to delete the key */
10422  ret = btree_delete_overflow_key (thread_p, btid, P, p_slot_id, BTREE_NON_LEAF_NODE);
10423  if (ret != NO_ERROR)
10424  {
10425  assert_release (false);
10426  goto exit_on_error;
10427  }
10428  }
10429 
10430  btree_rv_write_log_record (recset_data, &recset_length, &peek_rec, BTREE_NON_LEAF_NODE);
10431  log_append_undoredo_data2 (thread_p, RVBT_NDRECORD_DEL, &btid->sys_btid->vfid, P, p_slot_id, recset_length,
10432  sizeof (p_slot_id), recset_data, &p_slot_id);
10434 
10435  assert (p_slot_id > 0);
10436  if (spage_delete (thread_p, P, p_slot_id) != p_slot_id)
10437  {
10438  assert_release (false);
10439  ret = ER_FAILED;
10440  goto exit_on_error;
10441  }
10442 
10443  pgbuf_set_dirty (thread_p, P, DONT_FREE);
10444 
10445  *child_vpid = *left_vpid;
10446 
10447  /***********************************************************
10448  *** STEP 9: update left page header info
10449  *** write redo log for left
10450  ***********************************************************/
10451  VPID_COPY (&left_header->next_vpid, &right_header->next_vpid);
10452  left_header->max_key_len = MAX (left_header->max_key_len, right_header->max_key_len);
10453  btree_write_default_split_info (&(left_header->split_info));
10454 
10455  /***********************************************************
10456  *** STEP 10: log (redo) changes of left page.
10457  ***********************************************************/
10458  /* add redo logging for left_pg */
10459  log_append_redo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, left_pg, -1, DB_PAGESIZE, left_pg);
10460  pgbuf_set_dirty (thread_p, left_pg, DONT_FREE);
10461 
10462  /***********************************************************
10463  *** STEP 11: Mark page as deallocated by setting its level to -1.
10464  *** This should cover cases when leaf page is re-fixed and used
10465  *** before its deallocation.
10466  *** See BTREE_IS_PAGE_VALID_LEAF.
10467  ***********************************************************/
10468  addr.vfid = NULL;
10469  addr.pgptr = right_pg;
10470  addr.offset = 0;
10471  assert (right_header != NULL);
10472  log_append_undo_data (thread_p, RVBT_MARK_DEALLOC_PAGE, &addr, sizeof (right_header->node_level),
10473  &right_header->node_level);
10474  right_header->node_level = -1;
10475  pgbuf_set_dirty (thread_p, right_pg, DONT_FREE);
10476 
10478 
10479 #if !defined(NDEBUG)
10480  btree_verify_node (thread_p, btid, P);
10481  btree_verify_node (thread_p, btid, left_pg);
10482 #endif
10483 
10484  /***********************************************************
10485  *** STEP 12: update link for next leaf node after the
10486  *** merged nodes to point to the left page.
10487  ***********************************************************/
10488  next_page_after_merged = btree_get_next_page (thread_p, left_pg);
10489  if (next_page_after_merged != NULL)
10490  {
10491  /* Update previous link. */
10492 #if !defined (NDEBUG)
10493  (void) pgbuf_check_page_ptype (thread_p, next_page_after_merged, PAGE_BTREE);
10494 #endif /* !NDEBUG */
10495  ret = btree_set_vpid_previous_vpid (thread_p, btid, next_page_after_merged, left_vpid);
10496  pgbuf_unfix_and_init (thread_p, next_page_after_merged);
10497  if (ret != NO_ERROR)
10498  {
10499  ASSERT_ERROR ();
10500  goto exit_on_error;
10501  }
10502  }
10503 
10504  if (merge_buf_ptr != merge_buf)
10505  {
10506  db_private_free_and_init (thread_p, merge_buf_ptr);
10507  }
10508 
10509  /* Success. */
10510  return NO_ERROR;
10511 
10512 exit_on_error:
10513 
10514  assert_release (ret != NO_ERROR);
10515 
10516  if (merge_buf_ptr != merge_buf)
10517  {
10518  db_private_free_and_init (thread_p, merge_buf_ptr);
10519  }
10520 
10521  btree_clear_key_value (&left_fence_key_clear, &left_fence_key);
10522  btree_clear_key_value (&right_fence_key_clear, &right_fence_key);
10523 
10524  return ret;
10525 }
10526 
10527 /*
10528  * btree_node_size_uncompressed -
10529  *
10530  * return:
10531  * thread_p(in):
10532  * btid(in):
10533  * page_ptr(in):
10534  *
10535  */
10536 static int
10538 {
10539  int used_size, key_cnt = 0, prefix, prefix_len, offset;
10540  RECDES rec;
10541  DB_VALUE key;
10542  bool clear_key = false;
10543  DB_MIDXKEY *midx_key = NULL;
10544  LEAF_REC leaf_pnt;
10545  int error;
10546 
10547  btree_init_temp_key_value (&clear_key, &key);
10548 
10549  used_size = DB_PAGESIZE - spage_get_free_space (thread_p, page_ptr);
10550 
10551  prefix = btree_node_common_prefix (thread_p, btid, page_ptr);
10552  if (prefix > 0)
10553  {
10554 #if !defined(NDEBUG)
10555  BTREE_NODE_HEADER *header = NULL;
10556 
10557  header = btree_get_node_header (thread_p, page_ptr);
10558 
10559  assert (header != NULL);
10560  assert (header->node_level == 1); /* BTREE_LEAF_NODE */
10561 #endif
10562 
10563  error = spage_get_record (thread_p, page_ptr, 1, &rec, PEEK);
10564  if (error != S_SUCCESS)
10565  {
10566  assert (false);
10567  return error;
10568  }
10569 
10570  error =
10571  btree_read_record_without_decompression (thread_p, btid, &rec, &key, &leaf_pnt, BTREE_LEAF_NODE, &clear_key,
10572  &offset, PEEK_KEY_VALUE);
10573  if (error != NO_ERROR)
10574  {
10575  return error;
10576  }
10577 
10580  assert (DB_VALUE_TYPE (&key) == DB_TYPE_MIDXKEY);
10581 
10582  midx_key = db_get_midxkey (&key);
10583 
10584  btree_clear_key_value (&clear_key, &key);
10585 
10586  prefix_len = pr_midxkey_get_element_offset (midx_key, prefix);
10587 
10588  /* at here, we can not calculate aligned size of uncompressed rec. alignment is already included in
10589  * CAN_MERGE_WHEN_EMPTY */
10590  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
10591  used_size += (key_cnt - 2) * prefix_len;
10592  }
10593  else if (prefix < 0)
10594  {
10595  return prefix;
10596  }
10597 
10598  return used_size;
10599 }
10600 
10601 /*
10602  * btree_node_mergeable -
10603  *
10604  * return:
10605  * thread_p(in):
10606  * btid(in):
10607  * L_page(in):
10608  * R_page(in):
10609  *
10610  */
10611 static BTREE_MERGE_STATUS
10612 btree_node_mergeable (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR L_page, PAGE_PTR R_page)
10613 {
10614  BTREE_NODE_HEADER *l_header = NULL, *r_header = NULL;
10615  BTREE_NODE_TYPE l_node_type, r_node_type;
10616  int L_used, R_used, L_cnt, R_cnt;
10617  int L_non_fence_cnt = 0, R_non_fence_cnt = 0;
10618 
10619  /* case 1 : one of page is empty. a page is considered empty if it has no keys or only fence keys. merge will be
10620  * forced. */
10621 
10622  L_cnt = btree_node_number_of_keys (thread_p, L_page);
10623  R_cnt = btree_node_number_of_keys (thread_p, R_page);
10624 
10625  if (L_cnt == 0)
10626  {
10627  /* Left page is completely empty. */
10628  return BTREE_MERGE_FORCE;
10629  }
10630  L_non_fence_cnt = L_cnt;
10631  if (btree_is_fence_key (L_page, 1))
10632  {
10633  L_non_fence_cnt--;
10634  }
10635  if (L_cnt > 1 && btree_is_fence_key (L_page, L_cnt))
10636  {
10637  L_non_fence_cnt--;
10638  }
10639  if (L_non_fence_cnt == 0)
10640  {
10641  /* Left page has only fence keys. If uncompressed right page fits a page, we should merge pages. */
10642  R_used = btree_node_size_uncompressed (thread_p, btid, R_page);
10643  if (R_used + FORCE_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10644  {
10645  return BTREE_MERGE_FORCE;
10646  }
10647  else if (R_used + CAN_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10648  {
10649  return BTREE_MERGE_TRY;
10650  }
10651  /* Uncompressed right page is too big. */
10652  return BTREE_MERGE_NO;
10653  }
10654 
10655  if (R_cnt == 0)
10656  {
10657  /* Right page is completely empty. */
10658  return BTREE_MERGE_FORCE;
10659  }
10660  R_non_fence_cnt = R_cnt;
10661  if (btree_is_fence_key (R_page, 1))
10662  {
10663  R_non_fence_cnt--;
10664  }
10665  if (R_cnt > 1 && btree_is_fence_key (R_page, R_cnt))
10666  {
10667  R_non_fence_cnt--;
10668  }
10669  if (R_non_fence_cnt == 0)
10670  {
10671  /* Right page has only fence keys. If uncompressed left page fits a page, we should merge pages. */
10672  L_used = btree_node_size_uncompressed (thread_p, btid, L_page);
10673  if (L_used + FORCE_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10674  {
10675  return BTREE_MERGE_FORCE;
10676  }
10677  else if (L_used + CAN_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10678  {
10679  return BTREE_MERGE_TRY;
10680  }
10681  /* Uncompressed right page is too big. */
10682  return BTREE_MERGE_NO;
10683  }
10684 
10685  /* case 2: each page has only one key. merge will be forced. */
10686 
10687  if (L_non_fence_cnt == 1 && R_non_fence_cnt == 1)
10688  {
10689  return BTREE_MERGE_FORCE;
10690  }
10691 
10692  /* case 3 : size */
10693 
10694  l_header = btree_get_node_header (thread_p, L_page);
10695  if (l_header == NULL)
10696  {
10697  assert_release (false);
10698  return BTREE_MERGE_NO;
10699  }
10700 
10701  r_header = btree_get_node_header (thread_p, R_page);
10702  if (r_header == NULL)
10703  {
10704  assert_release (false);
10705  return BTREE_MERGE_NO;
10706  }
10707 
10708  l_node_type = (l_header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
10709  r_node_type = (r_header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
10710 
10711  assert (l_node_type == r_node_type);
10712 
10713  L_used = DB_PAGESIZE - spage_get_free_space (thread_p, L_page);
10714  R_used = DB_PAGESIZE - spage_get_free_space (thread_p, R_page);
10715  if (L_used + R_used + CAN_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10716  {
10717  /* check uncompressed size */
10718  if (l_node_type == BTREE_LEAF_NODE)
10719  {
10720  /* recalculate uncompressed left size */
10721  L_used = btree_node_size_uncompressed (thread_p, btid, L_page);
10722  if (L_used < 0)
10723  {
10724  return BTREE_MERGE_NO;
10725  }
10726 
10727  if (L_used + R_used + CAN_MERGE_WHEN_EMPTY >= DB_PAGESIZE)
10728  {
10729  return BTREE_MERGE_NO;
10730  }
10731 
10732  /* recalculate uncompressed right size */
10733  R_used = btree_node_size_uncompressed (thread_p, btid, R_page);
10734 
10735  if (L_used + R_used + CAN_MERGE_WHEN_EMPTY >= DB_PAGESIZE)
10736  {
10737  /* can recalculate used size after recompress with new fence key (can return true in some cases) but
10738  * split and merge will be done more frequently (trade off of space and SMO) */
10739  return BTREE_MERGE_NO;
10740  }
10741  }
10742 
10743  if (L_used + R_used + FORCE_MERGE_WHEN_EMPTY < DB_PAGESIZE)
10744  {
10745  /* Merge must be executed. */
10746  return BTREE_MERGE_FORCE;
10747  }
10748 
10749  /* Merge can be executed. If promotions fail, it will be skipped. */
10750  return BTREE_MERGE_TRY;
10751  }
10752 
10753  return BTREE_MERGE_NO;
10754 }
10755 
10756 /*
10757  * btree_key_append_object_as_new_overflow () - Insert object into a new overflow page.
10758  *
10759  * return : NO_ERROR, or error code (overflow page creation failed, or ER_FAILED if leaf record update failed).
10760  * thread_p (in) : Thread entry.
10761  * btid_int (in) : B-tree info.
10762  * leaf_page (in) : Leaf page.
10763  * object_info (in) : Object & info.
10764  * insert_helper (in) : B-tree insert helper.
10765  * search_key (in) : Search key result.
10766  * leaf_rec (in) : Leaf record.
10767  * first_ovfl_vpid (in) : VPID of first overflow.
  *
  * Note: If no system operation is started in insert_helper, one is started here; it is ended on success and
  *       aborted on error. A system operation that was already started is left open.
  *
  * NOTE(review): the lines carrying the function name and its first parameters are not present in this listing.
10768  */
10769 static int
10772  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_rec,
10773  VPID * first_ovfl_vpid)
10774 {
10775  int ret = NO_ERROR;
10776  VPID ovfl_vpid;
10777  PAGE_PTR ovfl_page = NULL;
10778  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
10779  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
10780  char *rv_undo_data_ptr = NULL;
10781  int rv_undo_data_length = 0;
10782  int rv_redo_data_length = 0;
10783  bool save_sysop_started = false;
10784 
10785  LOG_LSA prev_lsa;
10786 
10787  /* Assert expected arguments. */
10788  assert (btid_int != NULL);
10789  assert (leaf_page != NULL);
10790  assert (object_info != NULL);
10791  assert (insert_helper != NULL);
10792  assert (search_key != NULL);
10793  assert (leaf_rec != NULL);
10794  assert (first_ovfl_vpid != NULL);
10795  assert (insert_helper->rv_redo_data != NULL && insert_helper->rv_redo_data_ptr != NULL);
10796  assert (btree_is_insert_object_purpose (insert_helper->purpose));
10797 
  /* Remember whether a system operation was already started, so that only one started here is ended or aborted. */
10798  save_sysop_started = insert_helper->is_system_op_started;
10799  if (!insert_helper->is_system_op_started)
10800  {
10801  log_sysop_start (thread_p);
10802  insert_helper->is_system_op_started = true;
10803  }
  /* Undo data is packed into the local aligned buffer, starting at its beginning. */
10805  rv_undo_data_ptr = rv_undo_data;
10806 
10807  /* Create overflow page. */
10808  /* Note that this page may be leaked if server crashes before changing the link in leaf page. */
10809  ret =
10810  btree_start_overflow_page (thread_p, btid_int, object_info, first_ovfl_vpid, pgbuf_get_vpid_ptr (leaf_page),
10811  &ovfl_vpid, &ovfl_page);
10812  if (ret != NO_ERROR)
10813  {
10814  ASSERT_ERROR ();
10815  goto error;
10816  }
10817 
10818 #if !defined (NDEBUG)
10819  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&insert_helper->leaf_addr, insert_helper->rv_redo_data_ptr, rv_undo_data_ptr,
10820  btid_int, BTREE_RV_DEBUG_ID_INS_NEW_OVF);
10821 #endif /* !NDEBUG */
10823 
10824  /* Update link in leaf record. */
  /* Both recovery data pointers are passed by address so the callee can append to them. */
10825  btree_leaf_record_change_overflow_link (thread_p, btid_int, leaf_rec, &ovfl_vpid, &rv_undo_data_ptr,
10826  &insert_helper->rv_redo_data_ptr);
10827 
10828  /* Update record in page. */
10829  if (spage_update (thread_p, leaf_page, search_key->slotid, leaf_rec) != SP_SUCCESS)
10830  {
10831  assert_release (false);
10832  ret = ER_FAILED;
10833  goto error;
10834  }
10835 
  /* Save the leaf page LSA before the change is logged; it is passed to btree_insert_log below. */
10836  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
10837 
10838  /* Logging changes on leaf. */
10839  BTREE_RV_GET_DATA_LENGTH (insert_helper->rv_redo_data_ptr, insert_helper->rv_redo_data, rv_redo_data_length);
10840  assert (rv_undo_data_ptr != NULL);
10841  /* Undo redo the operation. */
10842  /* Undo is physical in this case. */
10843  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
10844  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &insert_helper->leaf_addr, rv_undo_data_length,
10845  rv_redo_data_length, rv_undo_data, insert_helper->rv_redo_data);
10846 
  /* A system operation that was already started before this call is left open. */
10847  if (!save_sysop_started)
10848  {
10849  /* End system operation. */
10850  btree_insert_sysop_end (thread_p, insert_helper);
10851  }
10852 
10853  btree_insert_log (insert_helper,
10854  BTREE_INSERT_MODIFY_MSG ("create new overflow") "\t" PGBUF_PAGE_STATE_MSG ("new overflow page"),
10855  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, insert_helper->leaf_addr.pgptr, &prev_lsa, true,
10856  search_key->slotid, leaf_rec->length, btid_int->sys_btid),
10857  PGBUF_PAGE_STATE_ARGS (ovfl_page));
10858 
10859  /* Mark pages dirty and free overflow page. */
  /* The leaf page is passed with DONT_FREE, unlike the overflow page. */
10860  pgbuf_set_dirty (thread_p, ovfl_page, FREE);
10861  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
10862 
10863  return NO_ERROR;
10864 
10865 error:
  /* Abort the system operation only if it was started by this function and is still marked as started. */
10866  if (!save_sysop_started && insert_helper->is_system_op_started)
10867  {
10868  /* This might be a problem since compensate was not successfully executed. */
10870  log_sysop_abort (thread_p);
10871  insert_helper->is_system_op_started = false;
10872  }
  /* ovfl_page is non-NULL only if the overflow page was obtained before the failure. */
10873  if (ovfl_page != NULL)
10874  {
10875  pgbuf_unfix_and_init (thread_p, ovfl_page);
10876  }
10877  return ret;
10878 }
10879 
10880 /*
10881  * btree_key_append_object_to_overflow () - Insert object in an existing overflow page. The page must be
10882  * checked for free space before calling this function.
10883  *
10884  * return : NO_ERROR, or ER_FAILED if the overflow record cannot be read or updated.
10885  * thread_p (in) : Thread entry.
10886  * btid_int (in) : B-tree info.
10887  * ovfl_page (in) : Overflow page.
10888  * object_info (in) : Object & info to insert.
10889  * insert_helper (in) : B-tree insert helper.
  *
  * Note: The record in slot 1 of the overflow page is copied into a local buffer, the object is inserted in the
  *       copy, and the copy is written back to slot 1.
  *
  * NOTE(review): the lines carrying the function name and parameter list, and the declaration of
  *               rv_redo_data_ptr used below, are not present in this listing.
10890  */
10891 static int
10894 {
10895  RECDES ovfl_rec;
10896  char ovfl_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
10897  LOG_DATA_ADDR addr;
10898 
10899  LOG_LSA prev_lsa;
10900 
10901  /* Recovery. */
10902  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
10903  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
10904  char *rv_undo_data_ptr = NULL;
10905  int rv_undo_data_length = 0;
10906  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
10907  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
10909  int rv_redo_data_length;
10910 
10911  /* Assert expected arguments. */
10912  assert (btid_int != NULL);
10913  assert (ovfl_page != NULL);
10914  assert (object_info != NULL);
10915  assert (insert_helper != NULL);
10916  assert (btree_is_insert_object_purpose (insert_helper->purpose));
10917 
10918  /* Prepare record. */
10919  ovfl_rec.type = REC_HOME;
10920  ovfl_rec.area_size = DB_PAGESIZE;
10921  ovfl_rec.data = PTR_ALIGN (ovfl_rec_buf, BTREE_MAX_ALIGN);
10922 
10923  /* Get record. */
  /* COPY is used because the record is modified locally before being written back. */
10924  if (spage_get_record (thread_p, ovfl_page, 1, &ovfl_rec, COPY) != S_SUCCESS)
10925  {
10926  assert_release (false);
10927  return ER_FAILED;
10928  }
10929 
10930  /* Prepare logging. */
  /* The log address refers to slot 1 of the overflow page. */
10931  addr.offset = 1;
10932  addr.pgptr = ovfl_page;
10933  addr.vfid = &btid_int->sys_btid->vfid;
10934 
  /* Undo data is collected only inside a system operation; otherwise rv_undo_data_ptr stays NULL. */
10935  if (insert_helper->is_system_op_started)
10936  {
10937  /* Physical undo is necessary. */
10938  rv_undo_data_ptr = rv_undo_data;
10939  }
10940 
10941 #if !defined (NDEBUG)
10942  /* For debugging recovery. */
10943  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&addr, rv_redo_data_ptr, rv_undo_data_ptr, btid_int, BTREE_RV_DEBUG_ID_INS_OLD_OVF);
10944 #endif /* NDEBUG */
10947 
10948  /* Object must have fixed size. */
10949  BTREE_MVCC_INFO_SET_FIXED_SIZE (&object_info->mvcc_info);
10950  btree_insert_object_ordered_by_oid (thread_p, &ovfl_rec, btid_int, object_info, &rv_undo_data_ptr, &rv_redo_data_ptr,
10951  NULL);
10952 
  /* Write the modified copy back to slot 1. */
10953  if (spage_update (thread_p, ovfl_page, 1, &ovfl_rec) != SP_SUCCESS)
10954  {
10955  assert_release (false);
10956  return ER_FAILED;
10957  }
10958 
10959  /* We need to log previous lsa. */
10960  LSA_COPY (&prev_lsa, pgbuf_get_lsa (ovfl_page));
10961 
10962  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
  /* Without undo data, rv_undo_data_length keeps its initial value 0. */
10963  if (rv_undo_data_ptr != NULL)
10964  {
10965  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
10966  }
10967  btree_rv_log_insert_object (thread_p, *insert_helper, addr, rv_undo_data_length, rv_redo_data_length, rv_undo_data,
10968  rv_redo_data);
10969 
10970  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("append object at the end of record"),
10971  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, ovfl_page, &prev_lsa, false, 1, ovfl_rec.length,
10972  btid_int->sys_btid));
10973 
  /* The page is passed with DONT_FREE. */
10974  pgbuf_set_dirty (thread_p, ovfl_page, DONT_FREE);
10975 
10976  return NO_ERROR;
10977 }
10978 
10979 /*
10980  *
10981  */
10982 static int
10983 btree_rv_write_log_record_for_key_insert (char *log_rec, int *log_length, INT16 key_len, RECDES * recp)
10984 {
10985  *(INT16 *) ((char *) log_rec + LOFFS1) = key_len;
10986  *(INT16 *) ((char *) log_rec + LOFFS2) = BTREE_LEAF_NODE;
10987  *(INT16 *) ((char *) log_rec + LOFFS3) = recp->type;
10988  memcpy ((char *) log_rec + LOFFS4, recp->data, recp->length);
10989 
10990  *log_length = recp->length + LOFFS4;
10991 
10992  return NO_ERROR;
10993 }
10994 
10995 static int
10996 btree_rv_write_log_record (char *log_rec, int *log_length, RECDES * recp, BTREE_NODE_TYPE node_type)
10997 {
10998  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_NON_LEAF_NODE);
10999 
11000  *(INT16 *) ((char *) log_rec + OFFS1) = node_type;
11001  *(INT16 *) ((char *) log_rec + OFFS2) = recp->type;
11002  memcpy ((char *) log_rec + OFFS3, recp->data, recp->length);
11003 
11004  *log_length = recp->length + OFFS3;
11005 
11006  return NO_ERROR;
11007 }
11008 
11009 /*
11010  * btree_find_free_overflow_oids_page () - Find overflow page that has enough free space to store a new object.
11011  *
11012  * return : Error code.
11013  * thread_p (in) : Thread entry.
11014  * btid (in) : B-tree info.
11015  * first_ovfl_vpid (in) : VPID of first overflow page (or VPID NULL if there is no overflow).
11016  * overflow_page (out) : Output overflow page with enough free space.
11017  */
11018 static int
11019 btree_find_free_overflow_oids_page (THREAD_ENTRY * thread_p, BTID_INT * btid, VPID * first_ovfl_vpid,
11020  PAGE_PTR * overflow_page)
11021 {
11022  VPID ovfl_vpid;
11023  int space_needed = BTREE_OBJECT_FIXED_SIZE (btid);
11024  int error_code = NO_ERROR;
11025  PERF_UTIME_TRACKER ovf_fix_time_track;
11026 
11027  /* Assert expected arguments. */
11028  assert (btid != NULL);
11029  assert (first_ovfl_vpid != NULL);
11030  assert (overflow_page != NULL && *overflow_page == NULL);
11031 
11032  ovfl_vpid = *first_ovfl_vpid;
11033 
11034  PERF_UTIME_TRACKER_START (thread_p, &ovf_fix_time_track);
11035 
11036  while (!VPID_ISNULL (&ovfl_vpid))
11037  {
11038  *overflow_page = pgbuf_fix (thread_p, &ovfl_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
11039  if (*overflow_page == NULL)
11040  {
11041  ASSERT_ERROR_AND_SET (error_code);
11042  return error_code;
11043  }
11044 
11045 #if !defined (NDEBUG)
11046  (void) pgbuf_check_page_ptype (thread_p, *overflow_page, PAGE_BTREE);
11047 #endif /* !NDEBUG */
11048 
11049  if (spage_max_space_for_new_record (thread_p, *overflow_page) > space_needed)
11050  {
11051  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
11052  return NO_ERROR;
11053  }
11054 
11055  btree_get_next_overflow_vpid (thread_p, *overflow_page, &ovfl_vpid);
11056 
11057  pgbuf_unfix_and_init (thread_p, *overflow_page);
11058  }
11059 
11060  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
11061  return NO_ERROR;
11062 }
11063 
11064 /*
11065  * btree_find_oid_and_its_page () - Find OID in leaf/overflow pages and output its position.
11066  *
 * The leaf record is searched first. If the object is not there and the leaf record has an overflow link, the
 * overflow pages are fixed one by one (write latch) and searched until the object is found or the chain ends.
 *
11067  * return : Error code.
11068  * thread_p (in) : Thread entry.
11069  * btid_int (in) : B-tree info.
11070  * oid (in) : Object OID.
11071  * leaf_page (in) : Fixed leaf page (where object's key is found).
11072  * purpose (in) : Purpose/context for the function call.
11073  * match_mvccinfo (in) : Non-null value to be matched or null if it doesn't matter.
11074  * leaf_record (in) : Key leaf record.
11075  * leaf_rec_info (in) : Key leaf record info.
11076  * after_key_offset (in) : Offset in leaf record where packed key is ended.
11077  * found_page (out) : Outputs leaf or overflow page where object is found. Not modified if object is not found.
11078  * prev_page (out) : Previous page of the overflow page where object is found. If object is in leaf it
11079  * will output NULL. If object is in first overflow, it will output leaf page.
11080  * If argument is NULL, previous overflow page is unfixed.
11081  * offset_to_object (out) : Offset to object in the record of leaf/overflow, or NOT_FOUND.
 * object_mvcc_info (out) : Forwarded to btree_find_oid_from_leaf () and btree_find_oid_from_ovfl () to receive
 * the MVCC info of the object.
11082  *
11083  * TODO: output overflow record
11084  */
11085 static int
/* NOTE(review): the declarator lines carrying the function name and the first parameters are not present in this
 * listing - presumably lost in extraction; restore them from upstream. */
11088  LEAF_REC * leaf_rec_info, int after_key_offset, PAGE_PTR * found_page,
11089  PAGE_PTR * prev_page, int *offset_to_object, BTREE_MVCC_INFO * object_mvcc_info)
11090 {
11091  int error_code = NO_ERROR;
11092  VPID overflow_vpid;
11093  PAGE_PTR overflow_page = NULL;
11094  PAGE_PTR prev_overflow_page = NULL;
11095  PERF_UTIME_TRACKER ovf_fix_time_track;
11096 
11097  /* Assert expected arguments. */
11098  assert (btid_int != NULL);
11099  assert (oid != NULL);
11100  assert (leaf_page != NULL);
11101  assert (leaf_record != NULL);
11102  assert (leaf_rec_info != NULL);
11103  assert (after_key_offset > 0);
11104  assert (prev_page == NULL || *prev_page == NULL);
11105  assert (found_page != NULL && *found_page == NULL);
11106  assert (offset_to_object != NULL);
11107 
11108  /* Find object in leaf. */
11109  error_code =
11110  btree_find_oid_from_leaf (thread_p, btid_int, leaf_record, after_key_offset, oid, match_mvccinfo, purpose,
11111  offset_to_object, object_mvcc_info);
11112  if (error_code != NO_ERROR)
11113  {
11114  ASSERT_ERROR ();
11115  return error_code;
11116  }
11117  if (*offset_to_object != NOT_FOUND)
11118  {
11119  /* Found object. No overflow page was fixed, so prev_page is not set. */
11120  *found_page = leaf_page;
11121  return NO_ERROR;
11122  }
11123  if (VPID_ISNULL (&leaf_rec_info->ovfl))
11124  {
11125  /* Not found. There are no overflow pages to search. */
11126  return NO_ERROR;
11127  }
11128  /* Search through overflow pages. At most two overflow pages are kept fixed at a time: the current one and the
 * one before it. */
11129  VPID_COPY (&overflow_vpid, &leaf_rec_info->ovfl);
11130  do
11131  {
 /* Only the time spent fixing the page is tracked. */
11132  PERF_UTIME_TRACKER_START (thread_p, &ovf_fix_time_track);
11133  overflow_page = pgbuf_fix (thread_p, &overflow_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
11134  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
11135  if (overflow_page == NULL)
11136  {
11137  ASSERT_ERROR_AND_SET (error_code);
11138  goto error;
11139  }
11140  error_code =
11141  btree_find_oid_from_ovfl (thread_p, btid_int, overflow_page, oid, purpose, match_mvccinfo, offset_to_object,
11142  object_mvcc_info);
11143  if (error_code != NO_ERROR)
11144  {
11145  ASSERT_ERROR ();
11146  goto error;
11147  }
11148  if (*offset_to_object != NOT_FOUND)
11149  {
11150  /* Object was found. Stop looking. overflow_page stays fixed. */
11151  break;
11152  }
11153  /* Object was not found. Go to next overflow page. */
11154  if (prev_overflow_page != NULL)
11155  {
11156  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
11157  }
 /* The page just searched becomes the previous page; it is kept fixed while the next one is visited. */
11158  prev_overflow_page = overflow_page;
11159  overflow_page = NULL;
11160 
11161  error_code = btree_get_next_overflow_vpid (thread_p, prev_overflow_page, &overflow_vpid);
11162  if (error_code != NO_ERROR)
11163  {
11164  ASSERT_ERROR ();
11165  goto error;
11166  }
11167  }
11168  while (!VPID_ISNULL (&overflow_vpid));
11169 
11170  if (*offset_to_object == NOT_FOUND)
11171  {
11172  /* Not found. Release the last page visited; found_page and prev_page are not set. */
11173  assert (overflow_page == NULL);
11174  if (prev_overflow_page != NULL)
11175  {
11176  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
11177  }
11178  }
11179  else
11180  {
11181  assert (overflow_page != NULL);
11182  *found_page = overflow_page;
11183 
11184  if (prev_page != NULL)
11185  {
 /* Hand the previous page to the caller; for the first overflow page the previous page is the leaf. */
11186  *prev_page = prev_overflow_page != NULL ? prev_overflow_page : leaf_page;
11187  }
11188  else if (prev_overflow_page != NULL)
11189  {
 /* Caller does not want the previous page. */
11190  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
11191  }
11192  }
11193  return NO_ERROR;
11194 
11195 error:
11196  assert_release (error_code != NO_ERROR);
11197 
 /* Release every overflow page fixed by this function. The leaf page is not touched. */
11198  if (overflow_page != NULL)
11199  {
11200  pgbuf_unfix_and_init (thread_p, overflow_page);
11201  }
11202  if (prev_overflow_page != NULL)
11203  {
11204  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
11205  }
11206  return error_code;
11207 }
11208 
11209 /*
11210  * btree_find_oid_does_mvcc_info_match () - Match an object by its MVCC info and the purpose of search.
11211  *
11212  * return : Error code. ER_FAILED is returned only from the branch that handles a just-inserted object, when
 * there is no insert MVCCID to match and the unexpected case flagged there occurs.
11213  * thread_p (in) : Thread entry.
11214  * mvcc_info (in) : Object MVCC info.
11215  * purpose (in) : Btree operation purpose.
11216  * match_mvccinfo (in) : MVCC info to be matched (or NULL if not necessary).
11217  * is_match (out) : Outputs true if object MVCC info matches the expectations. Always reset to false first.
11218  *
11219  * NOTE: This function can handle mismatches between information stored in heap and b-tree. Because vacuum system
11220  * doesn't clean the entries for one object in both heap and b-trees, the information found in them can be
11221  * different (e.g. one can have insert MVCCID cleaned while the other doesn't).
11222  * Moreover, if the object OIDs are reusable, there can be duplicate OIDs in b-tree (one is deleted and must be
11223  * vacuumed and one is newer and can be recently inserted or even recently deleted).
11224  * Based on purpose of the search, we try to match the insert MVCCID or delete MVCCID or just check that object
11225  * doesn't have a valid delete MVCCID.
11226  */
11227 static int
/* NOTE(review): the declarator line carrying the function name and the first parameters is not present in this
 * listing - presumably lost in extraction; restore it from upstream. */
11229  BTREE_MVCC_INFO * match_mvccinfo, bool * is_match)
11230 {
11231  /* Assert expected arguments. */
11232  assert (mvcc_info != NULL);
11233  assert (is_match != NULL);
11234 
11235  *is_match = false;
11236  switch (purpose)
11237  {
 /* NOTE(review): every "case" label of this switch except "default" is missing from this listing. Each group of
 * statements below that ends in "return" belongs to one or more purposes; take the labels from upstream. */
11239  /* Match insert MVCCID to vacuum. */
11240  assert (match_mvccinfo != NULL && BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE (match_mvccinfo));
11241  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info) && mvcc_info->insert_mvccid == match_mvccinfo->insert_mvccid)
11242  {
11243  /* This is the insert MVCCID to be vacuumed. */
11244  *is_match = true;
11245  }
11246  else
11247  {
11248  /* Not a match. */
11249  }
11250  return NO_ERROR;
11251 
11254  /* Match delete MVCCID to not remove the wrong object (reused). */
11255  assert (match_mvccinfo != NULL && BTREE_MVCC_INFO_IS_DELID_VALID (match_mvccinfo));
11256  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info) && mvcc_info->delete_mvccid == match_mvccinfo->delete_mvccid)
11257  {
11258  /* This is the object to be vacuumed/deleted. */
11259  *is_match = true;
11260  }
11261  else
11262  {
11263  /* Not a match. */
11264  }
11265  return NO_ERROR;
11266 
11268  /* We want to rollback an MVCC delete. Just removing the delete MVCCID is enough. If delete MVCCID does not
11269  * match, it means it must be an older object, before being reused, which was not vacuumed yet. */
11270  assert (match_mvccinfo != NULL && BTREE_MVCC_INFO_IS_DELID_VALID (match_mvccinfo));
11271  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
11272  {
11273  if (mvcc_info->delete_mvccid == match_mvccinfo->delete_mvccid)
11274  {
11275  /* Maybe we have to match insert MVCCID too. This is required only when the MVCC info to match carries an
 * insert MVCCID that is not all visible. */
11276  if (BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE (match_mvccinfo)
11277  && BTREE_MVCC_INFO_INSID (mvcc_info) != match_mvccinfo->insert_mvccid)
11278  {
11279  /* Not a match */
11280  }
11281  else
11282  {
11283  /* It's a match. */
11284  *is_match = true;
11285  }
11286  }
11287  else
11288  {
11289  /* Not a match. */
11290  }
11291  }
11292  else
11293  {
11294  /*
11295  * No delete MVCCID. In case of multi updates, we may have the same OID twice in buffer, but with different
11296  * MVCC info. Thus, the OID may appear first with MVCC insert id only. Then, the same OID appears again with
11297  * MVCC delete id. We have to continue the search if the MVCC info does not match.
11298  */
11299  }
11300 
11301  return NO_ERROR;
11302 
11305  /* We just inserted this object and want to remove it. Insert MVCCID must match and should not be deleted. */
11306  if (BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info))
11307  {
11308  /* This must be an old object, reused, but not yet vacuumed. */
11309  /* Not a match. */
11310  return NO_ERROR;
11311  }
11312  if (match_mvccinfo != NULL && BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE (match_mvccinfo))
11313  {
11314  /* We must match insert MVCCID. */
11315  if (BTREE_MVCC_INFO_INSID (mvcc_info) == match_mvccinfo->insert_mvccid)
11316  {
11317  /* This is a match. */
11318  *is_match = true;
11319  return NO_ERROR;
11320  }
11321  else
11322  {
11323  /* Insert MVCCID is different or doesn't exist. We don't expect such case. Note that NO_ERROR is still
 * returned, with *is_match left false. */
11324  assert_release (false);
11325  return NO_ERROR;
11326  }
11327  }
11328  /* We don't have an insert MVCCID to be matched. Object MVCC info should either have no insert MVCCID or it
11329  * should be all visible. */
 /* NOTE(review): the "if (...)" line that owns the block below is missing from this listing. */
11331  {
11332  /* We don't expect this case. */
11333  assert_release (false);
11334  return ER_FAILED;
11335  }
11336  else
11337  {
11338  /* This is a match. */
11339  *is_match = true;
11340  return NO_ERROR;
11341  }
11342  /* Unreachable. Both branches above return. */
11343  assert_release (false);
11344  return ER_FAILED;
11345 
11349  default:
11350  /* We are looking for an object not deleted. It is possible to find same OID, but deleted, if reusable. */
11351  if (!BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info))
11352  {
11353  /* This is the object we want to delete. */
11354  *is_match = true;
11355  }
11356  else
11357  {
11358  /* This must be same OID but before being reused. It should be vacuumed soon. */
11359  /* Not a match. */
11360  }
11361  return NO_ERROR;
11362  }
11363 }
11364 
11365 /*
11366  * btree_find_oid_from_leaf () - Find OID in leaf record and output its offset and MVCC info.
11367  *
 * The objects of the leaf record are read sequentially. For every object whose OID equals the searched one,
 * btree_find_oid_does_mvcc_info_match () decides whether it is the wanted object; if not, the scan continues.
 *
11368  * return : Error code.
11369  * thread_p (in) : Thread entry.
11370  * btid (in) : B-tree info.
11371  * leaf_record (in) : Leaf record.
11372  * after_key_offset (in) : Offset in record where packed key is ended.
11373  * oid (in) : OID of object to find.
11374  * match_mvccinfo (in) : Non-null value to be matched or null if it doesn't matter.
11375  * purpose (in) : Purpose/context for the call.
11376  * offset_to_object (out) : Output offset to found object or NOT_FOUND (also set to NOT_FOUND on error).
11377  * mvcc_info (out) : Output object MVCC info when found. May be NULL if the caller does not need it.
11378  */
11379 static int
11380 btree_find_oid_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, RECDES * leaf_record, int after_key_offset,
11381  OID * oid, BTREE_MVCC_INFO * match_mvccinfo, BTREE_OP_PURPOSE purpose, int *offset_to_object,
/* NOTE(review): the last line of the parameter list (declaring mvcc_info, which is used below) is not present in
 * this listing - presumably lost in extraction; restore it from upstream. */
11383 {
11384  OR_BUF buf; /* Buffer to read record. */
11385  OID inst_oid; /* OID read from record. */
11386  OID class_oid; /* Class OID read from record. */
11387  BTREE_MVCC_INFO local_mvcc_info; /* Local MVCC info. */
11388  int error_code = NO_ERROR; /* Error code. */
11389  bool is_match = false; /* Set to true when OID is found and MVCC info matches expectations. */
11390  bool is_first = true; /* Used to skip packed key. */
11391 
11392  assert (btid != NULL);
11393  assert (leaf_record != NULL);
11394  assert (after_key_offset > 0);
11395  assert (oid != NULL);
11396  assert (offset_to_object != NULL);
11397 
11398  if (mvcc_info == NULL)
11399  {
11400  /* MVCC info is not for output but is required internally. */
11401  mvcc_info = &local_mvcc_info;
11402  }
11403 
11404  BTREE_RECORD_OR_BUF_INIT (buf, leaf_record);
11405  while (buf.ptr < buf.endptr)
11406  {
11407  /* If the object has fixed size, it is forced to have both insert MVCCID and delete MVCCID. This can happen if:
11408  * 1. The index is unique, and this is not the key's first object. 2. The key has overflow OIDs and this is the
11409  * first object. In any other cases follow the MVCC flags. */
 /* Remember where the object about to be read starts; this is the value output when it turns out to match. */
11410  *offset_to_object = CAST_BUFLEN (buf.ptr - buf.buffer);
11411 
11412  /* Get object and all its information from record. */
11413  if (btree_or_get_object (&buf, btid, BTREE_LEAF_NODE, after_key_offset, &inst_oid, &class_oid, mvcc_info) !=
11414  NO_ERROR)
11415  {
11416  assert_release (false);
11417  error_code = ER_FAILED;
11418  goto error;
11419  }
11420 
11421  /* Is the OID we're searching? */
11422  if (OID_EQ (&inst_oid, oid))
11423  {
11424  /* OID matches. */
11425  /* Is MVCC info according to expectations? */
11426  error_code = btree_find_oid_does_mvcc_info_match (thread_p, mvcc_info, purpose, match_mvccinfo, &is_match);
11427  if (error_code != NO_ERROR)
11428  {
11429  ASSERT_ERROR ();
11430  goto error;
11431  }
11432  if (is_match)
11433  {
11434  /* Object is a match. *offset_to_object and *mvcc_info already describe it. */
11435  return NO_ERROR;
11436  }
11437  /* Not our object. */
11438  /* Continue looking. */
11439  }
11440  if (is_first)
11441  {
11442  /* Skip key. It is stored right after the first object; the remaining objects start at after_key_offset. */
11443  or_seek (&buf, after_key_offset);
11444  is_first = false;
11445  }
11446  }
11447  /* Object was not found. */
11448  *offset_to_object = NOT_FOUND;
11449  return NO_ERROR;
11450 
11451 error:
11452  assert_release (error_code != NO_ERROR);
11453  *offset_to_object = NOT_FOUND;
11454  return error_code;
11455 }
11456 
11457 /*
11458  * btree_find_oid_from_ovfl () - Find object in overflow page.
11459  *
 * The record in slot 1 of the overflow page holds fixed-size objects. The first and the last object are checked
 * first (early outs), then a binary search by OID is run over the objects in between. When the binary search
 * hits the OID, btree_seq_find_oid_from_ovfl () scans the neighbouring objects that share the same OID.
 *
11460  * return : Error code.
11461  * thread_p (in) : Thread entry.
11462  * btid_int (in) : B-tree info.
11463  * overflow_page (in) : Overflow page.
11464  * oid (in) : OID to find.
11465  * purpose (in) : Purpose of call.
11466  * match_mvccinfo (in) : Non-null value to be matched or null if it doesn't matter.
11467  * offset_to_object (out) : If object is found, it saves the offset to object. Otherwise, NOT_FOUND is output.
11468  * mvcc_info (out) : Output MVCC info if object is found. May be NULL if the caller does not need it.
11469  */
11470 static int
/* NOTE(review): the declarator lines carrying the function name, the first parameters and the mvcc_info parameter
 * are not present in this listing - presumably lost in extraction; restore them from upstream. */
11472  BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO * match_mvccinfo, int *offset_to_object,
11474 {
11475  OID inst_oid; /* OID read from record. */
11476  int min, mid, max; /* min, mid, max values used for binary search. */
11477  int num_oids; /* Number of objects in record. */
11478  int size; /* Object and all its info size */
11479  int error_code = NO_ERROR; /* Error code. */
11480  char *oid_ptr = NULL, *ptr = NULL; /* Pointer in record data. */
11481  BTREE_MVCC_INFO local_mvcc_info; /* Local MVCC info. */
11482  RECDES ovf_record; /* Overflow record. */
11483  bool is_match = false; /* Set to true if object is found and its MVCC info matches expectations. */
11484 
11485  /* Assert expected arguments. */
11486  assert (btid_int != NULL);
11487  assert (overflow_page != NULL);
11488  assert (oid != NULL);
11489  assert (offset_to_object != NULL);
11490 
11491  if (mvcc_info == NULL)
11492  {
11493  /* MVCC info is not for output but is required internally. */
11494  mvcc_info = &local_mvcc_info;
11495  }
11496 
 /* Default output; it is overwritten only when a matching object is found. The record is only peeked. */
11497  *offset_to_object = NOT_FOUND;
11498  if (spage_get_record (thread_p, overflow_page, 1, &ovf_record, PEEK) != S_SUCCESS)
11499  {
11500  assert_release (false);
11501  return ER_FAILED;
11502  }
11503 
11504  /* Try early out: check first oid */
11505  BTREE_GET_OID (ovf_record.data, &inst_oid);
11506  assert ((inst_oid.slotid & BTREE_LEAF_RECORD_MASK) == 0);
11507  assert ((inst_oid.volid & BTREE_OID_MVCC_FLAGS_MASK) == 0);
11508 
11509  if (OID_LT (oid, &inst_oid))
11510  {
11511  /* Not in this page. The searched OID is smaller than the first OID of the record. */
11512  return NO_ERROR;
11513  }
11514  else if (OID_EQ (oid, &inst_oid))
11515  {
11516  /* OID matched. */
11517  /* Check MVCC info. */
 /* Position ptr after the OID and, for unique indexes, after one more OID-sized field (presumably the class OID). */
11518  ptr = ovf_record.data + OR_OID_SIZE;
11519  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
11520  {
11521  ptr += OR_OID_SIZE;
11522  }
 /* NOTE(review): a listing line is missing here; presumably it reads the MVCC info at ptr into mvcc_info. */
11524  error_code = btree_find_oid_does_mvcc_info_match (thread_p, mvcc_info, purpose, match_mvccinfo, &is_match);
11525  if (error_code != NO_ERROR)
11526  {
11527  ASSERT_ERROR ();
11528  return error_code;
11529  }
11530  if (is_match)
11531  {
11532  /* First object is a match. */
11533  *offset_to_object = 0;
11534  return NO_ERROR;
11535  }
11536  }
11537  /* First object is not a match. */
11538 
11539  /* Try early out: check last object. */
11540 
11541  /* Find last object. */
11542  /* Compute size of one object and all its info. */
11543  size = BTREE_OBJECT_FIXED_SIZE (btid_int);
11544  /* Get last object. */
11545  oid_ptr = ovf_record.data + (ovf_record.length - size);
11546  BTREE_GET_OID (oid_ptr, &inst_oid);
11547  assert ((inst_oid.slotid & BTREE_LEAF_RECORD_MASK) == 0);
11548  assert ((inst_oid.volid & BTREE_OID_MVCC_FLAGS_MASK) == 0);
11549 
11550  if (OID_GT (oid, &inst_oid))
11551  {
11552  /* Not in this page. The searched OID is greater than the last OID of the record. */
11553  return NO_ERROR;
11554  }
11555  else if (OID_EQ (oid, &inst_oid))
11556  {
11557  /* OID matched. */
11558  /* Check MVCC info. */
11559  ptr = oid_ptr + OR_OID_SIZE;
11560  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
11561  {
11562  ptr += OR_OID_SIZE;
11563  }
 /* NOTE(review): a listing line is missing here; presumably it reads the MVCC info at ptr into mvcc_info. */
11565  error_code = btree_find_oid_does_mvcc_info_match (thread_p, mvcc_info, purpose, match_mvccinfo, &is_match);
11566  if (error_code != NO_ERROR)
11567  {
11568  ASSERT_ERROR ();
11569  return error_code;
11570  }
11571  if (is_match)
11572  {
11573  /* Last object is a match. */
11574  *offset_to_object = CAST_BUFLEN (oid_ptr - ovf_record.data);
11575  return NO_ERROR;
11576  }
11577  }
11578  /* Early outs failed. Do a binary search after OID. */
11579 
11580  num_oids = btree_record_get_num_oids (thread_p, btid_int, &ovf_record, 0, BTREE_OVERFLOW_NODE);
11581 
11582  /* First and last object were already checked. Try others. With fewer than three objects the range is empty and
 * the loop below does not run. */
11583  min = 1;
11584  max = num_oids - 2;
11585 
11586  /* Search OID. */
11587  while (min <= max)
11588  {
11589  /* Get MID object. Objects have fixed size, so object i starts at size * i. */
11590  mid = (min + max) / 2;
11591  oid_ptr = ovf_record.data + (size * mid);
11592  BTREE_GET_OID (oid_ptr, &inst_oid);
11593  assert ((inst_oid.slotid & BTREE_LEAF_RECORD_MASK) == 0);
11594  assert ((inst_oid.volid & BTREE_OID_MVCC_FLAGS_MASK) == 0);
11595 
11596  /* Check OID. */
11597  if (OID_EQ (oid, &inst_oid))
11598  {
11599  char *oid_ptr_lower_bound;
11600  char *oid_ptr_upper_bound;
11601 
11602  /* check a sequence of objects (same OID with different MVCC info) */
 /* The bounds are the addresses of objects min and max, i.e. the part of the record not yet excluded. */
11603  oid_ptr_lower_bound = oid_ptr - size * (mid - min);
11604  oid_ptr_upper_bound = oid_ptr + size * (max - mid);
11605 
11606  return btree_seq_find_oid_from_ovfl (thread_p, btid_int, oid, &ovf_record, oid_ptr, oid_ptr_lower_bound,
11607  oid_ptr_upper_bound, purpose, match_mvccinfo, offset_to_object,
11608  mvcc_info);
11609  }
11610  else if (OID_GT (oid, &inst_oid))
11611  {
11612  /* Try between mid + 1 and max. */
11613  min = mid + 1;
11614  }
11615  else
11616  {
11617  /* Try between min and mid - 1. */
11618  assert (OID_LT (oid, &inst_oid));
11619  max = mid - 1;
11620  }
11621  }
11622 
11623  /* Not found. *offset_to_object is still NOT_FOUND. */
11624  return NO_ERROR;
11625 }
11626 
11627 /*
11628  * btree_seq_find_oid_from_ovfl () - Find object in overflow record by scanning the objects around a position
 * where the OID is already known to be stored.
11629  *
 * Starting at initial_oid_ptr the scan first walks backwards, then forwards, one fixed-size object at a time. Each
 * direction stops at its bound or at the first object with a different OID. The first object accepted by
 * btree_find_oid_does_mvcc_info_match () is output.
 *
11630  * return : Error code.
11631  * thread_p (in) : Thread entry.
11632  * btid_int (in) : B-tree info.
11633  * oid (in) : OID to find.
11634  * ovf_record(in) : overflow record
11635  * initial_oid_ptr (in) : pointer to OID initially found
11636  * oid_ptr_lower_bound (in) : pointer lower allowed bound within OID buffer (inclusive)
11637  * oid_ptr_upper_bound (in) : pointer upper allowed bound within OID buffer (inclusive)
11638  * purpose (in) : Purpose of call.
11639  * match_mvccinfo (in) : Non-null value to be matched or null if it doesn't matter.
11640  * offset_to_object (out) : If object is found, it saves the offset to object. It is not modified otherwise.
11641  * mvcc_info (out) : Output MVCC info if object is found.
11642  */
11643 static int
/* NOTE(review): the declarator line carrying the function name and the first parameters is not present in this
 * listing - presumably lost in extraction; restore it from upstream. */
11645  RECDES * ovf_record, char *initial_oid_ptr, char *oid_ptr_lower_bound,
11646  char *oid_ptr_upper_bound, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO * match_mvccinfo,
11647  int *offset_to_object, BTREE_MVCC_INFO * mvcc_info)
11648 {
11649  OID inst_oid;
11650  char *oid_ptr;
11651  char *ptr;
11652  int obj_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
11653  int error_code;
11654  bool is_match;
11655 
11656  /* first, check OID and previous ones */
11657  oid_ptr = initial_oid_ptr;
11658 
11659  while (oid_ptr >= oid_ptr_lower_bound)
11660  {
11661  BTREE_GET_OID (oid_ptr, &inst_oid);
11662  assert ((inst_oid.slotid & BTREE_LEAF_RECORD_MASK) == 0);
11663  assert ((inst_oid.volid & BTREE_OID_MVCC_FLAGS_MASK) == 0);
11664 
11665  /* Check OID. */
11666  if (!OID_EQ (oid, &inst_oid))
11667  {
 /* Left the run of objects with the searched OID. */
11668  break;
11669  }
11670 
11671  /* OID matched. */
11672  /* Check MVCC info. */
 /* Position ptr after the OID and, for unique indexes, after one more OID-sized field (presumably the class OID). */
11673  ptr = oid_ptr + OR_OID_SIZE;
11674  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
11675  {
11676  ptr += OR_OID_SIZE;
11677  }
11678 
 /* NOTE(review): a listing line is missing here; presumably it reads the MVCC info at ptr into mvcc_info. */
11680  error_code = btree_find_oid_does_mvcc_info_match (thread_p, mvcc_info, purpose, match_mvccinfo, &is_match);
11681  if (error_code != NO_ERROR)
11682  {
11683  ASSERT_ERROR ();
11684  return error_code;
11685  }
11686 
11687  if (is_match)
11688  {
11689  /* Object is a match. */
11690  *offset_to_object = CAST_BUFLEN (oid_ptr - ovf_record->data);
11691  return NO_ERROR;
11692  }
11693 
 /* Step to the previous object. */
11694  oid_ptr -= obj_size;
11695  }
11696 
11697  /* check next OIDs */
11698  oid_ptr = initial_oid_ptr + obj_size;
11699 
11700  while (oid_ptr <= oid_ptr_upper_bound)
11701  {
11702  BTREE_GET_OID (oid_ptr, &inst_oid);
11703  assert ((inst_oid.slotid & BTREE_LEAF_RECORD_MASK) == 0);
11704  assert ((inst_oid.volid & BTREE_OID_MVCC_FLAGS_MASK) == 0);
11705 
11706  /* Check OID. */
11707  if (!OID_EQ (oid, &inst_oid))
11708  {
 /* Left the run of objects with the searched OID. */
11709  break;
11710  }
11711 
11712  /* OID matched. */
11713  /* Check MVCC info. */
11714  ptr = oid_ptr + OR_OID_SIZE;
11715  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
11716  {
11717  ptr += OR_OID_SIZE;
11718  }
11719 
 /* NOTE(review): a listing line is missing here; presumably it reads the MVCC info at ptr into mvcc_info. */
11721  error_code = btree_find_oid_does_mvcc_info_match (thread_p, mvcc_info, purpose, match_mvccinfo, &is_match);
11722  if (error_code != NO_ERROR)
11723  {
11724  ASSERT_ERROR ();
11725  return error_code;
11726  }
11727 
11728  if (is_match)
11729  {
11730  /* Object is a match. */
11731  *offset_to_object = CAST_BUFLEN (oid_ptr - ovf_record->data);
11732  return NO_ERROR;
11733  }
11734 
 /* Step to the next object. */
11735  oid_ptr += obj_size;
11736  }
11737 
 /* No object with this OID satisfied the MVCC expectations; *offset_to_object was not modified. */
11738  return NO_ERROR;
11739 }
11740 
11741 /*
11742  * btree_get_prefix_separator () -
11743  * return: db_value containing the prefix key. This must be cleared when it is done being used.
11744  * key1(in): first key
11745  * key2(in): second key
11746  * prefix_key(in):
11747  *
11748  * Note: This function finds the prefix (the separator) of two strings.
11749  * Currently this is only done for one of the six string types,
11750  * but with multi-column indexes and uniques coming, we may want to do prefix keys for sequences as well.
11751  *
11752  * The purpose of this routine is to find a prefix that is greater than or equal to the first key but strictly less
11753  * than the second key. This routine assumes that the second key is strictly greater than the first key.
11754  *
11755  * If this function could not generate common prefix key
11756  * (ex: key domain == integer)
11757  * copy key2 to prefix_key (because Index separator use key2 in general case)
11758  */
11759 /* TODO: change key generation
11760  * (db_string_unique_prefix, pr_midxkey_unique_prefix)
11761  */
11762 int
11763 btree_get_prefix_separator (const DB_VALUE * key1, const DB_VALUE * key2, DB_VALUE * prefix_key, TP_DOMAIN * key_domain)
11764 {
11765  int c;
11766  int err = NO_ERROR;
11767 
11768  assert (DB_IS_NULL (key1) || (DB_VALUE_DOMAIN_TYPE (key1) == DB_VALUE_DOMAIN_TYPE (key2)));
11769  assert (!DB_IS_NULL (key2));
11770  assert_release (key_domain != NULL);
11771 
11772 #if !defined(NDEBUG)
11773  c = btree_compare_key ((DB_VALUE *) key1, (DB_VALUE *) key2, key_domain, 1, 1, NULL);
11774  assert (c == DB_LT);
11775 #endif
11776 
11777  if (DB_VALUE_DOMAIN_TYPE (key1) == DB_TYPE_MIDXKEY)
11778  {
11779  assert_release (TP_DOMAIN_TYPE (key_domain) == DB_TYPE_MIDXKEY);
11780 
11781  err = pr_midxkey_unique_prefix (key1, key2, prefix_key);
11782  }
11783  else if (pr_is_string_type (DB_VALUE_DOMAIN_TYPE (key1)))
11784  {
11785  assert_release (TP_DOMAIN_TYPE (key_domain) != DB_TYPE_MIDXKEY);
11786 
11787  err = db_string_unique_prefix (key1, key2, prefix_key, key_domain);
11788  }
11789  else
11790  {
11791  /* In this case, key2 is used as separator in B+tree so, copy key2 to prefix_key */
11792  err = pr_clone_value (key2, prefix_key);
11793  }
11794 
11795  if (err != NO_ERROR)
11796  {
11797  assert_release (false);
11798  return ER_FAILED;
11799  }
11800 
11801  c = btree_compare_key ((DB_VALUE *) key1, prefix_key, key_domain, 1, 1, NULL);
11802 
11803  if (c != DB_LT)
11804  {
11805  assert_release (false);
11806  return ER_FAILED;
11807  }
11808 
11809  c = btree_compare_key (prefix_key, (DB_VALUE *) key2, key_domain, 1, 1, NULL);
11810 
11811  if (!(c == DB_LT || c == DB_EQ))
11812  {
11813  assert_release (false);
11814  return ER_FAILED;
11815  }
11816 
11817  return err;
11818 }
11819 
11820 /*
11821  * btree_find_split_point () -
11822  * return: the key or key separator (prefix) to be moved to the
11823  * parent page, or NULL_KEY. The length of the returned
11824  * key, or prefix, is set in mid_keylen. The parameter
11825  * mid_slot is set to the record number of the split point record.
11826  * btid(in):
11827  * page_ptr(in): Pointer to the page
11828  * mid_slot(out): Set to contain the record number for the split point slot
11829  * key(in): Key to be inserted to the index (or modified).
11830  * helper(in): B-tree insert helper.
11831  * clear_midkey(in):
11832  *
11833  * Note: Finds the split point of the given page by considering the
11834  * length of the existing records and the length of the key.
11835  * For a leaf page split operation, if there are n keys in the
11836  * page, then mid_slot can be set to :
11837  *
11838  * 0 : all the records in the page are to be moved to the newly
11839  * allocated page, key is to be inserted into the original
11840  * page. Mid_key is between key and the first record key.
11841  *
11842  * n : all the records will be kept in the original page. Key is
11843  * to be inserted to the newly allocated page. Mid_key is
11844  * between the last record key and the key.
11845  * otherwise : slot point is in the range 1 to n-1, inclusive. The page
11846  * is to be split into half.
11847  *
11848  * Note: the returned db_value should be cleared and FREED by the caller.
11849  */
11850 static DB_VALUE *
11851 btree_find_split_point (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, int *mid_slot, DB_VALUE * key,
11852  BTREE_INSERT_HELPER * helper, bool * clear_midkey)
11853 {
 /*
  * Overview (as implemented below):
  *  1. Read the key count and the node header; the node is treated as a leaf when node_level <= 1.
  *  2. For a leaf, locate the slot of 'key' with btree_search_leaf_page; non-leaf nodes use NULL_SLOTID.
  *  3. Compute the size limits of both halves: existing fences are excluded and, for leaves only, room is
  *     reserved for the new entry and for one new fence in each half.
  *  4. Walk the records to choose *mid_slot, the last slot kept in the left node.
  *  5. Read the key at *mid_slot and the key that follows it, then build the separator that is returned.
  * On failure the function returns NULL and *clear_midkey is left untouched (it is only set on success).
  */
11854  RECDES rec;
11855  BTREE_NODE_HEADER *header = NULL;
11856  BTREE_NODE_TYPE node_type;
11857  INT16 slot_id = NULL_SLOTID;
11858  int new_ent_size = 0, new_fence_size = 0;
11859  int key_cnt = 0, key_len = 0, max_key_len = 0, offset = 0;
11860  INT16 tot_rec = 0;
11861  int i = 0, mid_size = 0;
11862  bool m_clear_key = false, n_clear_key = false;
11863  DB_VALUE *mid_key = NULL, *next_key = NULL, *prefix_key = NULL, *tmp_key;
11864  bool is_key_added_to_left = false, found = false;
11865  NON_LEAF_REC nleaf_pnt;
11866  LEAF_REC leaf_pnt;
11867  BTREE_SEARCH_KEY_HELPER search_key;
11868  int stop_at = 0, start_with = 0;
11869  int left_fence_size = 0;
11870  int right_fence_size = 0;
11871  int left_size = 0;
11872  int left_max_size = 0;
11873  int left_min_size = 0;
11874  int right_max_size = 0;
11875  int record_size = 0;
11876  int do_increment = 1;
11877 
11878  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
11879  if (key_cnt <= 0)
11880  {
11881  er_log_debug (ARG_FILE_LINE, "btree_find_split_point: node key count underflow: %d", key_cnt);
11882  goto error;
11883  }
11884 
11885  /* get the page header */
11886  header = btree_get_node_header (thread_p, page_ptr);
11887  if (header == NULL)
11888  {
11889  er_log_debug (ARG_FILE_LINE, "btree_find_split_point: get node header failure: %d", key_cnt);
11890  goto error;
11891  }
11892 
11893  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
11894 
 /* key_len is the size the new key occupies inside a page record (disk size passed through
  * BTREE_GET_KEY_LEN_IN_PAGE). */
11895  key_len = btree_get_disk_size_of_key (key);
11896  key_len = BTREE_GET_KEY_LEN_IN_PAGE (key_len);
11897 
11898  /* find the slot position of the key if it is to be located in the page */
11899  if (node_type == BTREE_LEAF_NODE)
11900  {
11901  if (btree_search_leaf_page (thread_p, btid, page_ptr, key, &search_key) != NO_ERROR)
11902  {
11903  ASSERT_ERROR ();
11904  goto error;
11905  }
11906  found = (search_key.result == BTREE_KEY_FOUND);
11907  slot_id = search_key.slotid;
11908  if (slot_id == NULL_SLOTID) /* leaf search failed */
11909  {
11910  assert (false);
11911  goto error;
11912  }
11913  }
11914  else
11915  {
 /* Non-leaf: the key is never placed in this page, so it is neither "found" nor given a slot. */
11916  found = 0;
11917  slot_id = NULL_SLOTID;
11918  }
11919 
11920  /* Start splitting records into left and right nodes. The algorithm must consider next rules: 1. The size of split
11921  * records should be done as close as possible to the size indicated by split_info. Split info is not applied to page
11922  * header and fences. 2. Left and right nodes should have enough space for new data required by insert, and also for
11923  * new fences. This applies only to leaf nodes. 3. After split & insert, both nodes must have at least one non-fence
11924  * record. */
11925 
11926  /* Compute maximum page size considering headers. */
11927  left_max_size = BTREE_NODE_MAX_SPLIT_SIZE (thread_p, page_ptr);
11928  right_max_size = left_max_size;
11929 
11930  /* Compute total record size. Filter out fences here. */
11931  start_with = 1;
11932  if (btree_is_fence_key (page_ptr, start_with))
11933  {
11934  assert (node_type == BTREE_LEAF_NODE);
11935  left_fence_size = spage_get_space_for_record (thread_p, page_ptr, start_with);
11936 
11937  /* Left fence will be included in left leaf. Subtract its size from the maximum size allowed. */
11938  left_max_size -= left_fence_size;
11939  start_with++;
11940  }
11941 
11942  stop_at = key_cnt;
11943  if (btree_is_fence_key (page_ptr, stop_at))
11944  {
11945  assert (node_type == BTREE_LEAF_NODE);
11946  right_fence_size = spage_get_space_for_record (thread_p, page_ptr, stop_at);
11947 
11948  /* Right fence will be included in right leaf. Subtract its size from the maximum size allowed. */
11949  right_max_size -= right_fence_size;
11950  stop_at--;
11951  }
11952 
11953  if (node_type == BTREE_LEAF_NODE)
11954  {
11955  new_ent_size = btree_get_max_new_data_size (thread_p, btid, page_ptr, node_type, key_len, helper, found);
11956 
11957  /* Until we know where new entity belongs, we must reserve enough space in both left and right leaf. */
11958  left_max_size -= new_ent_size;
11959  right_max_size -= new_ent_size;
11960 
11961  /* New fences are added to both leaves: an upper fence for left leaf and lower fence for right leaf. We don't
11962  * know their size yet, but we can estimate the largest size using node maximum key length. */
11963  /* TODO: Fences currently optimize only midxkey key types. Save storage by not using fence keys when they are not
11964  * required. */
11965  max_key_len = MAX (key_len, header->max_key_len);
11966  new_fence_size = LEAF_FENCE_MAX_SIZE (max_key_len) + spage_slot_size ();
11967 
11968  /* Adjust maximum size for both leaves. */
11969  left_max_size -= new_fence_size;
11970  right_max_size -= new_fence_size;
11971  }
11972  else
11973  {
11974  /* New key is not added to non-leaf. */
11975  new_ent_size = 0;
11976 
11977  /* No fences in non-leaf. */
11978  new_fence_size = 0;
11979  }
11980 
11981  /* First find out the size of the data on the page, don't count the header record or fences. */
 /* NOTE(review): tot_rec is an INT16 accumulator; this presumably relies on the total record size of a single
  * page (plus new_ent_size) fitting in 16 bits - confirm against the maximum page size. */
11982  for (i = start_with, tot_rec = 0; i <= stop_at; i++)
11983  {
11984  tot_rec += spage_get_space_for_record (thread_p, page_ptr, i);
11985  }
11986  tot_rec += new_ent_size;
11987 
11988  /* Compute mid_size, the desired size of left node according to split info. */
11989  mid_size = btree_split_find_pivot (tot_rec, &(header->split_info));
11990 
11991  /* Split records and new entity considering mid_size, left_max_size, and right_max_size. Since we work with left
11992  * node, translate right_max_size into left_min_size by subtracting from total records size. */
11993  left_min_size = tot_rec - right_max_size;
11994  /* Safe guard. */
11995  assert (left_min_size < left_max_size);
11996 
11997  /* Find the last slot ID belonging to left node (and save it in the mid_slot pointer). */
 /* The loop has no condition of its own: it ends through one of the two 'break' statements below.
  * do_increment is reset to 1 on every iteration and cleared only when the new (not found) key is accounted
  * for, so that slot i is visited once more for the record that already exists at that position. */
11998  for (i = start_with, left_size = 0; true; i = i + do_increment)
11999  {
12000  do_increment = 1;
12001  if (node_type == BTREE_LEAF_NODE && i == slot_id && !is_key_added_to_left)
12002  {
12003  /* New entity belongs to left leaf. Ignore it for non-leaf. */
12004  is_key_added_to_left = true;
12005 
12006  /* Adjust leaf sizes now that we know the key belongs to left leaf. */
12007  left_max_size += new_ent_size;
12008  right_max_size += new_ent_size;
12009  left_min_size -= new_ent_size;
12010 
12011  if (found)
12012  {
12013  /* Consider current record with new data. */
12014  record_size = spage_get_space_for_record (thread_p, page_ptr, i);
12015  record_size += new_ent_size;
12016  }
12017  else
12018  {
12019  /* Consider new record. */
12020  record_size = new_ent_size;
12021  /* Do not increment i this iteration. */
12022  do_increment = 0;
12023  }
12024  }
12025  else
12026  {
12027  record_size = spage_get_space_for_record (thread_p, page_ptr, i);
12028  }
12029  if (left_size < left_min_size)
12030  {
12031  /* Right node is too large. Keep adding records to left node. */
12032  left_size += record_size;
12033  continue;
12034  }
12035  /* Add new record to left and check its new size. */
12036  left_size += record_size;
12037  if (left_size > MIN (left_max_size, mid_size))
12038  {
12039  /* We reached the desired size, or the maximum size allowed for left node. Stop one record before this. */
12040  *mid_slot = i - 1;
12041 #if !defined (NDEBUG)
12042  /* Update left_size for debug checks. */
12043  left_size -= record_size;
12044 #endif /* !NDEBUG */
12045  break;
12046  }
12047  if (i == stop_at)
12048  {
12049  /* All non-fence records have been processed. Stop. */
12050  *mid_slot = i;
12051  break;
12052  }
12053  }
12054 
12055  /* Adjust mid_slot according to rule #3. */
12056  if (*mid_slot == (start_with - 1) && (node_type == BTREE_NON_LEAF_NODE || !is_key_added_to_left || found))
12057  {
12058  /* There are no records in the left node. Adjust mid_slot. */
12059  (*mid_slot)++;
12060 
12061 #if !defined (NDEBUG)
12062  /* Update left_size for debug checks. */
12063  left_size += spage_get_space_for_record (thread_p, page_ptr, *mid_slot);
12064 #endif /* !NDEBUG */
12065  }
12066  if (*mid_slot == stop_at && (node_type == BTREE_NON_LEAF_NODE || is_key_added_to_left || found))
12067  {
12068  /* There are no records in the right leaf. Adjust mid_slot. */
12069  (*mid_slot)--;
12070 #if !defined (NDEBUG)
12071  /* Update left_size for debug checks. */
12072  left_size -= spage_get_space_for_record (thread_p, page_ptr, *mid_slot);
12073 #endif /* !NDEBUG */
12074  }
12075 
12076  /* Safe guard: Rule #2. */
12077  assert (left_size <= left_max_size);
12078  assert (left_size >= left_min_size);
12079  assert (tot_rec - left_size <= right_max_size);
12080  assert (left_size + new_fence_size <= BTREE_NODE_MAX_SPLIT_SIZE (thread_p, page_ptr));
12081  assert (tot_rec - left_size + new_fence_size <= BTREE_NODE_MAX_SPLIT_SIZE (thread_p, page_ptr));
12082 
12083  /* Safe guard: Rules #3. */
12084  /* Left node will have at least one non-fence record. */
12085  assert (*mid_slot >= start_with
12086  || (*mid_slot == (start_with - 1) && node_type == BTREE_LEAF_NODE && is_key_added_to_left));
12087  /* Right node will have at least one non-fence record. */
12088  assert (*mid_slot < stop_at || (*mid_slot == stop_at && node_type == BTREE_LEAF_NODE && !is_key_added_to_left));
12089 
12090  /* TODO: Optimize memory usage. We don't need to allocated/deallocate all DB_VALUE types and their content in all
12091  * cases. */
12092  mid_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
12093  if (mid_key == NULL)
12094  {
12095  goto error;
12096  }
12097 
12098  btree_init_temp_key_value (&m_clear_key, mid_key);
12099 
12100  if (*mid_slot == (slot_id - 1) && is_key_added_to_left && !found)
12101  {
12102  /* the new key is the split key */
12103  PR_TYPE *pr_type;
12104 
12105  /* Safe guard. */
12106  assert (*mid_slot != key_cnt);
12107 
12108  if (node_type == BTREE_LEAF_NODE)
12109  {
12110  pr_type = btid->key_type->type;
12111  }
12112  else
12113  {
12114  pr_type = btid->nonleaf_key_type->type;
12115  }
12116 
 /* m_clear_key is false here and is also what gets passed as the third argument of setval.
  * NOTE(review): presumably this means mid_key does not own a copy of 'key' - confirm with setval's contract. */
12117  m_clear_key = false;
12118 
12119  pr_type->setval (mid_key, key, m_clear_key);
12120  }
12121  else
12122  {
12123  /* the split key is one of the keys on the page */
12124  assert (*mid_slot > 0);
12125  if (spage_get_record (thread_p, page_ptr, *mid_slot, &rec, PEEK) != S_SUCCESS)
12126  {
12127  goto error;
12128  }
12129 
12130  /* we copy the key here because rec lives on the stack and mid_key is returned from this routine. */
12131  if (node_type == BTREE_LEAF_NODE)
12132  {
12133  if (btree_read_record (thread_p, btid, page_ptr, &rec, mid_key, (void *) &leaf_pnt, node_type, &m_clear_key,
12134  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
12135  {
12136  goto error;
12137  }
12138  }
12139  else
12140  {
12141  if (btree_read_record (thread_p, btid, page_ptr, &rec, mid_key, (void *) &nleaf_pnt, node_type, &m_clear_key,
12142  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
12143  {
12144  goto error;
12145  }
12146  }
12147  }
12148 
12149  /* The determination of the prefix key is dependent on the next key */
12150  next_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
12151  if (next_key == NULL)
12152  {
12153  goto error;
12154  }
12155 
12156  btree_init_temp_key_value (&n_clear_key, next_key);
12157 
 /* All records stay in the left node and the new key sorts after the last one: the key that follows the
  * split point is then the new key itself, so it is cloned instead of being read from the page. */
12158  if (*mid_slot == key_cnt && slot_id == (key_cnt + 1))
12159  {
12160  assert (node_type == BTREE_LEAF_NODE);
12161 
12162  n_clear_key = true;
12163  if (pr_clone_value (key, next_key) != NO_ERROR)
12164  {
12165  goto error;
12166  }
12167  }
12168  else
12169  {
12170  /* The next key is one of the keys on the page */
12171  assert ((*mid_slot) + 1 > 0);
12172  if (spage_get_record (thread_p, page_ptr, (*mid_slot) + 1, &rec, PEEK) != S_SUCCESS)
12173  {
12174  goto error;
12175  }
12176 
12177  /* we copy the key here because rec lives on the stack and mid_key is returned from this routine. */
12178  if (node_type == BTREE_LEAF_NODE)
12179  {
12180  if (btree_read_record (thread_p, btid, page_ptr, &rec, next_key, (void *) &leaf_pnt, node_type, &n_clear_key,
12181  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
12182  {
12183  goto error;
12184  }
12185  }
12186  else
12187  {
12188  if (btree_read_record (thread_p, btid, page_ptr, &rec, next_key, (void *) &nleaf_pnt, node_type, &n_clear_key,
12189  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
12190  {
12191  goto error;
12192  }
12193  }
12194  }
12195 
12196  /* now that we have the mid key and the next key, we can determine the prefix key. */
12197  prefix_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
12198  if (prefix_key == NULL)
12199  {
12200  goto error;
12201  }
12202 
12203  /* Check if we can make use of prefix keys. We can't use them in the upper levels of the trees because the algorithm
12204  * will fall apart. We can only use them when splitting a leaf page. */
12205  if (node_type == BTREE_LEAF_NODE)
12206  {
 /* NOTE(review): the 'if' condition that opens the next block is not visible in this listing (two source lines
  * are missing here). Judging by the comment inside, it presumably tests whether mid_key or next_key is an
  * overflow key - confirm against the complete file. */
12209  {
12210  /* If one of the keys is an overflow key, the prefix key could be longer than max_key_len in page (meaning the
12211  * insert could fail); in this case use the next key itself as prefix key. */
 /* The result of pr_clone_value is not checked here, unlike the clone of 'key' into next_key above. */
12212  pr_clone_value (next_key, prefix_key);
12213  }
12214  else
12215  {
12216  if (btree_get_prefix_separator (mid_key, next_key, prefix_key, btid->key_type) != NO_ERROR)
12217  {
12218  goto error;
12219  }
12220  }
12221  }
12222  else
12223  {
12224  /* return the next_key */
 /* As above, the result of pr_clone_value is not checked. */
12225  pr_clone_value (next_key, prefix_key);
12226  }
12227 
12228  *clear_midkey = true; /* we must always clear prefix keys */
12229 
12230  /* replace the mid_key with the prefix_key */
 /* After this swap mid_key points to the separator that is returned, and prefix_key points to the key read at
  * the split point, which the common cleanup below releases. */
12231  tmp_key = mid_key;
12232  mid_key = prefix_key;
12233  prefix_key = tmp_key; /* this makes sure we clear/free the old mid key */
12234  goto success;
12235 
12236  /* error handling and cleanup. */
12237 error:
12238 
 /* Release the value that would have been returned, then fall into the common cleanup so that next_key and
  * prefix_key (whichever have been allocated so far) are released as well. */
12239  if (mid_key)
12240  {
12241  btree_clear_key_value (&m_clear_key, mid_key);
12242  db_private_free_and_init (thread_p, mid_key);
12243  }
12244  mid_key = NULL;
12245 
12246  /* fall through */
12247 
12248 success:
12249 
 /* Common cleanup for both outcomes. next_key is cleared according to n_clear_key, whereas prefix_key is always
  * cleared with pr_clear_value. */
12250  if (next_key)
12251  {
12252  btree_clear_key_value (&n_clear_key, next_key);
12253  db_private_free_and_init (thread_p, next_key);
12254  }
12255  if (prefix_key)
12256  {
12257  pr_clear_value (prefix_key);
12258  db_private_free_and_init (thread_p, prefix_key);
12259  }
12260 
 /* NULL on the error path; otherwise the separator value allocated with db_private_alloc. */
12261  return mid_key;
12262 }
12263 
12264 /*
12265  * btree_split_find_pivot () - Choose how much of 'total' should go to the left node of a split.
12266  * return: the split point, a portion of 'total'
12267  * total(in): amount to be divided (the caller visible in this file passes the total record size)
12268  * split_info(in): split statistics of the node; only the pivot field is read here
 *
 * Note: when the pivot is 0, or lies strictly between BTREE_SPLIT_LOWER_BOUND and BTREE_SPLIT_UPPER_BOUND, the
 * result is CEIL_PTVDIV (total, 2). Otherwise the pivot is limited to the range
 * [BTREE_SPLIT_MIN_PIVOT, BTREE_SPLIT_MAX_PIVOT], multiplied by total and converted to int.
 *
 * NOTE(review): the line carrying the function name and parameter list is not visible in this listing; the
 * parameter names used here come from the original header comment and from the body.
12269  */
12270 static int
12272 {
12273  int split_point;
12274 
 /* pivot == 0 is handled together with the "pivot inside the bounds" case: both split in half. */
12275  if (split_info->pivot == 0
12276  || (split_info->pivot > BTREE_SPLIT_LOWER_BOUND && split_info->pivot < BTREE_SPLIT_UPPER_BOUND))
12277  {
12278  split_point = CEIL_PTVDIV (total, 2);
12279  }
12280  else
12281  {
 /* Limit the pivot first, then scale; the cast to int drops the fractional part of the product. */
12282  split_point = (int) (total * MAX (MIN (split_info->pivot, BTREE_SPLIT_MAX_PIVOT), BTREE_SPLIT_MIN_PIVOT));
12283  }
12284 
12285  return split_point;
12286 }
12287 
12288 /*
12289  * btree_split_next_pivot () -
12290  * return:
12291  * split_info(in):
12292  * new_value(in):
12293  * max_index(in):
12294  */
12295 static int
12296 btree_split_next_pivot (BTREE_NODE_SPLIT_INFO * split_info, float new_value, int max_index)
12297 {
12298  float new_pivot;
12299 
12300  assert (0.0f <= split_info->pivot);
12301  assert (split_info->pivot <= 1.0f);
12302 
12303  split_info->index = MIN (split_info->index + 1, max_index);
12304 
12305  if (split_info->pivot == 0)
12306  {
12307  new_pivot = new_value;
12308  }
12309  else
12310  {
12311  /* cumulative moving average(running average) */
12312  new_pivot = split_info->pivot;
12313  new_pivot = (new_pivot + ((new_value - new_pivot) / split_info->index));
12314  }
12315 
12316  split_info->pivot = MAX (0.0f, MIN (1.0f, new_pivot));
12317 
12318  return NO_ERROR;
12319 }
12320 
/*
 * Tells whether a page passes all of the following checks: the index key type is DB_TYPE_MIDXKEY, the page holds
 * at least two keys, its header can be read, it is a leaf (node_level <= 1), and both its first and its last
 * record carry the BTREE_LEAF_RECORD_FENCE flag. Returns true only when every check passes.
 *
 * NOTE(review): the line carrying the function name and parameter list is not visible in this listing. The body
 * uses thread_p, btid and page_ptr; the name is presumably btree_node_is_compressed, judging by the call made at
 * the top of the function that follows - confirm against the complete file.
 */
12321 static bool
12323 {
12324  int key_cnt;
12325  RECDES peek_rec;
12326  BTREE_NODE_HEADER *header = NULL;
12327  BTREE_NODE_TYPE node_type;
12328 
12329  assert (btid != NULL);
12330 
 /* Only multi-column (midxkey) indexes are considered. */
12331  if (TP_DOMAIN_TYPE (btid->key_type) != DB_TYPE_MIDXKEY)
12332  {
12333  return false;
12334  }
12335 
 /* Two fence records are required below, so fewer than two keys can never qualify. */
12336  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
12337  if (key_cnt < 2)
12338  {
12339  return false;
12340  }
12341 
12342  header = btree_get_node_header (thread_p, page_ptr);
12343  if (header == NULL)
12344  {
12345  return false;
12346  }
12347 
12348  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
12349 
12350  if (node_type == BTREE_NON_LEAF_NODE)
12351  {
12352  return false;
12353  }
12354 
12355  /* check if lower fence key */
 /* A failed fetch is only asserted: in a build without asserts, execution continues and peek_rec is inspected
  * anyway. */
12356  if (spage_get_record (thread_p, page_ptr, 1, &peek_rec, PEEK) != S_SUCCESS)
12357  {
12358  assert (false);
12359  }
12360  if (btree_leaf_is_flaged (&peek_rec, BTREE_LEAF_RECORD_FENCE) == false)
12361  {
12362  return false;
12363  }
12364 
12365  /* check if upper fence key */
12366  assert (key_cnt > 0);
 /* Same remark as for the lower fence: the fetch result is only asserted. */
12367  if (spage_get_record (thread_p, page_ptr, key_cnt, &peek_rec, PEEK) != S_SUCCESS)
12368  {
12369  assert (false);
12370  }
12371  if (btree_leaf_is_flaged (&peek_rec, BTREE_LEAF_RECORD_FENCE) == false)
12372  {
12373  return false;
12374  }
12375 
12376  return true;
12377 }
12378 
/*
 * Computes pr_midxkey_common_prefix for the keys stored in the first and in the last record of the page.
 * Returns 0 when the page does not pass btree_node_is_compressed, the error code when reading one of the two
 * keys fails, and the computed value otherwise.
 *
 * NOTE(review): the line carrying the function name and parameter list is not visible in this listing. The body
 * uses thread_p, btid and page_ptr; the name is presumably btree_node_common_prefix, the function called with
 * these arguments further down in the file - confirm against the complete file.
 * NOTE(review): two source lines are also missing after each of the two error checks below (just before the
 * DB_TYPE_MIDXKEY asserts).
 */
12379 static int
12381 {
12382  RECDES peek_rec;
12383  int diff_column;
12384  int key_cnt;
12385 
12386  DB_VALUE lf_key, uf_key;
12387  bool lf_clear_key = false, uf_clear_key = false;
12388  int offset;
12389  LEAF_REC leaf_pnt;
12390  int error = NO_ERROR;
12391 
12392  if (btree_node_is_compressed (thread_p, btid, page_ptr) == false)
12393  {
12394  return 0;
12395  }
12396 
12397  btree_init_temp_key_value (&lf_clear_key, &lf_key);
12398  btree_init_temp_key_value (&uf_clear_key, &uf_key);
12399 
12400  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
12401  assert (key_cnt >= 2);
12402 
 /* First record of the page. The result of spage_get_record is not checked here. */
12403  spage_get_record (thread_p, page_ptr, 1, &peek_rec, PEEK);
12404  error =
12405  btree_read_record_without_decompression (thread_p, btid, &peek_rec, &lf_key, &leaf_pnt, BTREE_LEAF_NODE,
12406  &lf_clear_key, &offset, PEEK_KEY_VALUE);
12407  if (error != NO_ERROR)
12408  {
12409  goto cleanup;
12410  }
12413  assert (DB_VALUE_TYPE (&lf_key) == DB_TYPE_MIDXKEY);
12414 
12415  assert (key_cnt > 0);
 /* Last record of the page. The result of spage_get_record is not checked here either. */
12416  spage_get_record (thread_p, page_ptr, key_cnt, &peek_rec, PEEK);
12417  error =
12418  btree_read_record_without_decompression (thread_p, btid, &peek_rec, &uf_key, &leaf_pnt, BTREE_LEAF_NODE,
12419  &uf_clear_key, &offset, PEEK_KEY_VALUE);
12420  if (error != NO_ERROR)
12421  {
12422  goto cleanup;
12423  }
12426  assert (DB_VALUE_TYPE (&uf_key) == DB_TYPE_MIDXKEY);
12427 
12428  diff_column = pr_midxkey_common_prefix (&lf_key, &uf_key);
12429 
12430 cleanup:
12431  /* clean up */
12432  btree_clear_key_value (&lf_clear_key, &lf_key);
12433  btree_clear_key_value (&uf_clear_key, &uf_key);
12434 
 /* diff_column is assigned only when both reads succeeded, and it is returned only in that case. */
12435  if (error == NO_ERROR)
12436  {
12437  return diff_column;
12438  }
12439  else
12440  {
12441  return error;
12442  }
12443 }
12444 
12445 /*
12446  * btree_recompress_record () - Re-compress record for new prefix.
12447  *
12448  * return : Error code.
12449  * thread_p (in) : Thread entry.
12450  * btid_int (in) : B-tree info.
12451  * record (in) : B-tree leaf record.
12452  * fence_key (in) : Lower or upper fence key value.
12453  * old_prefix (in) : Old prefix.
12454  * new_prefix (in) : New prefix.
 *
 * Note: the record is rewritten in place: the data following the key is moved and the new key is packed into
 * record->data. Nothing is done when old_prefix equals new_prefix.
 *
 * NOTE(review): the first line of the signature (function name and leading parameters) is not visible in this
 * listing; only its continuation line is.
12455  */
12456 static int
12458  int old_prefix, int new_prefix)
12459 {
12460  int error_code = NO_ERROR;
12461  LEAF_REC dummy_leaf_record_info;
12462  int offset_after_key = 0;
12463  int new_offset_after_key = 0;
12464  int old_key_len = 0;
12465  int new_key_len = 0;
12466  int offset_before_key = 0;
12467  bool clear_key = false;
12468  DB_VALUE key;
12469  DB_VALUE recompress_key;
12470  OR_BUF write_key_buffer;
12471 
12472  assert (btid_int != NULL);
12473  assert (record != NULL);
12474 
12475  btree_init_temp_key_value (&clear_key, &key);
12476 
12477  if (old_prefix == new_prefix)
12478  {
12479  /* Recompression is not needed. */
12480  return NO_ERROR;
12481  }
12482 
12483  /* Fence key must have a value if uncompress is needed. */
12484  assert (old_prefix == 0 || (fence_key != NULL && !DB_IS_NULL (fence_key)));
12485 
 /* Read the key as stored in the record; offset_after_key receives the offset of the data following it. */
12486  error_code =
12487  btree_read_record_without_decompression (thread_p, btid_int, record, &key, &dummy_leaf_record_info, BTREE_LEAF_NODE,
12488  &clear_key, &offset_after_key, PEEK_KEY_VALUE);
12489  if (error_code != NO_ERROR)
12490  {
 /* Returns without calling btree_clear_key_value on key.
  * NOTE(review): presumably nothing needs clearing after a failed read - confirm. */
12491  ASSERT_ERROR ();
12492  return error_code;
12493  }
12494 
12495  /* Save aligned size of old key. */
12496  old_key_len = btree_get_disk_size_of_key (&key);
12497  old_key_len = DB_ALIGN (old_key_len, INT_ALIGNMENT);
12498 
12499  /* Uncompress. */
 /* recompress_key receives either the key with the old prefix added back from fence_key, or a clone of the key
  * when there was no old prefix. The results of pr_midxkey_add_prefix and pr_clone_value are not checked. */
12500  db_make_null (&recompress_key);
12501  if (old_prefix > 0)
12502  {
12503  pr_midxkey_add_prefix (&recompress_key, fence_key, &key, old_prefix);
12504  }
12505  else
12506  {
12507  pr_clone_value (&key, &recompress_key);
12508  }
12509 
12510  /* Compress. */
 /* The result of pr_midxkey_remove_prefix is not checked either. */
12511  if (new_prefix > 0)
12512  {
12513  pr_midxkey_remove_prefix (&recompress_key, new_prefix);
12514  }
12515 
12516  /* Save aligned size of new key. */
12517  new_key_len = btree_get_disk_size_of_key (&recompress_key);
12518  new_key_len = DB_ALIGN (new_key_len, INT_ALIGNMENT);
12519 
 /* The key starts old_key_len (aligned) bytes before the offset reported by the read above. */
12520  offset_before_key = offset_after_key - old_key_len;
12521 
12522  /* Move the rest of the record first. */
12523  new_offset_after_key = offset_after_key + new_key_len - old_key_len;
12524  RECORD_MOVE_DATA (record, new_offset_after_key, offset_after_key);
12525 
12526  /* Pack new key. */
 /* The write buffer covers exactly new_key_len bytes at the old key position; the assert below checks that the
  * packed key plus alignment filled it completely. */
12527  or_init (&write_key_buffer, record->data + offset_before_key, new_key_len);
12528  btid_int->key_type->type->index_writeval (&write_key_buffer, &recompress_key);
12529  or_align (&write_key_buffer, INT_ALIGNMENT);
12530  assert (write_key_buffer.ptr == write_key_buffer.endptr);
12531 
12532  btree_clear_key_value (&clear_key, &key);
12533  pr_clear_value (&recompress_key);
12534 
12535  return NO_ERROR;
12536 }
12537 
/*
 * btree_compress_node () - Remove the common prefix from the keys stored in a leaf page.
 *
 * The prefix length comes from btree_node_common_prefix: when it is 0 nothing is done and 0 is returned; when it
 * is negative it is returned unchanged as an error code. Otherwise every record from slot 2 up to slot
 * key_cnt - 1 (the first and last slots are left alone) is copied, its key is shortened with
 * pr_midxkey_remove_prefix, the remainder of the record is shifted accordingly, and the slot is rewritten with
 * spage_update.
 *
 * NOTE(review): two source lines are missing from this listing inside the loop, including the 'if' that guards
 * the "do not compress overflow key" branch.
 */
12538 static int
12539 btree_compress_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr)
12540 {
12541  int i, key_cnt, diff_column;
12542  RECDES peek_rec, rec;
12543  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
12544  DB_VALUE key;
12545  bool clear_key = false;
12546  int offset, new_offset, key_len, new_key_len;
12547  LEAF_REC leaf_pnt;
12548  int error = NO_ERROR;
12549 
 /* rec is a scratch copy area placed in rec_buf, aligned to BTREE_MAX_ALIGN. */
12550  rec.area_size = DB_PAGESIZE;
12551  rec.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
12552 
12553  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
12554 
12555  diff_column = btree_node_common_prefix (thread_p, btid, page_ptr);
12556  if (diff_column == 0)
12557  {
12558  return 0;
12559  }
12560  else if (diff_column < 0)
12561  {
12562  return diff_column;
12563  }
12564 
12565  btree_init_temp_key_value (&clear_key, &key);
12566 
12567  /* compress prefix */
12568  for (i = 2; i < key_cnt; i++)
12569  {
 /* The result of spage_get_record is deliberately discarded (cast to void). */
12570  (void) spage_get_record (thread_p, page_ptr, i, &peek_rec, PEEK);
12571 
12573 
 /* NOTE(review): the condition opening the next block is one of the missing lines. */
12575  {
12576  /* do not compress overflow key */
12577  continue;
12578  }
12579 
 /* Work on a private copy of the record so that it can be modified before being written back. */
12580  (void) spage_get_record (thread_p, page_ptr, i, &rec, COPY);
12581 
12582  error =
12583  btree_read_record_without_decompression (thread_p, btid, &rec, &key, &leaf_pnt, BTREE_LEAF_NODE, &clear_key,
12584  &offset, PEEK_KEY_VALUE);
12585  if (error != NO_ERROR)
12586  {
 /* Returns from inside the loop; records already rewritten in earlier iterations stay rewritten. */
12587  return error;
12588  }
12589  assert (clear_key == false);
12590 
 /* Measure the key before and after removing the prefix; both sizes are aligned before being compared. */
12591  key_len = btree_get_disk_size_of_key (&key);
12592  pr_midxkey_remove_prefix (&key, diff_column);
12593  new_key_len = btree_get_disk_size_of_key (&key);
12594 
12595  new_key_len = DB_ALIGN (new_key_len, INT_ALIGNMENT);
12596  key_len = DB_ALIGN (key_len, INT_ALIGNMENT);
12597 
12598  new_offset = offset + new_key_len - key_len;
12599 
12600  if (new_offset != offset)
12601  {
12602  /* move the remaining part of record */
12603  memmove (rec.data + new_offset, rec.data + offset, rec.length - offset);
12604  rec.length = new_offset + (rec.length - offset);
12605  }
12606 
12607 #if !defined (NDEBUG)
12608  btree_check_valid_record (thread_p, btid, &rec, BTREE_LEAF_NODE, &key);
12609 #endif
12610 
 /* The result of spage_update is not checked. */
12611  spage_update (thread_p, page_ptr, i, &rec);
12612  btree_clear_key_value (&clear_key, &key);
12613  }
12614 
12615 #if !defined(NDEBUG)
12616  btree_verify_node (thread_p, btid, page_ptr);
12617 #endif
12618 
12619  return error;
12620 }
12621 
12622 /*
12623  * btree_split_node () -
12624  * return: NO_ERROR
12625  * child_vpid is set to page identifier for the child page to be
12626  * followed, Q or R, or the page identifier of a newly allocated
12627  * page to insert the key, or NULL_PAGEID. The parameter key is
12628  * set to the middle key that will be put into the parent page P.
12629  * btid(in): The index identifier
12630  * P(in): Page pointer for the parent page of page Q
12631  * Q(in): Page pointer for the page to be split
12632  * R(in): Page pointer for the newly allocated page
12633  * next_page(in):
12634  * P_vpid(in): Page identifier for page P
12635  * Q_vpid(in): Page identifier for page Q
12636  * R_vpid(in): Page identifier for page R
12637  * p_slot_id(in): The slot of parent page P which points to page Q
12638  * node_type(in): shows whether page Q is a leaf page, or not
12639  * key(in): the key caller is trying to follow
12640  * helper(in): B-tree insert helper structure
12641  * child_vpid(out): Set to the child page identifier based on key
12642  *
12643  * Note: Page Q is split into two pages: Q and R. The second half
12644  * of the page Q is moved to page R. The middle key of the
12645  * split operation is moved to parent page P. Depending on the
12646  * split point, the whole page Q may be moved to page R, or the
12647  * whole page content may be kept in page Q. If the key can not
12648  * fit into one of the pages after the split, a new page is
12649  * allocated for the key and its page identifier is returned.
12650  * The headers of all pages are updated, accordingly.
12651  */
12652 static int
12653 btree_split_node (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R, VPID * P_vpid,
12654  VPID * Q_vpid, VPID * R_vpid, INT16 p_slot_id, BTREE_NODE_TYPE node_type, DB_VALUE * key,
12655  BTREE_INSERT_HELPER * helper, VPID * child_vpid)
12656 {
12657  int key_cnt, leftcnt, rightcnt;
12658  RECDES peek_rec, rec;
12659  NON_LEAF_REC nleaf_rec;
12660  BTREE_NODE_HEADER *pheader = NULL, *qheader = NULL;
12661  BTREE_NODE_HEADER right_header_info, *rheader = NULL;
12662  int i, j, c;
12663  int sep_key_len, key_len;
12664  bool clear_sep_key;
12665  DB_VALUE *sep_key;
12666  int ret = NO_ERROR;
12667  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
12668  int key_type;
12669 
12670  bool flag_fence_insert = false;
12671  OID dummy_oid = { NULL_PAGEID, 0, 0 };
12672  int leftsize, rightsize;
12673  VPID right_next_vpid;
12674  int right_max_key_len;
12675 
12676  /* for recovery purposes */
12677  char *p_redo_data;
12678  int p_redo_length;
12679  char p_redo_data_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
12680 
12681  PAGE_PTR page_after_right = NULL;
12682 
12683  rheader = &right_header_info;
12684 
12685  /***********************************************************
12686  *** STEP 0: initializations
12687  ***********************************************************/
12688  p_redo_data = NULL;
12689 
12690  rec.data = NULL;
12691 
12692  /* initialize child page identifier */
12693  VPID_SET_NULL (child_vpid);
12694  sep_key = NULL;
12695 
12696  /* Assert expected arguments. */
12697  assert (P != NULL);
12698  assert (Q != NULL);
12699  assert (R != NULL);
12700  assert (!VPID_ISNULL (P_vpid));
12701  assert (!VPID_ISNULL (Q_vpid));
12702  assert (!VPID_ISNULL (R_vpid));
12706 
12707 #if !defined(NDEBUG)
12709  {
12710  printf ("btree_split_node: P{%d, %d}, Q{%d, %d}, R{%d, %d}\n", P_vpid->volid, P_vpid->pageid, Q_vpid->volid,
12711  Q_vpid->pageid, R_vpid->volid, R_vpid->pageid);
12712  }
12713 #endif
12714 
12715 #if !defined(NDEBUG)
12716  btree_verify_node (thread_p, btid, P);
12717  btree_verify_node (thread_p, btid, Q);
12718 #endif
12719  rec.area_size = DB_PAGESIZE;
12720  rec.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
12721 
12722  key_cnt = btree_node_number_of_keys (thread_p, Q);
12723  if (key_cnt <= 0)
12724  {
12725  ASSERT_ERROR_AND_SET (ret);
12726  goto exit_on_error;
12727  }
12728 
12729 #if !defined(NDEBUG)
12731  {
12732  btree_split_test (thread_p, btid, key, Q_vpid, Q, node_type);
12733  }
12734 #endif
12735 
12736  /********************************************************************
12737  *** STEP 1: find split point & sep_key
12738  *** make fence key to be inserted
12739  ***
12740  *** find the middle record of the page Q and find the number of
12741  *** keys after split in pages Q and R, respectively
12742  ********************************************************************/
12743  qheader = btree_get_node_header (thread_p, Q);
12744  if (qheader == NULL)
12745  {
12746  assert_release (false);
12747  ret = ER_FAILED;
12748  goto exit_on_error;
12749  }
12750 
12751  sep_key = btree_find_split_point (thread_p, btid, Q, &leftcnt, key, helper, &clear_sep_key);
12752  if (sep_key == NULL || DB_IS_NULL (sep_key))
12753  {
12754  er_log_debug (ARG_FILE_LINE, "btree_split_node: Null middle key after split. Operation Ignored.\n");
12755  ASSERT_ERROR_AND_SET (ret);
12756  goto exit_on_error;
12757  }
12758  assert (leftcnt <= key_cnt && leftcnt >= 0);
12759 
12760  /* make fence record */
12761  if (node_type == BTREE_LEAF_NODE)
12762  {
12763  PR_TYPE *pr_type = btid->key_type->type;
12764  sep_key_len = pr_type->get_index_size_of_value (sep_key);
12765 
12766  if (sep_key_len < BTREE_MAX_KEYLEN_INPAGE && sep_key_len <= qheader->max_key_len)
12767  {
12768  ret =
12769  btree_write_record (thread_p, btid, NULL, sep_key, BTREE_LEAF_NODE, BTREE_NORMAL_KEY, sep_key_len, false,
12770  &btid->topclass_oid, &dummy_oid, NULL, &rec);
12771  if (ret != NO_ERROR)
12772  {
12773  ASSERT_ERROR ();
12774  goto exit_on_error;
12775  }
12776 
12778 
12779  flag_fence_insert = true;
12780  }
12781  else
12782  {
12783  /* do not insert fence key if sep_key is overflow key */
12784  flag_fence_insert = false;
12785  }
12786  }
12787 
12789  {
12790  flag_fence_insert = false;
12791  }
12792 
12793  rightcnt = key_cnt - leftcnt;
12794 
12795  /*********************************************************************
12796  *** STEP 2: save undo image of Q
12797  *** update Q, R header info
12798  *********************************************************************/
12799  /* add undo logging for page Q */
12800  log_append_undo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, Q, -1, DB_PAGESIZE, Q);
12801 
12802  /* We may need to update the max_key length if the mid key is larger than the max key length. This can happen due to
12803  * disk padding when the prefix key length approaches the fixed key length. */
12804  sep_key_len = btree_get_disk_size_of_key (sep_key);
12805  sep_key_len = BTREE_GET_KEY_LEN_IN_PAGE (sep_key_len);
12806  qheader->max_key_len = MAX (sep_key_len, qheader->max_key_len);
12807 
12808  /* set rheader max_key_len as qheader max_key_len */
12809  right_max_key_len = qheader->max_key_len;
12810  right_next_vpid = qheader->next_vpid;
12811 
12812  if (node_type == BTREE_LEAF_NODE)
12813  {
12814  qheader->next_vpid = *R_vpid;
12815  }
12816  else
12817  {
12818  VPID_SET_NULL (&qheader->next_vpid);
12819  }
12820 
12821  if (leftcnt == 0)
12822  {
12823  /* Only key length will exist in page. Set max key length. */
12824  /* Max key length would have been set when key is inserted. However, we set it here to suppress assert of
12825  * btree_verify_node. */
12826  qheader->max_key_len = BTREE_GET_KEY_LEN_IN_PAGE (btree_get_disk_size_of_key (key));
12827  }
12828 
12829  qheader->split_info.index = 1;
12830 
12831  rheader->node_level = qheader->node_level;
12832  rheader->max_key_len = right_max_key_len;
12833  if (key_cnt - leftcnt == 0 && flag_fence_insert == false)
12834  {
12835  /* Only key length will exist in page. Set max key length. */
12836  /* Max key length would have been set when key is inserted. However, we set it here to suppress assert of
12837  * btree_verify_node. */
12839  }
12840 
12841  rheader->next_vpid = right_next_vpid;
12842 
12843  if (node_type == BTREE_LEAF_NODE)
12844  {
12845  rheader->prev_vpid = *Q_vpid;
12846  }
12847  else
12848  {
12849  VPID_SET_NULL (&(rheader->prev_vpid));
12850  }
12851 
12852  rheader->split_info = qheader->split_info;
12853 
12855 
12856  ret = btree_init_node_header (thread_p, &btid->sys_btid->vfid, R, rheader, false);
12857  if (ret != NO_ERROR)
12858  {
12859  ASSERT_ERROR ();
12860  goto exit_on_error;
12861  }
12862 
12863  /*******************************************************************
12864  *** STEP 3: move second half of page Q to page R
12865  *** insert fence key to Q
12866  *** make redo image for Q
12867  *******************************************************************/
12868  /* lower fence key for R */
12869  rightsize = 0;
12870  j = 1;
12871  if (flag_fence_insert == true)
12872  {
12873  rightsize = j;
12874  assert (j > 0);
12875  if (spage_insert_at (thread_p, R, j++, &rec) != SP_SUCCESS)
12876  {
12877  ret = ER_FAILED;
12878  goto exit_on_error;
12879  }
12880  }
12881 
12882  /* move the second half of page Q to page R */
12883  for (i = 1; i <= rightcnt; i++, j++)
12884  {
12885  assert (leftcnt + 1 > 0);
12886  if (spage_get_record (thread_p, Q, leftcnt + 1, &peek_rec, PEEK) != S_SUCCESS)
12887  {
12888  assert_release (false);
12889  ret = ER_FAILED;
12890  goto exit_on_error;
12891  }
12892 
12893  assert (j > 0);
12894  if (spage_insert_at (thread_p, R, j, &peek_rec) != SP_SUCCESS)
12895  {
12896  assert_release (false);
12897  ret = ER_FAILED;
12898  goto exit_on_error;
12899  }
12900 
12901  rightsize = j;
12902 
12903  assert (leftcnt + 1 > 0);
12904  if (spage_delete (thread_p, Q, leftcnt + 1) != leftcnt + 1)
12905  {
12906  assert_release (false);
12907  ret = ER_FAILED;
12908  goto exit_on_error;
12909  }
12910  }
12911 
12912  leftsize = leftcnt;
12913  /* upper fence key for Q */
12914  if (flag_fence_insert == true)
12915  {
12916  assert (leftcnt + 1 > 0);
12917  if (spage_insert_at (thread_p, Q, leftcnt + 1, &rec) != SP_SUCCESS)
12918  {
12919  assert_release (false);
12920  ret = ER_FAILED;
12921  goto exit_on_error;
12922  }
12923  leftsize++;
12924  }
12925 
12927 
12928  ret = btree_compress_node (thread_p, btid, Q);
12929  if (ret != NO_ERROR)
12930  {
12931  ASSERT_ERROR ();
12932  goto exit_on_error;
12933  }
12934 
12935  /* add redo logging for page Q */
12936  log_append_redo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, Q, -1, DB_PAGESIZE, Q);
12937 
12938  /***************************************************************************
12939  *** STEP 4: add redo log for R
12940  *** Log the second half of page Q for redo purposes on Page R,
12941  *** the records on the second half of page Q will be inserted to page R
12942  ***************************************************************************/
12943 
12944  ret = btree_compress_node (thread_p, btid, R);
12945  if (ret != NO_ERROR)
12946  {
12947  ASSERT_ERROR ();
12948  goto exit_on_error;
12949  }
12950 
12951  log_append_redo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, R, -1, DB_PAGESIZE, R);
12952 
12954 
12955  /****************************************************************************
12956  *** STEP 5: insert sep_key to P
12957  *** add undo/redo log for page P
12958  ***
12959  *** update the parent page P to keep the middle key and to point to
12960  *** pages Q and R. Remember that this mid key will be on a non leaf page
12961  *** regardless of whether we are splitting a leaf or non leaf page.
12962  ****************************************************************************/
12963  nleaf_rec.pnt = *R_vpid;
12964  key_len = btree_get_disk_size_of_key (sep_key);
12965  if (key_len < BTREE_MAX_KEYLEN_INPAGE)
12966  {
12967  key_type = BTREE_NORMAL_KEY;
12968  nleaf_rec.key_len = key_len;
12969  }
12970  else
12971  {
12972  key_type = BTREE_OVERFLOW_KEY;
12973  nleaf_rec.key_len = -1;
12974  }
12975 
12976  ret =
12977  btree_write_record (thread_p, btid, &nleaf_rec, sep_key, BTREE_NON_LEAF_NODE, key_type, key_len, false, NULL, NULL,
12978  NULL, &rec);
12979  if (ret != NO_ERROR)
12980  {
12981  ASSERT_ERROR ();
12982  goto exit_on_error;
12983  }
12984 
12985  p_slot_id++;
12986 
12988 
12989  /* add undo/redo logging for page P */
12990  assert (p_slot_id > 0);
12991  if (spage_insert_at (thread_p, P, p_slot_id, &rec) != SP_SUCCESS)
12992  {
12993  assert_release (false);
12994  ret = ER_FAILED;
12995  goto exit_on_error;
12996  }
12997 
12998  p_redo_data = PTR_ALIGN (p_redo_data_buf, BTREE_MAX_ALIGN);
12999 
13000  btree_rv_write_log_record (p_redo_data, &p_redo_length, &rec, BTREE_NON_LEAF_NODE);
13001  log_append_undoredo_data2 (thread_p, RVBT_NDRECORD_INS, &btid->sys_btid->vfid, P, p_slot_id, sizeof (p_slot_id),
13002  p_redo_length, &p_slot_id, p_redo_data);
13003 
13005 
13006  key_cnt = btree_node_number_of_keys (thread_p, P);
13007  assert_release (key_cnt > 0);
13008 
13009  pheader = btree_get_node_header (thread_p, P);
13010  if (pheader == NULL)
13011  {
13012  assert_release (false);
13013  ret = ER_FAILED;
13014  goto exit_on_error;
13015  }
13016 
13017  assert_release (pheader->split_info.pivot >= 0);
13018 
13019  btree_node_header_undo_log (thread_p, &btid->sys_btid->vfid, P);
13020 
13021  btree_split_next_pivot (&pheader->split_info, (float) p_slot_id / key_cnt, key_cnt);
13022 
13023  /* We may need to update the max_key length if the mid key is larger than the max key length. This can happen due to
13024  * disk padding when the prefix key length approaches the fixed key length. */
13025  sep_key_len = btree_get_disk_size_of_key (sep_key);
13026  sep_key_len = BTREE_GET_KEY_LEN_IN_PAGE (sep_key_len);
13027  pheader->max_key_len = MAX (sep_key_len, pheader->max_key_len);
13028 
13029  btree_node_header_redo_log (thread_p, &btid->sys_btid->vfid, P);
13030 
13031  /* find the child page to be followed */
13032  c = btree_compare_key (key, sep_key, btid->key_type, 1, 1, NULL);
13033  assert (c == DB_LT || c == DB_EQ || c == DB_GT);
13034 
13035  if (c == DB_UNK)
13036  {
13037  assert_release (false);
13038  ret = ER_FAILED;
13039  goto exit_on_error;
13040  }
13041  else if (c < 0)
13042  {
13043  /* set child page pointer */
13044  *child_vpid = *Q_vpid;
13045  }
13046  else
13047  {
13048  /* set child page pointer */
13049  *child_vpid = *R_vpid;
13050  }
13051 
13052  /* TODO : update child_vpid max_key_len */
13053  if (sep_key)
13054  {
13055  btree_clear_key_value (&clear_sep_key, sep_key);
13056  db_private_free_and_init (thread_p, sep_key);
13057  }
13058 
13059  pgbuf_set_dirty (thread_p, P, DONT_FREE);
13060  pgbuf_set_dirty (thread_p, Q, DONT_FREE);
13061  pgbuf_set_dirty (thread_p, R, DONT_FREE);
13062 
13063  if (rheader->node_level == 1)
13064  {
13065  /* Since leaf level can be processed in reversed order, we need to update the prev link of next page after the
13066  * one that was split. */
13067  page_after_right = btree_get_next_page (thread_p, R);
13068  if (page_after_right != NULL)
13069  {
13070  ret = btree_set_vpid_previous_vpid (thread_p, btid, page_after_right, R_vpid);
13071  /* We don't expect any errors here. */
13072  assert (ret == NO_ERROR);
13073  pgbuf_unfix_and_init (thread_p, page_after_right);
13074  }
13075  }
13076 
13078 
13080 
13081 #if !defined(NDEBUG)
13082  btree_verify_node (thread_p, btid, P);
13083  btree_verify_node (thread_p, btid, Q);
13084  btree_verify_node (thread_p, btid, R);
13085 #endif
13086 
13087  return ret;
13088 
13089 exit_on_error:
13090 
13091  if (sep_key)
13092  {
13093  btree_clear_key_value (&clear_sep_key, sep_key);
13094  db_private_free_and_init (thread_p, sep_key);
13095  }
13096 
13097  assert (ret != NO_ERROR);
13098  return ret;
13099 }
13100 
13101 #if !defined(NDEBUG)
13102 /*
13103  * btree_set_split_point () -
13104  * return: the key or key separator (prefix) to be moved to the
13105  * parent page, or NULL_KEY. The length of the returned
13106  * key, or prefix, is set in mid_keylen. The parameter
13107  * mid_slot is set to the record number of the split point record.
13108  * btid(in):
13109  * page_ptr(in): Pointer to the page
13110  * mid_slot(in): Set to contain the record number for the split point slot
13111  * key(in): Key to be inserted to the index
13112  * clear_midkey(in):
13113  *
13114  */
13115 static DB_VALUE *
13116 btree_set_split_point (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR page_ptr, INT16 mid_slot, DB_VALUE * key,
13117  bool * clear_midkey)
13118 {
13119  RECDES rec;
13120  BTREE_NODE_HEADER *header = NULL;
13121  BTREE_NODE_TYPE node_type;
13122  INT16 slot_id;
13123  int key_cnt, offset;
13124  bool m_clear_key, n_clear_key;
13125  DB_VALUE *mid_key = NULL, *next_key = NULL, *prefix_key = NULL, *tmp_key;
13126  NON_LEAF_REC nleaf_pnt;
13127  LEAF_REC leaf_pnt;
13128  BTREE_SEARCH_KEY_HELPER search_key;
13129 
13130  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
13131  if (key_cnt <= 0)
13132  {
13133  assert (false);
13134  }
13135 
13136  /* get the page header */
13137  header = btree_get_node_header (thread_p, page_ptr);
13138  if (header == NULL)
13139  {
13140  assert (false);
13141  }
13142 
13143  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
13144 
13145  /* find the slot position of the key if it is to be located in the page */
13146  if (node_type == BTREE_LEAF_NODE)
13147  {
13148  if (btree_search_leaf_page (thread_p, btid, page_ptr, key, &search_key) != NO_ERROR)
13149  {
13150  assert (false);
13151  }
13152  slot_id = search_key.slotid;
13153  if (slot_id == NULL_SLOTID) /* leaf search failed */
13154  {
13155  assert (false);
13156  }
13157  }
13158  else
13159  {
13160  slot_id = NULL_SLOTID;
13161  }
13162 
13163  mid_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
13164  if (mid_key == NULL)
13165  {
13166  assert (false);
13167  }
13168 
13169  btree_init_temp_key_value (&m_clear_key, mid_key);
13170 
13171  /* the split key is one of the keys on the page */
13172  assert (mid_slot > 0);
13173  if (spage_get_record (thread_p, page_ptr, mid_slot, &rec, PEEK) != S_SUCCESS)
13174  {
13175  assert (false);
13176  }
13177 
13178  /* we copy the key here because rec lives on the stack and mid_key is returned from this routine. */
13179  if (node_type == BTREE_LEAF_NODE)
13180  {
13181  if (btree_read_record (thread_p, btid, page_ptr, &rec, mid_key, (void *) &leaf_pnt, node_type, &m_clear_key,
13182  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
13183  {
13184  assert (false);
13185  }
13186  }
13187  else
13188  {
13189  if (btree_read_record (thread_p, btid, page_ptr, &rec, mid_key, (void *) &nleaf_pnt, node_type, &m_clear_key,
13190  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
13191  {
13192  assert (false);
13193  }
13194  }
13195 
13196  /* The determination of the prefix key is dependent on the next key */
13197  next_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
13198  if (next_key == NULL)
13199  {
13200  assert (false);
13201  }
13202 
13203  btree_init_temp_key_value (&n_clear_key, next_key);
13204 
13205  if (mid_slot == key_cnt && slot_id == (key_cnt + 1))
13206  {
13207  /* the next key is the new key, we don't have to read it */
13208  n_clear_key = true;
13209  if (pr_clone_value (key, next_key) != NO_ERROR)
13210  {
13211  assert (false);
13212  }
13213  }
13214  else
13215  {
13216  /* The next key is one of the keys on the page */
13217  assert (mid_slot + 1 > 0);
13218  if (spage_get_record (thread_p, page_ptr, mid_slot + 1, &rec, PEEK) != S_SUCCESS)
13219  {
13220  assert (false);
13221  }
13222 
13223  /* we copy the key here because rec lives on the stack and mid_key is returned from this routine. */
13224  if (node_type == BTREE_LEAF_NODE)
13225  {
13226  if (btree_read_record (thread_p, btid, page_ptr, &rec, next_key, (void *) &leaf_pnt, node_type, &n_clear_key,
13227  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
13228  {
13229  assert (false);
13230  }
13231  }
13232  else
13233  {
13234  if (btree_read_record (thread_p, btid, page_ptr, &rec, next_key, (void *) &nleaf_pnt, node_type, &n_clear_key,
13235  &offset, COPY_KEY_VALUE, NULL) != NO_ERROR)
13236  {
13237  assert (false);
13238  }
13239  }
13240  }
13241 
13242  /* now that we have the mid key and the next key, we can determine the prefix key. */
13243 
13244  prefix_key = (DB_VALUE *) db_private_alloc (thread_p, sizeof (DB_VALUE));
13245  if (prefix_key == NULL)
13246  {
13247  assert (false);
13248  }
13249 
13250  /* Check if we can make use of prefix keys. We can't use them in the upper levels of the trees because the algorithm
13251  * will fall apart. We can only use them when splitting a leaf page. */
13252  if (node_type == BTREE_LEAF_NODE)
13253  {
13254  if (btree_get_prefix_separator (mid_key, next_key, prefix_key, btid->key_type) != NO_ERROR)
13255  {
13256  assert (false);
13257  }
13258  }
13259  else
13260  {
13261  /* return the next_key */
13262  pr_clone_value (next_key, prefix_key);
13263  }
13264 
13265  *clear_midkey = true; /* we must always clear prefix keys */
13266 
13267  /* replace the mid_key with the prefix_key */
13268  tmp_key = mid_key;
13269  mid_key = prefix_key;
13270  prefix_key = tmp_key; /* this makes sure we clear/free the old mid key */
13271 
13272  if (next_key)
13273  {
13274  btree_clear_key_value (&n_clear_key, next_key);
13275  db_private_free_and_init (thread_p, next_key);
13276  }
13277  if (prefix_key)
13278  {
13279  pr_clear_value (prefix_key);
13280  db_private_free_and_init (thread_p, prefix_key);
13281  }
13282 
13283  return mid_key;
13284 }
13285 
13286 /*
13287  * btree_split_test () -
13288  *
13289  * btid(in):
13290  * key(in):
13291  * S_vpid(in):
13292  * S_page(in):
13293  * node_type(in):
13294  */
13295 static void
13296 btree_split_test (THREAD_ENTRY * thread_p, BTID_INT * btid, DB_VALUE * key, VPID * S_vpid, PAGE_PTR S_page,
13297  BTREE_NODE_TYPE node_type)
13298 {
13299  RECDES rec, peek_rec;
13300  int i, j, key_cnt, lcnt, rcnt, sep_key_len, ret;
13301  PAGE_PTR L_page, R_page;
13302  VPID L_vpid, R_vpid;
13303  BTREE_NODE_HEADER header_info, *header = NULL;
13304  DB_VALUE *sep_key;
13305  bool fence_insert = false;
13306  bool clear_sep_key = true;
13307  OID dummy_oid = { NULL_PAGEID, 0, 0 };
13308  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
13309 
13310  log_sysop_start (thread_p);
13311 
13312  header = &header_info;
13313 
13314  rec.area_size = DB_PAGESIZE;
13315  rec.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
13316 
13317  key_cnt = btree_node_number_of_keys (thread_p, S_page);
13318  assert (key_cnt > 0);
13319 
13320  L_page = btree_get_new_page (thread_p, btid, &L_vpid, S_vpid);
13321  R_page = btree_get_new_page (thread_p, btid, &R_vpid, S_vpid);
13322 
13323  /* dummy header */
13324  memset (header, 0, sizeof (BTREE_NODE_HEADER));
13325  btree_init_node_header (thread_p, &btid->sys_btid->vfid, L_page, header, false);
13326  btree_init_node_header (thread_p, &btid->sys_btid->vfid, R_page, header, false);
13327 
13328  for (lcnt = 1; lcnt < key_cnt; i++)
13329  {
13330  fence_insert = false;
13331  sep_key = btree_set_split_point (thread_p, btid, S_page, lcnt, key, &clear_sep_key);
13332  assert (sep_key != NULL);
13333 
13334  if (node_type == BTREE_LEAF_NODE)
13335  {
13336  PR_TYPE *pr_type;
13337 
13338  pr_type = btid->key_type->type;
13339  sep_key_len = pr_type->get_index_size_of_value (sep_key);
13340 
13341  if (sep_key_len < BTREE_MAX_KEYLEN_INPAGE)
13342  {
13343  ret =
13344  btree_write_record (thread_p, btid, NULL, sep_key, BTREE_LEAF_NODE, BTREE_NORMAL_KEY, sep_key_len,
13345  false, &btid->topclass_oid, &dummy_oid, NULL, &rec);
13346 
13348  fence_insert = true;
13349  }
13350  }
13351 
13352  rcnt = key_cnt - lcnt;
13353 
13354  /* Right page test */
13355  j = 1;
13356  /* lower fence key for Right */
13357  if (fence_insert == true)
13358  {
13359  assert (j > 0);
13360  ret = spage_insert_at (thread_p, R_page, j++, &rec);
13361  if (ret != SP_SUCCESS)
13362  {
13363  assert (false);
13364  }
13365  }
13366 
13367  /* move the second half of page P to page R */
13368  for (i = 1; i <= rcnt; i++, j++)
13369  {
13370  assert (lcnt + i > 0);
13371  ret = spage_get_record (thread_p, S_page, lcnt + i, &peek_rec, PEEK);
13372  if (ret != S_SUCCESS)
13373  {
13374  assert (false);
13375  }
13376 
13377  assert (j > 0);
13378  ret = spage_insert_at (thread_p, R_page, j, &peek_rec);
13379  if (ret != SP_SUCCESS)
13380  {
13381  assert (false);
13382  }
13383  }
13384 
13385  /* Left page test */
13386  for (i = 1; i <= lcnt; i++)
13387  {
13388  ret = spage_get_record (thread_p, S_page, i, &peek_rec, PEEK);
13389  if (ret != S_SUCCESS)
13390  {
13391  assert (false);
13392  }
13393 
13394  ret = spage_insert_at (thread_p, L_page, i, &peek_rec);
13395  if (ret != SP_SUCCESS)
13396  {
13397  assert (false);
13398  }
13399  }
13400 
13401  /* upper fence key for Left */
13402  if (fence_insert == true)
13403  {
13404  assert (i > 0);
13405  ret = spage_insert_at (thread_p, L_page, i, &rec);
13406  if (ret != SP_SUCCESS)
13407  {
13408  assert (false);
13409  }
13410  }
13411 
13412  /* clean up */
13413  if (fence_insert == true)
13414  {
13415  lcnt++, rcnt++;
13416  }
13417 
13418  assert (btree_node_number_of_keys (thread_p, L_page) == lcnt);
13419  assert (btree_node_number_of_keys (thread_p, R_page) == rcnt);
13420 
13421  for (i = 1; i <= lcnt; i++)
13422  {
13423  ret = spage_delete (thread_p, L_page, 1);
13424  if (ret != 1)
13425  {
13426  assert (false);
13427  }
13428  }
13429 
13430  for (i = 1; i <= rcnt; i++)
13431  {
13432  ret = spage_delete (thread_p, R_page, 1);
13433  if (ret != 1)
13434  {
13435  assert (false);
13436  }
13437  }
13438 
13439  assert (btree_node_number_of_keys (thread_p, L_page) == 0);
13440  assert (btree_node_number_of_keys (thread_p, R_page) == 0);
13441 
13442  btree_clear_key_value (&clear_sep_key, sep_key);
13443  db_private_free_and_init (thread_p, sep_key);
13444  }
13445 
13446  pgbuf_unfix_and_init (thread_p, L_page);
13447  pgbuf_unfix_and_init (thread_p, R_page);
13448 
13449  /* this was just a test, abort all changes */
13450  log_sysop_abort (thread_p);
13451 }
13452 #endif
13453 
13454 /*
13455  * btree_split_root () -
13456  * return: NO_ERROR
13457  * child_vpid parameter is set to the child page to be followed
13458  * after the split operation, or the page identifier of a newly
13459  * allocated page for future key insertion, or NULL_PAGEID.
13460  * The parameter key is set to the middle key of the split operation.
13461  * btid(in): B+tree index identifier
13462  * P(in): Page pointer for the root to be split
13463  * Q(in): Page pointer for the newly allocated page
13464  * R(in): Page pointer for the newly allocated page
13465  * P_vpid(in): Page identifier for root page P
13466  * Q_vpid(in): Page identifier for page Q
13467  * R_vpid(in): Page identifier for page R
13468  * node_type(in): shows whether root is currently a leaf page, or not
13469  * key(in): the key caller is trying to follow
13470  * helper(in): B-tree insert helper structure
13471  * child_vpid(out): Set to the child page identifier based on key.
13472  *
13473  * Note: The root page P is split into two pages: Q and R. In order
13474  * not to change the actual root page, the first half of the page
13475  * is moved to page Q and the second half is moved to page R.
13476  * Depending on the split point found, the whole root page may be
13477  * moved to Q, or R, leaving the other one empty for future key
13478  * insertion. If the key cannot fit into either Q or R after the
13479  * split, a new page is allocated and its page identifier is
13480  * returned. Two new records are formed within root page to point
13481  * to pages Q and R. The headers of all pages are updated.
13482  */
13483 static int
13484 btree_split_root (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R, VPID * P_vpid,
13485  VPID * Q_vpid, VPID * R_vpid, BTREE_NODE_TYPE node_type, DB_VALUE * key, BTREE_INSERT_HELPER * helper,
13486  VPID * child_vpid)
13487 {
13488  int key_cnt, leftcnt, rightcnt;
13489  RECDES rec, peek_rec;
13490  NON_LEAF_REC nleaf_rec;
13491  BTREE_ROOT_HEADER *pheader = NULL;
13492  BTREE_NODE_HEADER q_header_info, *qheader = NULL;
13493  BTREE_NODE_HEADER r_header_info, *rheader = NULL;
13494  int i, j, c;
13495  int sep_key_len, key_len;
13496  bool clear_sep_key;
13497  DB_VALUE *sep_key;
13498  DB_VALUE *neg_inf_key = NULL;
13499  char *recset_data; /* for recovery purposes */
13500  RECSET_HEADER recset_header; /* for recovery purposes */
13501  int recset_length; /* for recovery purposes */
13502  int sp_success;
13503  PGLENGTH log_addr_offset;
13504  int ret = NO_ERROR;
13505  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
13506  char recset_data_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
13507  int key_type;
13508  BTREE_NODE_SPLIT_INFO split_info;
13509  int node_level;
13510  bool flag_fence_insert = false;
13511  OID dummy_oid = { NULL_PAGEID, 0, 0 };
13512  int leftsize, rightsize;
13513 
13514  qheader = &q_header_info;
13515  rheader = &r_header_info;
13516 
13517  /***********************************************************
13518  *** STEP 0: initializations
13519  ***********************************************************/
13520  recset_data = NULL;
13521  rec.data = NULL;
13522 
13523  /* initialize child page identifier */
13524  VPID_SET_NULL (child_vpid);
13525  sep_key = NULL;
13526 
13527 #if !defined(NDEBUG)
13528  if ((!P || !Q || !R) || VPID_ISNULL (P_vpid) || VPID_ISNULL (Q_vpid) || VPID_ISNULL (R_vpid))
13529  {
13530  goto exit_on_error;
13531  }
13532 #endif
13533 
13534 #if !defined(NDEBUG)
13536  {
13537  printf ("btree_split_root: P{%d, %d}, Q{%d, %d}, R{%d, %d}\n", P_vpid->volid, P_vpid->pageid, Q_vpid->volid,
13538  Q_vpid->pageid, R_vpid->volid, R_vpid->pageid);
13539  }
13540 #endif
13541 
13542 #if !defined(NDEBUG)
13543  btree_verify_node (thread_p, btid, P);
13544 #endif
13545 
13546  /* initializations */
13547  rec.area_size = DB_PAGESIZE;
13548  rec.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
13549 
13550  /* log the whole root page P for undo purposes. */
13551  log_append_undo_data2 (thread_p, RVBT_COPYPAGE, &btid->sys_btid->vfid, P, -1, DB_PAGESIZE, P);
13552 
13553  /* get the number of keys in the root page P */
13554  key_cnt = btree_node_number_of_keys (thread_p, P);
13555  if (key_cnt <= 0)
13556  {
13557  goto exit_on_error;
13558  }
13559 
13560 #if !defined(NDEBUG)
13561  node_level = btree_get_node_level (thread_p, P);
13562  assert (node_level >= 1);
13563 #endif
13564 
13565  pheader = btree_get_root_header (thread_p, P);
13566  if (pheader == NULL)
13567  {
13568  goto exit_on_error;
13569  }
13570 
13571  split_info = pheader->node.split_info;
13572  split_info.index = 1;
13573 
13574 #if !defined(NDEBUG)
13576  {
13577  btree_split_test (thread_p, btid, key, P_vpid, P, node_type);
13578  }
13579 #endif
13580 
13581  /********************************************************************
13582  *** STEP 1: find split point & sep_key
13583  *** make fence key to be inserted
13584  ***
13585  *** find the middle record of the page Q and find the number of
13586  *** keys after split in pages Q and R, respectively
13587  ********************************************************************/
13588 
13589  sep_key = btree_find_split_point (thread_p, btid, P, &leftcnt, key, helper, &clear_sep_key);
13590  if (sep_key == NULL || DB_IS_NULL (sep_key))
13591  {
13592  er_log_debug (ARG_FILE_LINE, "btree_split_root: Null middle key after split. Operation Ignored.\n");
13593  goto exit_on_error;
13594  }
13595  assert (leftcnt <= key_cnt && leftcnt >= 0);
13596 
13597  /* make fence record */
13598  if (node_type == BTREE_LEAF_NODE)
13599  {
13600  PR_TYPE *pr_type;
13601 
13602  pr_type = btid->key_type->type;
13603 
13604  sep_key_len = pr_type->get_index_size_of_value (sep_key);
13605 
13606  if (sep_key_len < BTREE_MAX_KEYLEN_INPAGE && sep_key_len <= pheader->node.max_key_len)
13607  {
13608  ret =
13609  btree_write_record (thread_p, btid, NULL, sep_key, BTREE_LEAF_NODE, BTREE_NORMAL_KEY, sep_key_len, false,
13610  &btid->topclass_oid, &dummy_oid, NULL, &rec);
13611  if (ret != NO_ERROR)
13612  {
13613  goto exit_on_error;
13614  }
13615 
13617  flag_fence_insert = true;
13618  }
13619  else
13620  {
13621  /* do not insert fence key if sep_key is overflow key */
13622  flag_fence_insert = false;
13623  }
13624  }
13625 
13627  {
13628  flag_fence_insert = false;
13629  }
13630 
13631  /* neg-inf key is dummy key which is not used in comparison so set it as sep_key */
13632  neg_inf_key = sep_key;
13633 
13634  rightcnt = key_cnt - leftcnt;
13635 
13636  /*********************************************************************
13637  *** STEP 2: update P, Q, R header info
13638  *********************************************************************/
13639  /* update page P header */
13640  pheader->node.node_level++;
13641 
13642  /* We may need to update the max_key length if the sep key is larger than the max key length. This can happen due to
13643  * disk padding when the prefix key length approaches the fixed key length. */
13644  sep_key_len = btree_get_disk_size_of_key (sep_key);
13645  sep_key_len = BTREE_GET_KEY_LEN_IN_PAGE (sep_key_len);
13646  pheader->node.max_key_len = MAX (sep_key_len, pheader->node.max_key_len);
13648 
13649  btree_node_header_redo_log (thread_p, &btid->sys_btid->vfid, P);
13650 
13651  /* update page Q header */
13652  qheader->node_level = pheader->node.node_level - 1;
13653  qheader->max_key_len = pheader->node.max_key_len;
13654  if (leftcnt == 0 && flag_fence_insert == false)
13655  {
13656  /* Only key length will exist in page. Set max key length. */
13657  /* Max key length would have been set when key is inserted. However, we set it here to suppress assert of
13658  * btree_verify_node. */
13660  }
13661 
13662  VPID_SET_NULL (&(qheader->prev_vpid)); /* non leaf or first leaf node */
13663 
13664  if (node_type == BTREE_LEAF_NODE)
13665  {
13666  qheader->next_vpid = *R_vpid;
13667  }
13668  else
13669  {
13670  VPID_SET_NULL (&(qheader->next_vpid));
13671  }
13672 
13673  qheader->split_info = split_info;
13674 
13675  if (btree_init_node_header (thread_p, &btid->sys_btid->vfid, Q, qheader, true) != NO_ERROR)
13676  {
13677  goto exit_on_error;
13678  }
13679 
13680  /* update page R header */
13681  rheader->node_level = pheader->node.node_level - 1;
13682  rheader->max_key_len = pheader->node.max_key_len;
13683  if (key_cnt - leftcnt == 0 && flag_fence_insert == false)
13684  {
13685  /* Only key length will exist in page. Set max key length. */
13686  /* Max key length would have been set when key is inserted. However, we set it here to suppress assert of
13687  * btree_verify_node. */
13689  }
13690 
13691  VPID_SET_NULL (&(rheader->next_vpid)); /* non leaf or last leaf node */
13692 
13693  if (node_type == BTREE_LEAF_NODE)
13694  {
13695  rheader->prev_vpid = *Q_vpid;
13696  }
13697  else
13698  {
13699  VPID_SET_NULL (&(rheader->prev_vpid));
13700  }
13701 
13702  rheader->split_info = split_info;
13703 
13704  if (btree_init_node_header (thread_p, &btid->sys_btid->vfid, R, rheader, true) != NO_ERROR)
13705  {
13706  goto exit_on_error;
13707  }
13708 
13709 
13710  /*******************************************************************
13711  *** STEP 3: move second half of page P to page R
13712  *** insert fence key to R
13713  *** add undo / redo log for R
13714  *******************************************************************/
13715  /* move the second half of root page P to page R */
13716  assert (btree_node_number_of_keys (thread_p, P) == leftcnt + rightcnt);
13717 
13718  j = 1;
13719  /* lower fence key for page R */
13720  if (flag_fence_insert == true)
13721  {
13722  rightsize = j;
13723  assert (j > 0);
13724  if (spage_insert_at (thread_p, R, j++, &rec) != SP_SUCCESS)
13725  {
13726  goto exit_on_error;
13727  }
13728  }
13729 
13730  for (i = 1; i <= rightcnt; i++, j++)
13731  {
13732  assert (leftcnt + 1 > 0);
13733  if (spage_get_record (thread_p, P, leftcnt + 1, &peek_rec, PEEK) != S_SUCCESS)
13734  {
13735  goto exit_on_error;
13736  }
13737 
13738  assert (j > 0);
13739  sp_success = spage_insert_at (thread_p, R, j, &peek_rec);
13740  if (sp_success != SP_SUCCESS)
13741  {
13742  goto exit_on_error;
13743  }
13744  rightsize = j;
13745 
13746  assert (leftcnt + 1 > 0);
13747  if (spage_delete (thread_p, P, leftcnt + 1) != leftcnt + 1)
13748  {
13749  goto exit_on_error;
13750  }
13751  }
13752 
13753  /* for recovery purposes */
13754  recset_data = PTR_ALIGN (recset_data_buf, BTREE_MAX_ALIGN);
13755 
13756  /* Log page R records for redo purposes */
13757  ret = btree_rv_util_save_page_records (thread_p, R, 1, j - 1, 1, recset_data, &recset_length);
13758  if (ret != NO_ERROR)
13759  {
13760  goto exit_on_error;
13761  }
13762 
13763  log_append_redo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, R, -1, recset_length, recset_data);
13764 
13765 
13766  /*******************************************************************
13767  *** STEP 4: move first half of page P to page Q
13768  *** insert fence key to Q
13769  *** add undo / redo log for Q
13770  *******************************************************************/
13771  /* move the first half of root page P to page Q */
13772 
13773  for (i = 1; i <= leftcnt; i++)
13774  {
13775  if (spage_get_record (thread_p, P, 1, &peek_rec, PEEK) != S_SUCCESS)
13776  {
13777  goto exit_on_error;
13778  }
13779 
13780  sp_success = spage_insert_at (thread_p, Q, i, &peek_rec);
13781  if (sp_success != SP_SUCCESS)
13782  {
13783  goto exit_on_error;
13784  }
13785  leftsize = i;
13786 
13787  if (spage_delete (thread_p, P, 1) != 1)
13788  {
13789  goto exit_on_error;
13790  }
13791  }
13792 
13793  /* upper fence key for Q */
13794  if (flag_fence_insert == true)
13795  {
13796  assert (i > 0);
13797  if (spage_insert_at (thread_p, Q, i, &rec) != SP_SUCCESS)
13798  {
13799  goto exit_on_error;
13800  }
13801  leftsize = i;
13802  }
13803  else
13804  {
13805  i--;
13806  }
13807 
13808  /* Log page Q records for redo purposes */
13809  ret = btree_rv_util_save_page_records (thread_p, Q, 1, i, 1, recset_data, &recset_length);
13810  if (ret != NO_ERROR)
13811  {
13812  goto exit_on_error;
13813  }
13814  log_append_redo_data2 (thread_p, RVBT_INS_PGRECORDS, &btid->sys_btid->vfid, Q, -1, recset_length, recset_data);
13815 
13816  /****************************************************************************
13817  *** STEP 5: insert sep_key to P
13818  *** add redo log for page P
13819  ****************************************************************************/
13820 
13821  /* Log deletion of all page P records (except the header!!) for redo purposes */
13822  recset_header.rec_cnt = key_cnt;
13823  recset_header.first_slotid = 1;
13824  log_append_redo_data2 (thread_p, RVBT_DEL_PGRECORDS, &btid->sys_btid->vfid, P, -1, sizeof (RECSET_HEADER),
13825  &recset_header);
13826 
13827  /* update the root page P to keep the middle key and to point to page Q and R. Remember that this mid key will be on
13828  * a non leaf page regardless of whether we are splitting a leaf or non leaf page. */
13829  nleaf_rec.pnt = *Q_vpid;
13830  key_len = btree_get_disk_size_of_key (neg_inf_key);
13831  if (key_len < BTREE_MAX_KEYLEN_INPAGE)
13832  {
13833  key_type = BTREE_NORMAL_KEY;
13834  nleaf_rec.key_len = key_len;
13835  }
13836  else
13837  {
13838  key_type = BTREE_OVERFLOW_KEY;
13839  nleaf_rec.key_len = -1;
13840  }
13841 
13842  ret =
13843  btree_write_record (thread_p, btid, &nleaf_rec, neg_inf_key, BTREE_NON_LEAF_NODE, key_type, key_len, false, NULL,
13844  NULL, NULL, &rec);
13845  if (ret != NO_ERROR)
13846  {
13847  goto exit_on_error;
13848  }
13849 
13850  if (spage_insert_at (thread_p, P, 1, &rec) != SP_SUCCESS)
13851  {
13852  goto exit_on_error;
13853  }
13854 
13855  /* log the inserted record for undo/redo purposes, */
13856  btree_rv_write_log_record (recset_data, &recset_length, &rec, BTREE_NON_LEAF_NODE);
13857 
13858  log_addr_offset = 1;
13859  log_append_redo_data2 (thread_p, RVBT_NDRECORD_INS, &btid->sys_btid->vfid, P, log_addr_offset, recset_length,
13860  recset_data);
13861 
13862  nleaf_rec.pnt = *R_vpid;
13863  key_len = btree_get_disk_size_of_key (sep_key);
13864  if (key_len < BTREE_MAX_KEYLEN_INPAGE)
13865  {
13866  key_type = BTREE_NORMAL_KEY;
13867  nleaf_rec.key_len = key_len;
13868  }
13869  else
13870  {
13871  key_type = BTREE_OVERFLOW_KEY;
13872  nleaf_rec.key_len = -1;
13873  }
13874 
13875  ret =
13876  btree_write_record (thread_p, btid, &nleaf_rec, sep_key, BTREE_NON_LEAF_NODE, key_type, key_len, false, NULL, NULL,
13877  NULL, &rec);
13878  if (ret != NO_ERROR)
13879  {
13880  goto exit_on_error;
13881  }
13882 
13883  if (spage_insert_at (thread_p, P, 2, &rec) != SP_SUCCESS)
13884  {
13885  goto exit_on_error;
13886  }
13887 
13888  /* log the inserted record for undo/redo purposes, */
13889  btree_rv_write_log_record (recset_data, &recset_length, &rec, BTREE_NON_LEAF_NODE);
13890 
13891  log_addr_offset = 2;
13892  log_append_redo_data2 (thread_p, RVBT_NDRECORD_INS, &btid->sys_btid->vfid, P, log_addr_offset, recset_length,
13893  recset_data);
13894 
13895  /* find the child page to be followed */
13896 
13897  c = btree_compare_key (key, sep_key, btid->key_type, 1, 1, NULL);
13898  assert (c == DB_LT || c == DB_EQ || c == DB_GT);
13899 
13900  if (c == DB_UNK)
13901  {
13902  goto exit_on_error;
13903  }
13904  else if (c < 0)
13905  {
13906  /* set child page identifier */
13907  *child_vpid = *Q_vpid;
13908 
13909  }
13910  else
13911  {
13912  /* set child page identifier */
13913  *child_vpid = *R_vpid;
13914  }
13915 
13916  if (sep_key)
13917  {
13918  btree_clear_key_value (&clear_sep_key, sep_key);
13919  db_private_free_and_init (thread_p, sep_key);
13920  }
13921 
13922  pgbuf_set_dirty (thread_p, P, DONT_FREE);
13923  pgbuf_set_dirty (thread_p, Q, DONT_FREE);
13924  pgbuf_set_dirty (thread_p, R, DONT_FREE);
13925 
13927 
13928 #if !defined(NDEBUG)
13929  btree_verify_node (thread_p, btid, P);
13930  btree_verify_node (thread_p, btid, Q);
13931  btree_verify_node (thread_p, btid, R);
13932 #endif
13933 
13934  return ret;
13935 
13936 exit_on_error:
13937 
13938  if (sep_key)
13939  {
13940  btree_clear_key_value (&clear_sep_key, sep_key);
13941  db_private_free_and_init (thread_p, sep_key);
13942  }
13943 
13944  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
13945 }
13946 
13947 /*
13948  * btree_update () - Replace the index entry <old_key, oid> with <new_key, oid>.
13949  * return: NO_ERROR on success, otherwise the error code of the failing delete or insert step
       * thread_p(in): thread entry, forwarded to the delete/insert routines
13950  * btid(in): B+tree index identifier
13951  * old_key(in): Old key value (asserted to be non-NULL)
13952  * new_key(in): New key value (asserted to be non-NULL)
13954  * cls_oid(in): class OID, forwarded to the delete/insert routines
13955  * oid(in): Object identifier to be updated
13956  * op_type(in): forwarded unchanged to the delete/insert routines
13957  * unique_stat_info(in): forwarded unchanged to the delete/insert routines
13958  * unique(in): asserted to be non-NULL; read here to decide whether an unknown-key error of the physical delete
       *             may be tolerated, and forwarded to the delete/insert routines
13959  * p_mvcc_rec_header(in/out): array of MVCC_REC_HEADER of size 2 or NULL; element 0 is handed to the delete of
       *                            old_key, element 1 to the insert of new_key
13960  *
13961  * Note: Deletes the <old_key, oid> key-value pair from the B+tree
13962  * index and inserts the <new_key, oid> key-value pair to the
13963  * B+tree index which results in the update of the specified
13964  * index entry for the given object identifier.
       * When p_mvcc_rec_header is NULL the old entry is removed with btree_physical_delete (),
       * otherwise with btree_mvcc_delete ().
13965  */
13966 int
13967 btree_update (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * old_key, DB_VALUE * new_key, OID * cls_oid, OID * oid,
13968  int op_type, btree_unique_stats * unique_stat_info, int *unique, MVCC_REC_HEADER * p_mvcc_rec_header)
13969 {
13970  MVCC_REC_HEADER *p_local_rec_header = NULL;
13971  int ret = NO_ERROR;
13972
13973  assert (old_key != NULL);
13974  assert (new_key != NULL);
13975  assert (unique != NULL);
13976
13977 #if !defined (SERVER_MODE)
       /* outside SERVER_MODE builds no MVCC headers are expected */
13978  assert_release (p_mvcc_rec_header == NULL);
13979 #endif /* SERVER_MODE */
13980
13981  if (p_mvcc_rec_header != NULL)
13982  {
13983  /* in MVCC, logical deletion means DEL_ID insertion */
13984  /* Note that it is possible that update "in-place" is done instead of standard MVCC update, in which case the
13985  * "logical" deletion is no longer required. */
13986  ret =
13987  btree_mvcc_delete (thread_p, btid, old_key, cls_oid, oid, op_type, unique_stat_info, unique,
13988  &p_mvcc_rec_header[0]);
13989  if (ret != NO_ERROR)
13990  {
13991  ASSERT_ERROR ();
13992  goto exit_on_error;
13993  }
13994  }
13995  else
13996  {
13997  /* In-place update. Remove object physically. */
13998  ret = btree_physical_delete (thread_p, btid, old_key, oid, cls_oid, unique, op_type, unique_stat_info);
13999  if (ret != NO_ERROR)
14000  {
14001  /* if the btree we are updating is a btree for unique attributes it is possible that the btree update has
14002  * already been performed via the template unique checking. In this case, we will ignore the error from
14003  * btree_physical_delete and skip the insert of new_key. */
14004  if (*unique && er_errid () == ER_BTREE_UNKNOWN_KEY)
14005  {
14006  /* Is this still true? */
       /* NOTE(review): ret still holds the delete error when jumping to `end', so the caller receives it even
        * though the comment above speaks of ignoring it - confirm this is intended. */
14007  goto end;
14008  }
14009  ASSERT_ERROR ();
14010  goto exit_on_error;
14011  }
14012  }
14013
       /* the second header of the array (if any) belongs to the newly inserted entry */
14014  if (p_mvcc_rec_header != NULL)
14015  {
14016  p_local_rec_header = &p_mvcc_rec_header[1];
14017  }
14018
14019  ret = btree_insert (thread_p, btid, new_key, cls_oid, oid, op_type, unique_stat_info, unique, p_local_rec_header);
14020  if (ret != NO_ERROR)
14021  {
14022  ASSERT_ERROR ();
14023  goto exit_on_error;
14024  }
14025
14026 end:
14027
14029
14030  return ret;
14031
14032 exit_on_error:
14034
       /* every jump to this label is made with ret already set to an error code */
14035  assert_release (ret != NO_ERROR);
14036  return ret;
14037 }
14038 
14039 /*
14040  * btree_reflect_global_unique_statistics () - reflects the global statistical information into btree header
14041  * return: NO_ERROR on success; ER_FAILED if unique_stat_info is NULL, the root header cannot be read or the
       *         page LSA cannot be set; the error raised while fixing the root page otherwise
14042  * unique_stat_info(in): supplies the index identifier (btid), the counters to store (unique_stats.num_nulls,
       *                       num_oids, num_keys) and last_log_lsa
14043  * only_active_tran(in): if true then reflect statistics only if transaction is active
14044  *
14045  * Note: We don't need to log the changes at this point because the changes were
14046  * already logged at commit stage.
       * The root page is fixed with a write latch for the duration of the update and unfixed before returning.
14047  */
14048 int
14050  bool only_active_tran)
14051 {
14052  VPID root_vpid;
14053  PAGE_PTR root = NULL;
14054  BTREE_ROOT_HEADER *root_header = NULL;
14055  int ret = NO_ERROR;
14056  LOG_LSA *page_lsa = NULL;
14057
14058  /* check if unique_stat_info is NULL */
14059  if (unique_stat_info == NULL)
14060  {
14061  assert (false);
14062  return ER_FAILED;
14063  }
14064
14065  /* fix the root page */
14066  root_vpid.pageid = unique_stat_info->btid.root_pageid;
14067  root_vpid.volid = unique_stat_info->btid.vfid.volid;
14068  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
14069  if (root == NULL)
14070  {
14071  ASSERT_ERROR_AND_SET (ret);
14072  goto exit;
14073  }
14074
14075  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
14076
14077  /* read the root information */
14078  root_header = btree_get_root_header (thread_p, root);
14079  if (root_header == NULL)
14080  {
14081  assert (false);
14082  ret = ER_FAILED;
14083  goto exit;
14084  }
14085
       /* the header is only touched when num_nulls != -1; such an index is expected to be unique (asserted below) */
14086  if (root_header->num_nulls != -1)
14087  {
14088  assert_release (BTREE_IS_UNIQUE (root_header->unique_pk));
14089
14090  if (!only_active_tran || logtb_is_current_active (thread_p))
14091  {
14092  /* update header information */
14093  root_header->num_nulls = unique_stat_info->unique_stats.num_nulls;
14094  root_header->num_oids = unique_stat_info->unique_stats.num_oids;
14095  root_header->num_keys = unique_stat_info->unique_stats.num_keys;
14096
14097  page_lsa = pgbuf_get_lsa (root);
14098  /* update the page's LSA to the last global unique statistics change that was made at commit, only if it is
14099  * newer than the last change recorded in the page's LSA. */
14100  if (LSA_LT (page_lsa, &unique_stat_info->last_log_lsa))
14101  {
14102  if (pgbuf_set_lsa (thread_p, root, &unique_stat_info->last_log_lsa) == NULL)
14103  {
14104  assert (false);
14105  ret = ER_FAILED;
14106  goto exit;
14107  }
14108  }
14109
14110  /* set the root page as dirty page */
14111  pgbuf_set_dirty (thread_p, root, DONT_FREE);
14112
14114  {
14116  "Reflect unique statistics to index (%d, %d|%d):"
14117  "nulls=%d, oids=%d, keys=%d. LSA=%lld|%d.\n", unique_stat_info->btid.root_pageid,
14118  unique_stat_info->btid.vfid.volid, unique_stat_info->btid.vfid.fileid,
14119  unique_stat_info->unique_stats.num_nulls, unique_stat_info->unique_stats.num_oids,
14120  unique_stat_info->unique_stats.num_keys,
14121  (long long int) unique_stat_info->last_log_lsa.pageid,
14122  (int) unique_stat_info->last_log_lsa.offset);
14123  }
14124  }
14125  }
14126
14127 exit:
14128
       /* common exit: release the root page if it was fixed */
14129  if (root != NULL)
14130  {
14131  pgbuf_unfix_and_init (thread_p, root);
14132  }
14133
14134  return ret;
14135 }
14136 
14137 /*
14138  * btree_locate_key () - Locate leaf node in b-tree for the given key.
14139  * return: NO_ERROR, or the error code returned by the tree traversal.
       * thread_p (in) : Thread entry.
14140  * btid_int (in) : B+tree index info (sys_btid must be set and valid).
14141  * key (in) : Key to locate; must be neither NULL nor a null (multi-column) key.
14142  * pg_vpid (out) : Outputs Leaf node page VPID. May be NULL, in which case it is not filled in.
14143  * slot_id (out) : Outputs the slot ID reported by the key search (search_key.slotid).
14144  * leaf_page_out(out): Leaf page reached by the traversal; set to NULL when an error is returned.
14145  * found_p (out) : Outputs true if key was found and false otherwise.
14146  *
14147  * Note: Search the B+tree index to locate the page and record that contains
14148  * the key, or would contain the key if the key was to be located.
       * NOTE(review): the returned leaf page presumably stays fixed and must be unfixed by the caller - confirm.
14149  */
14150 int
14151 btree_locate_key (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, VPID * pg_vpid, INT16 * slot_id,
14152  PAGE_PTR * leaf_page_out, bool * found_p)
14153 {
14154  PAGE_PTR leaf_page = NULL; /* Leaf node page pointer. */
14155  int error = NO_ERROR;
14156
14157  /* Search key result. */
14159
14160  /* Assert expected arguments. */
14161  assert (btid_int != NULL);
14162  assert (btid_int->sys_btid != NULL);
14163  assert (found_p != NULL);
14164  assert (slot_id != NULL);
14165  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
14166  assert (!BTREE_INVALID_INDEX_ID (btid_int->sys_btid));
14167
       /* Default output until the search proves otherwise. */
14168  *found_p = false;
14169  bool reuse_btid_int = true;
14170
14171  /* Advance in b-tree following key until leaf node is reached. */
14172  error = btree_search_key_and_apply_functions (thread_p, btid_int->sys_btid, btid_int, key, NULL, &reuse_btid_int,
14173  btree_advance_and_find_key, slot_id, NULL, NULL, &search_key,
14174  &leaf_page);
14175  if (error != NO_ERROR)
14176  {
14177  ASSERT_ERROR ();
14178  assert (leaf_page == NULL);
14179  *leaf_page_out = NULL;
14180  return error;
14181  }
14182  assert (leaf_page != NULL);
14183
14184  /* Output found and slot ID. */
14185  *found_p = (search_key.result == BTREE_KEY_FOUND);
14186  *slot_id = search_key.slotid;
14187  if (pg_vpid != NULL)
14188  {
14189  /* Output leaf node page VPID. */
14190  pgbuf_get_vpid (leaf_page, pg_vpid);
14191  }
14192  /* Assign leaf node page pointer. */
14193  *leaf_page_out = leaf_page;
14194
14195  return error;
14196 }
14197 
14198 /*
14199  * btree_find_lower_bound_leaf () - Position a scan on the first leaf entry in scan direction.
14200  * return: NO_ERROR, or an error code (ER_FAILED when no specific error was set)
14201  * bts(in/out): scan structure; use_desc_index selects the direction, C_page/C_vpid/slot_id/oid_pos are set here
14202  * stat_info_p(in): forwarded to the leaf finder; expected to be NULL for descending scans (assert_release)
14203  *
14204  * Note: Find the first/last leaf page of the B+tree index: the rightmost leaf when bts->use_desc_index is set,
       * the leftmost leaf otherwise. An empty leaf or a fence key in the starting slot is skipped by advancing
       * with btree_find_next_index_record ().
14205  */
14206 static int
14208 {
14209  int key_cnt;
14210  int ret = NO_ERROR;
14211  BTREE_NODE_HEADER *header = NULL;
14212  BTREE_NODE_TYPE node_type;
14213  RECDES rec;
14214
14215  if (bts->use_desc_index)
14216  {
14217  assert_release (stat_info_p == NULL);
14218  bts->C_page = btree_find_rightmost_leaf (thread_p, bts->btid_int.sys_btid, &bts->C_vpid, stat_info_p);
14219  }
14220  else
14221  {
14222  bts->C_page = btree_find_leftmost_leaf (thread_p, bts->btid_int.sys_btid, &bts->C_vpid, stat_info_p);
14223  }
14224
14225  if (bts->C_page == NULL)
14226  {
14227  goto exit_on_error;
14228  }
14229
14230  /* get header information (key_cnt) */
14231  key_cnt = btree_node_number_of_keys (thread_p, bts->C_page);
14232
14233  header = btree_get_node_header (thread_p, bts->C_page);
14234  if (header == NULL)
14235  {
14236  goto exit_on_error;
14237  }
14238
       /* a node level above 1 denotes a non-leaf node; the boundary search must have ended on a leaf */
14239  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
14240
14241  if (node_type != BTREE_LEAF_NODE)
14242  {
14243  assert_release (false);
14244  goto exit_on_error;
14245  }
14246
14247  /* set slot id and OID position */
14248  if (bts->use_desc_index)
14249  {
       /* descending scan starts at the last key of the leaf */
14250  bts->slot_id = key_cnt;
14251  }
14252  else
14253  {
14254  bts->slot_id = 1;
14255  }
14256
14257  if (key_cnt == 0)
14258  {
14259  /* tree is empty; need to unfix current leaf page */
14260  ret = btree_find_next_index_record (thread_p, bts);
14261  if (ret != NO_ERROR)
14262  {
14263  goto exit_on_error;
14264  }
14265
14267  }
14268  else
14269  {
14270  /* Key may be fence and fences must be filtered out. */
14271  if (spage_get_record (thread_p, bts->C_page, bts->slot_id, &rec, PEEK) != S_SUCCESS)
14272  {
14273  assert (false);
14274  goto exit_on_error;
14275  }
14276  assert (rec.length % 4 == 0);
14277
14279  {
14280  /* Filter out fence key. */
14281  ret = btree_find_next_index_record (thread_p, bts);
14282  if (ret != NO_ERROR)
14283  {
14284  return ret;
14285  }
14286  }
14287  else
14288  {
       /* regular key: start with its first OID */
14289  bts->oid_pos = 0;
14290  assert_release (bts->slot_id <= key_cnt);
14291  }
14292  }
14293
14294  return ret;
14295
14296 exit_on_error:
14297
       /* prefer ret, then the current er_errid (), and fall back to ER_FAILED if neither reports an error */
14298  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
14299 }
14300 
14301 /*
14302  * btree_find_leftmost_leaf () - Convenience wrapper around btree_find_boundary_leaf () for BTREE_BOUNDARY_FIRST.
14303  * return: page pointer, exactly as returned by btree_find_boundary_leaf ()
       * thread_p(in): thread entry
14304  * btid(in): B+tree index identifier
14305  * pg_vpid(out): forwarded to btree_find_boundary_leaf (), which stores the page identifier there
14306  * stat_info(in/out): forwarded unchanged to btree_find_boundary_leaf ()
14307  *
14308  * Note: Find the page identifier for the first leaf page of the B+tree index.
14309  */
14310 static PAGE_PTR
14311 btree_find_leftmost_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid, BTREE_STATS * stat_info)
14312 {
14313  return btree_find_boundary_leaf (thread_p, btid, pg_vpid, stat_info, BTREE_BOUNDARY_FIRST);
14314 }
14315 
14316 /*
14317  * btree_find_rightmost_leaf () - Convenience wrapper around btree_find_boundary_leaf () for BTREE_BOUNDARY_LAST.
14318  * return: page pointer, exactly as returned by btree_find_boundary_leaf ()
       * thread_p(in): thread entry
14319  * btid(in): B+tree index identifier
14320  * pg_vpid(out): forwarded to btree_find_boundary_leaf (), which stores the page identifier there
14321  * stat_info(in/out): forwarded unchanged to btree_find_boundary_leaf ()
14322  *
14323  * Note: Find the page identifier for the last leaf page of the B+tree index.
14324  */
14325 static PAGE_PTR
14326 btree_find_rightmost_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid, BTREE_STATS * stat_info)
14327 {
14328  return btree_find_boundary_leaf (thread_p, btid, pg_vpid, stat_info, BTREE_BOUNDARY_LAST);
14329 }
14330 
14331 /*
14332  * btree_find_boundary_leaf () - Descend from the root along the first or last child down to a leaf page.
14333  * return: page pointer of the leaf (fixed with a read latch, not released here), or NULL on error
       * thread_p(in): thread entry
14334  * btid(in): B+tree index identifier (root page id and volume id are read from it)
14335  * pg_vpid(out): set to a NULL VPID on entry and to the tracked leaf VPID on success
14336  * stat_info(out): optional; when not NULL its height field receives the node level of the root
       * where(in): BTREE_BOUNDARY_FIRST to follow slot 1 of every non-leaf node, BTREE_BOUNDARY_LAST to follow
       *            the last slot
14337  *
14338  * Note: Find the page identifier for the first/last leaf page of the B+tree index.
       * If the leaf reached has no keys, the leaf chain is followed (next_vpid for FIRST, prev_vpid for LAST)
       * until a leaf with keys is found or the chain ends.
14339  */
14340 static PAGE_PTR
14341 btree_find_boundary_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid, BTREE_STATS * stat_info,
14342  BTREE_BOUNDARY where)
14343 {
14344  PAGE_PTR P_page = NULL, C_page = NULL;
14345  VPID P_vpid, C_vpid;
14346  BTREE_ROOT_HEADER *root_header = NULL;
14347  BTREE_NODE_HEADER *header = NULL;
14348  BTREE_NODE_TYPE node_type;
14349  NON_LEAF_REC nleaf;
14350  RECDES rec;
14351  int key_cnt = 0, index = 0;
14352  int root_level = 0, depth = 0;
14353
14354  VPID_SET_NULL (pg_vpid);
14355
14356  /* read the root page */
14357  P_vpid.volid = btid->vfid.volid;
14358  P_vpid.pageid = btid->root_pageid;
14359  P_page = pgbuf_fix (thread_p, &P_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14360  if (P_page == NULL)
14361  {
14362  ASSERT_ERROR ();
14363  goto error;
14364  }
14365
14366  (void) pgbuf_check_page_ptype (thread_p, P_page, PAGE_BTREE);
14367
14368  root_header = btree_get_root_header (thread_p, P_page);
14369  if (root_header == NULL)
14370  {
14371  goto error;
14372  }
14373
14374  root_level = root_header->node.node_level;
14375  node_type = (root_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
14376
       /* walk down one level per iteration: fix the child before unfixing the parent */
14377  while (node_type == BTREE_NON_LEAF_NODE)
14378  {
14379  key_cnt = btree_node_number_of_keys (thread_p, P_page);
14380  if (key_cnt <= 0)
14381  { /* node record underflow */
14382  er_log_debug (ARG_FILE_LINE, "btree_find_boundary_leaf: node key count underflow: %d.Operation Ignored.",
14383  key_cnt);
14384  goto error;
14385  }
14386
14387  assert (where == BTREE_BOUNDARY_FIRST || where == BTREE_BOUNDARY_LAST);
14388  if (where == BTREE_BOUNDARY_FIRST)
14389  {
14390  index = 1;
14391  }
14392  else
14393  {
14394  index = key_cnt;
14395  }
14396
14397  depth++;
14398
14399  /* get the child page to follow */
14400  assert (index > 0);
14401  if (spage_get_record (thread_p, P_page, index, &rec, PEEK) != S_SUCCESS)
14402  {
14403  goto error;
14404  }
14405
14407  C_vpid = nleaf.pnt;
14408  C_page = pgbuf_fix (thread_p, &C_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14409  if (C_page == NULL)
14410  {
14411  ASSERT_ERROR ();
14412  goto error;
14413  }
14414
14415  (void) pgbuf_check_page_ptype (thread_p, C_page, PAGE_BTREE);
14416
14417  pgbuf_unfix_and_init (thread_p, P_page);
14418
14419  key_cnt = btree_node_number_of_keys (thread_p, C_page);
14420
14421  header = btree_get_node_header (thread_p, C_page);
14422  if (header == NULL)
14423  {
14424  goto error;
14425  }
14426
14427  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
14428
       /* the child becomes the current page for the next iteration */
14429  P_page = C_page;
14430  C_page = NULL;
14431  P_vpid = C_vpid;
14432  }
14433
       /* key_cnt is only computed inside the loop above; when the root itself is a leaf it is still 0 here and the
        * code below runs once on the root page */
14434  if (key_cnt != 0)
14435  {
14436  goto end; /* OK */
14437  }
14438
14439 again:
14440
14441  header = btree_get_node_header (thread_p, P_page);
14442  if (header == NULL)
14443  {
14444  goto error;
14445  }
14446
14447  /* fix the neighbouring leaf page in search direction, if it exists. */
14448  assert (where == BTREE_BOUNDARY_FIRST || where == BTREE_BOUNDARY_LAST);
14449  if (where == BTREE_BOUNDARY_FIRST)
14450  {
14451  C_vpid = header->next_vpid; /* move forward */
14452  }
14453  else
14454  {
14455  C_vpid = header->prev_vpid; /* move backward */
14456  }
14457
14458  if (!VPID_ISNULL (&C_vpid))
14459  {
14460  C_page = pgbuf_fix (thread_p, &C_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14461  if (C_page == NULL)
14462  {
14463  ASSERT_ERROR ();
14464  goto error;
14465  }
14466
14467  (void) pgbuf_check_page_ptype (thread_p, C_page, PAGE_BTREE);
14468
14469  /* unfix the previous leaf page if it is fixed. */
14470  if (P_page != NULL)
14471  {
14472  pgbuf_unfix_and_init (thread_p, P_page);
14473  /* P_vpid is left untouched here (original note: "do not clear bts->P_vpid for UNCONDITIONAL lock request
        * handling"; there is no bts in this function) */
14474  }
14475  }
14476
14477  /* check if the current leaf page has valid slots */
14478  if (C_page != NULL)
14479  {
14480  key_cnt = btree_node_number_of_keys (thread_p, C_page);
14481
14482  if (key_cnt <= 0)
14483  { /* empty page */
       /* NOTE(review): P_vpid is not advanced together with P_page here; if no further page follows, *pg_vpid
        * set at `end' still names the earlier leaf while this page is the one returned - confirm. */
14484  P_page = C_page;
14485  C_page = NULL;
14486  goto again;
14487  }
14488
14489  P_vpid = C_vpid;
14490  P_page = C_page;
14491  }
14492
14493  /* NOTE that we do NOT release the page latch on P here */
14494 end:
14495
14496  *pg_vpid = P_vpid;
14497
       /* one level is descended per loop iteration, so a root of level L is expected to need L - 1 steps */
14498  assert_release (root_level == depth + 1);
14499
14500  if (stat_info)
14501  {
14502  stat_info->height = root_level;
14503  }
14504
14505  return P_page;
14506
14507 error:
14508
       /* release whatever is still fixed; *pg_vpid keeps the NULL value set on entry */
14509  if (P_page)
14510  {
14511  pgbuf_unfix_and_init (thread_p, P_page);
14512  }
14513  if (C_page)
14514  {
14515  pgbuf_unfix_and_init (thread_p, C_page);
14516  }
14517
14518  return NULL;
14519 }
14520 
14521 /*
14522  * btree_find_AR_sampling_leaf () - Descend to a randomly chosen leaf and decide whether to accept it as a sample.
14523  * return: page pointer of the leaf (fixed with a read latch, not released here), or NULL on error
       * thread_p(in): thread entry
14524  * btid(in): B+tree index identifier (root page id and volume id are read from it)
14525  * pg_vpid(out): set to a NULL VPID on entry and to the tracked leaf VPID on success
14526  * stat_info_p(out): must not be NULL; its height field receives the node level of the root
14527  * found_p(out): true when the reached leaf is accepted as a sample, false when it is rejected
14528  *
14529  * Note: Find the page identifier via the Acceptance/Rejection Sampling leaf page of the B+tree index.
14530  * Note: Random Sampling from Databases (Chapter 3. Random Sampling from B+ Trees)
       * Note: The acceptance probability is the product, over all pages visited below the root, of the fraction
       * of the estimated usable page size that is not free space.
14531  */
14532 static PAGE_PTR
14533 btree_find_AR_sampling_leaf (THREAD_ENTRY * thread_p, BTID * btid, VPID * pg_vpid, BTREE_STATS * stat_info_p,
14534  bool * found_p)
14535 {
14536  PAGE_PTR P_page = NULL, C_page = NULL;
14537  VPID P_vpid, C_vpid;
14538  int slot_id;
14539  BTREE_ROOT_HEADER *root_header = NULL;
14540  BTREE_NODE_HEADER *header = NULL;
14541  BTREE_NODE_TYPE node_type;
14542  NON_LEAF_REC nleaf;
14543  RECDES rec;
14544  int est_page_size, free_space;
14545  int key_cnt = 0;
14546  int root_level = 0, depth = 0;
14547  double prob = 1.0; /* Acceptance probability */
14548
14549  assert (stat_info_p != NULL);
14550  assert (found_p != NULL);
14551
14552  *found_p = false; /* init */
14553
14554  VPID_SET_NULL (pg_vpid);
14555
14556  /* read the root page */
14557  P_vpid.volid = btid->vfid.volid;
14558  P_vpid.pageid = btid->root_pageid;
14559  P_page = pgbuf_fix (thread_p, &P_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14560  if (P_page == NULL)
14561  {
14562  goto error;
14563  }
14564
14565  (void) pgbuf_check_page_ptype (thread_p, P_page, PAGE_BTREE);
14566
14567  key_cnt = btree_node_number_of_keys (thread_p, P_page);
14568
14569  root_header = btree_get_root_header (thread_p, P_page);
14570  if (root_header == NULL)
14571  {
14572  goto error;
14573  }
14574
14575  root_level = root_header->node.node_level;
14576  node_type = (root_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
14577
       /* page size minus slotted-page header, node header and one slot: the reference size for the fill ratio */
14578  est_page_size = (int) (DB_PAGESIZE - (spage_header_size () + sizeof (BTREE_NODE_HEADER) + spage_slot_size ()));
14579  assert (est_page_size > 0);
14580
14581  while (node_type == BTREE_NON_LEAF_NODE)
14582  {
14583  depth++;
14584
14585  /* get the randomized child page to follow */
14586
14587  if (key_cnt <= 0)
14588  { /* node record underflow */
14590  "btree_find_AR_sampling_leaf: node key count underflow: %d. Operation Ignored.", key_cnt);
14591  goto error;
14592  }
14593
       /* NOTE(review): drand48 () is documented to return values in [0.0, 1.0), so the product truncates to
        * 0 .. key_cnt - 1 and MAX () maps 0 to 1: for key_cnt > 1 slot key_cnt is never chosen and slot 1 is chosen
        * twice as often as the others - confirm whether this bias is intended. */
14594  slot_id = (int) (drand48 () * key_cnt);
14595  slot_id = MAX (slot_id, 1);
14596
14597  assert (slot_id > 0);
14598  if (spage_get_record (thread_p, P_page, slot_id, &rec, PEEK) != S_SUCCESS)
14599  {
14600  goto error;
14601  }
14602
14604  C_vpid = nleaf.pnt;
14605  C_page = pgbuf_fix (thread_p, &C_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14606  if (C_page == NULL)
14607  {
14608  goto error;
14609  }
14610
14611  (void) pgbuf_check_page_ptype (thread_p, C_page, PAGE_BTREE);
14612
14613  pgbuf_unfix_and_init (thread_p, P_page);
14614
14615  key_cnt = btree_node_number_of_keys (thread_p, C_page);
14616
14617  header = btree_get_node_header (thread_p, C_page);
14618  if (header == NULL)
14619  {
14620  goto error;
14621  }
14622
14623  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
14624
14625  /* update Acceptance probability */
14626
14627  free_space = spage_max_space_for_new_record (thread_p, C_page);
14628  assert (est_page_size > free_space);
14629
       /* multiply by the used fraction of the child page */
14630  prob *= (((double) est_page_size) - free_space) / ((double) est_page_size);
14631
       /* the child becomes the current page for the next iteration */
14632  P_page = C_page;
14633  C_page = NULL;
14634  P_vpid = C_vpid;
14635  }
14636
14637  if (key_cnt != 0)
14638  {
14639  goto end; /* OK */
14640  }
14641
14642 again:
14643
14644  header = btree_get_node_header (thread_p, P_page);
14645  if (header == NULL)
14646  {
14647  goto error;
14648  }
14649
14650  /* the leaf reached is empty: fix the next leaf page if it exists. */
14651  C_vpid = header->next_vpid;
14652  if (!VPID_ISNULL (&C_vpid))
14653  {
14654  C_page = pgbuf_fix (thread_p, &C_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
14655  if (C_page == NULL)
14656  {
14657  goto error;
14658  }
14659
14660  (void) pgbuf_check_page_ptype (thread_p, C_page, PAGE_BTREE);
14661
14662  /* unfix the previous leaf page if it is fixed. */
14663  if (P_page != NULL)
14664  {
14665  pgbuf_unfix_and_init (thread_p, P_page);
14666  /* P_vpid is left untouched here (original note: "do not clear bts->P_vpid for UNCONDITIONAL lock request
        * handling"; there is no bts in this function) */
14667  }
14668  }
14669
14670  /* check if the current leaf page has valid slots */
14671  if (C_page != NULL)
14672  {
14673  key_cnt = btree_node_number_of_keys (thread_p, C_page);
14674
14675  if (key_cnt <= 0)
14676  { /* empty page */
       /* NOTE(review): P_vpid is not advanced together with P_page here; if no further page follows, *pg_vpid
        * set at `end' still names the earlier leaf while this page is the one returned - confirm. */
14677  P_page = C_page;
14678  C_page = NULL;
14679  goto again;
14680  }
14681  P_vpid = C_vpid;
14682  P_page = C_page;
14683  }
14684
14685  /* NOTE that we do NOT release the page latch on P here */
14686 end:
14687
14688  *pg_vpid = P_vpid;
14689
       /* one level is descended per loop iteration, so a root of level L is expected to need L - 1 steps */
14690  assert_release (root_level == depth + 1);
14691
14692  stat_info_p->height = root_level;
14693
14694  /* do Acceptance/Rejection sampling */
14695  if (drand48 () < prob)
14696  {
14697  /* Acceptance */
14698  *found_p = true;
14699  }
14700  else
14701  {
14702  /* Rejection */
14703  assert (*found_p == false);
14704  }
14705
       /* the page is returned in both cases; only *found_p tells acceptance from rejection */
14706  return P_page;
14707
14708 error:
14709
       /* release whatever is still fixed; *pg_vpid keeps the NULL value set on entry */
14710  if (P_page)
14711  {
14712  pgbuf_unfix_and_init (thread_p, P_page);
14713  }
14714  if (C_page)
14715  {
14716  pgbuf_unfix_and_init (thread_p, C_page);
14717  }
14718
14719  return NULL;
14720 }
14721 
14722 /*
14723  * btree_keyval_search () - Search the object identifiers stored for a single key (GE_LE range on one key).
14724  * return: the number of object identifiers read in this iteration
14725  * (bts->n_oids_read_last_iteration, 0 if the key is not found), or the
14726  * error code of the failing step if an error occurs. Since there can be
14727  * many object identifiers for the given key, to avoid main
14728  * memory limitations, the set of object identifiers are returned
14729  * iteratively. At each call, the btree_scan is modified, to remember the old search position.
       * thread_p(in): thread entry
14730  * btid: B+tree index identifier
14731  * scan_op_type(in): not used by the code visible in this function
14732  * bts(in/out): Btree range search scan structure
14733  * kv_range(in): Key range to be searched (expected to describe a single key)
14734  * class_oid(in): Class to be matched; must be a valid OID unless is_all_class_srch is true
14737  * filter(in): Filter information, forwarded to btree_prepare_bts ()
14738  * isidp(in): Index scan identifier; need_count_only must be false
14739  * is_all_class_srch(in): true to accept objects of any class, false to match class_oid only
14740  *
14741  * Note: Finds the set of object identifiers for the given key. if the key is not found, 0 count is returned.
14743  *
14744  * Note: the btree_scan structure must first be initialized by using the macro BTREE_INIT_SCAN()
14745  *
14746  * Note: After the first iteration, caller can use BTREE_END_OF_SCAN() macro to understand the end of range.
14747  *
14748  * NOTE: Instead of range scan, this can be replaced with a different function to go to key directly.
14749  */
14750 int
14751 btree_keyval_search (THREAD_ENTRY * thread_p, BTID * btid, SCAN_OPERATION_TYPE scan_op_type, BTREE_SCAN * bts,
14752  key_val_range * kv_range, OID * class_oid, FILTER_INFO * filter, INDX_SCAN_ID * isidp,
14753  bool is_all_class_srch)
14754 {
14755  /* this is just a GE_LE range search with the same key */
14756  int rc;
14757
14758  /* Assert expected arguments. */
14759  assert (btid != NULL);
14760  assert (bts != NULL);
14761  assert (isidp != NULL);
14762  assert (isidp->need_count_only == false);
14763  assert (kv_range != NULL);
14764  /* If a class must be matched, class_oid argument must be a valid OID. */
14765  assert (is_all_class_srch || (class_oid != NULL && !OID_ISNULL (class_oid)));
14766
14767  /* Execute range scan */
       /* The class to match is passed only when a single class must be matched; class_oid is validated above for
        * exactly that case and may be NULL for an all-class search. */
14768  rc =
14769  btree_prepare_bts (thread_p, bts, btid, isidp, kv_range, filter, is_all_class_srch ? NULL : class_oid, NULL,
14770  NULL, false, NULL);
14771  if (rc != NO_ERROR)
14772  {
14773  ASSERT_ERROR ();
14774  return rc;
14775  }
       /* NOTE(review): the statement numbered 14776 is absent from this listing; presumably it runs the range scan
        * and stores its result in rc before the check below - confirm against the complete source. */
14777  if (rc != NO_ERROR)
14778  {
14779  ASSERT_ERROR ();
14780  return rc;
14781  }
14782  assert (bts->n_oids_read_last_iteration >= 0);
14783
14784  return bts->n_oids_read_last_iteration;
14785 }
14786 
14787 /*
14788  * btree_coerce_key () -
14789  * return: NO_ERROR or error code
14790  * src_keyp(in/out):
14791  * keysize(in): term# associated with index key range
14792  * btree_domainp(in): B+tree index domain
14793  * key_minmax(in): MIN_VALUE or MAX_VALUE
14794  *
14795  * Note:
14796  */
14797 int
14798 btree_coerce_key (DB_VALUE * keyp, int keysize, TP_DOMAIN * btree_domainp, int key_minmax)
14799 {
14800  DB_TYPE stype, dtype;
14801  int ssize, dsize;
14802  TP_DOMAIN *dp;
14803  DB_MIDXKEY *midxkey;
14804  TP_DOMAIN *partial_dom;
14805  int minmax;
14806  int err = NO_ERROR;
14807  bool part_key_desc = false;
14808 
14809  /* assuming all parameters are not NULL pointer, and 'src_key' is not NULL value */
14810  stype = DB_VALUE_TYPE (keyp);
14811  dtype = TP_DOMAIN_TYPE (btree_domainp);
14812 
14813  if (stype == DB_TYPE_MIDXKEY && dtype == DB_TYPE_MIDXKEY)
14814  {
14815  /* if multi-column index */
14816  /* The type of B+tree key domain can be DB_TYPE_MIDXKEY only in the case of multi-column index. And, if it is,
14817  * query optimizer makes the search key('src_key') as sequence type even if partial key was specified. One more
14818  * assumption is that query optimizer make the search key(either complete or partial) in the same order (of
14819  * sequence) of B+tree key domain. */
14820 
14821  /* get number of elements of sequence type of the 'src_key' */
14822  midxkey = db_get_midxkey (keyp);
14823  ssize = midxkey->ncolumns;
14824 
14825  /* count number of elements of sequence type of the B+tree key domain */
14826  for (dp = btree_domainp->setdomain, dsize = 0; dp; dp = dp->next, dsize++)
14827  {
14828  ;
14829  }
14830 
14831  if (ssize < 0 || ssize > dsize || dsize == 0 || ssize > keysize)
14832  {
14833  /* something wrong with making search key in query optimizer */
14834  err = ER_FAILED; /* error */
14835  }
14836  else if (ssize == dsize)
14837  {
14838  if (midxkey->domain == NULL) /* checkdb */
14839  {
14840  midxkey->domain = btree_domainp;
14841  }
14842 
14843  return NO_ERROR;
14844  }
14845  else
14846  {
14847  /* do coercing, append min or max value of the coressponding domain type to the partial search key value */
14848  DB_VALUE *dbvals = NULL;
14849  int num_dbvals;
14850 
14851  num_dbvals = dsize - ssize;
14852  dbvals = (DB_VALUE *) db_private_alloc (NULL, num_dbvals * sizeof (DB_VALUE));
14853  if (dbvals == NULL)
14854  {
14855  return ER_OUT_OF_VIRTUAL_MEMORY;
14856  }
14857 
14858  /* get the last domain element of partial-key */
14859  for (dp = btree_domainp->setdomain, dsize = 1; dsize < keysize && dp; dsize++, dp = dp->next)
14860  {
14861  ; /* nop */
14862  }
14863 
14864  if (dsize < keysize || dp == NULL)
14865  {
14866  db_private_free_and_init (NULL, dbvals);
14867  return ER_FAILED;
14868  }
14869 
14870  part_key_desc = dp->is_desc;
14871 
14872  for (dp = btree_domainp->setdomain, dsize = 0; dp && dsize < ssize; dp = dp->next, dsize++)
14873  {
14874  ;
14875  }
14876 
14877  if (midxkey->min_max_val.position == -1)
14878  {
14879  /* If min_max_val was not set, set it here. */
14880  minmax = key_minmax; /* init */
14881  if (minmax == BTREE_COERCE_KEY_WITH_MIN_VALUE)
14882  {
14883  if (!part_key_desc)
14884  { /* CASE 1, 2 */
14885  if (dp->is_desc != true)
14886  { /* CASE 1 */
14888  }
14889  else
14890  { /* CASE 2 */
14892  }
14893  }
14894  else
14895  { /* CASE 3, 4 */
14896  if (dp->is_desc != true)
14897  { /* CASE 3 */
14899  }
14900  else
14901  { /* CASE 4 */
14903  }
14904  }
14905  }
14906  else if (minmax == BTREE_COERCE_KEY_WITH_MAX_VALUE)
14907  {
14908  if (!part_key_desc)
14909  { /* CASE 1, 2 */
14910  if (dp->is_desc != true)
14911  { /* CASE 1 */
14913  }
14914  else
14915  { /* CASE 2 */
14917  }
14918  }
14919  else
14920  { /* CASE 3, 4 */
14921  if (dp->is_desc != true)
14922  { /* CASE 3 */
14924  }
14925  else
14926  { /* CASE 4 */
14928  }
14929  }
14930  }
14931 
14932  if (minmax == BTREE_COERCE_KEY_WITH_MIN_VALUE)
14933  {
14934  midxkey->min_max_val.position = dsize;
14935  midxkey->min_max_val.type = MIN_COLUMN;
14936  }
14937  else if (minmax == BTREE_COERCE_KEY_WITH_MAX_VALUE)
14938  {
14939  midxkey->min_max_val.position = dsize;
14940  midxkey->min_max_val.type = MAX_COLUMN;
14941  }
14942  else
14943  {
14944  err = ER_FAILED;
14945  }
14946  }
14947 
14948  num_dbvals = 0;
14949  partial_dom = dp;
14950  for (err = NO_ERROR; dp && err == NO_ERROR; dp = dp->next, dsize++)
14951  {
14952  db_make_null (&dbvals[num_dbvals]);
14953  num_dbvals++;
14954  }
14955 
14956  if (err == NO_ERROR)
14957  {
14958  err = pr_midxkey_add_elements (keyp, dbvals, num_dbvals, partial_dom);
14959  }
14960 
14961  db_private_free_and_init (NULL, dbvals);
14962  }
14963  }
14964  else if (
14965  /* check if they are string or bit type */
14966  /* compatible if two types are same (except for sequence type) */
14967  (stype == dtype)
14968  /* CHAR type and VARCHAR type are compatible with each other */
14969  || ((stype == DB_TYPE_CHAR || stype == DB_TYPE_VARCHAR)
14970  && (dtype == DB_TYPE_CHAR || dtype == DB_TYPE_VARCHAR))
14971  /* NCHAR type and VARNCHAR type are compatible with each other */
14972  || ((stype == DB_TYPE_NCHAR || stype == DB_TYPE_VARNCHAR)
14973  && (dtype == DB_TYPE_NCHAR || dtype == DB_TYPE_VARNCHAR))
14974  /* BIT type and VARBIT type are compatible with each other */
14975  || ((stype == DB_TYPE_BIT || stype == DB_TYPE_VARBIT) && (dtype == DB_TYPE_BIT || dtype == DB_TYPE_VARBIT))
14976  /* OID type and OBJECT type are compatible with each other */
14977  /* Keys can come in with a type of DB_TYPE_OID, but the B+tree domain itself will always be a
14978  * DB_TYPE_OBJECT. The comparison routines can handle OID and OBJECT as compatible type with each other . */
14979  || (stype == DB_TYPE_OID || stype == DB_TYPE_OBJECT))
14980  {
14981  err = NO_ERROR;
14982  }
14983  else
14984  {
14985  DB_VALUE temp_val;
14986 
14987  db_make_null (&temp_val);
14988 
14989  if (tp_more_general_type (dtype, stype) > 0)
14990  {
14991  /* the other case, do real coercing using 'tp_value_coerce()' */
14992  if (tp_value_coerce (keyp, &temp_val, btree_domainp) == DOMAIN_COMPATIBLE)
14993  {
14994  pr_clear_value (keyp);
14995  pr_clone_value (&temp_val, keyp);
14996  }
14997 
14998  pr_clear_value (&temp_val);
14999  }
15000  else if (TP_IS_NUMERIC_TYPE (dtype) || TP_IS_DATE_OR_TIME_TYPE (dtype))
15001  {
15002  /* try to strict cast keyp to dtype */
15003  err = tp_value_coerce_strict (keyp, &temp_val, btree_domainp);
15004  if (err == NO_ERROR)
15005  {
15006  pr_clear_value (keyp);
15007  pr_clone_value (&temp_val, keyp);
15008  }
15009  else
15010  {
15011  /* unsuccessful, */
15012  err = NO_ERROR;
15013  }
15014 
15015  pr_clear_value (&temp_val);
15016  }
15017  else
15018  {
15019  err = NO_ERROR;
15020  }
15021  }
15022 
15023  if (err != NO_ERROR)
15024  {
15026  }
15027 
15028  /* return result */
15029  return err;
15030 }
15031 
15032 /*
15033  * btree_prepare_bts () - Prepare b-tree scan structure before starting index scan.
15034  *
15035  * return : Error code.
15036  * thread_p (in) : Thread entry.
15037  * bts (in/out) : B-tree scan structure. Left untouched if bts->is_scan_started is already set.
15038  * btid (in) : B-tree identifier. Read only when bts->is_btid_int_valid is false.
15039  * index_scan_id_p (in) : Index scan info. May be NULL.
15040  * kv_range (in) : Range of scan. NULL means a full range scan (INF_INF).
15041  * filter (in) : Key filter. May be NULL; otherwise it is copied by value into bts.
15042  * match_class_oid (in) : Non-NULL value if class must be matched (unique indexes).
15043  * key_limit_upper (in) : Pointer to upper key limit. NULL if there is no upper key limit.
15044  * key_limit_lower (in) : Pointer to lower key limit. NULL if there is no lower key limit.
15045  * need_to_check_null (in) : True if midxkey NULL needs to be checked.
15046  * bts_other (in/out) : Sets the argument specific to one type of range search.
  *
  * Note: bts keeps pointers into kv_range (key1/key2), not copies of the keys.
  * The lower and upper keys are exchanged and the range is reversed when exactly one of
  * bts->use_desc_index and BTREE_IS_PART_KEY_DESC (&bts->btid_int) holds.
15047  */
15048 int
15049 btree_prepare_bts (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, BTID * btid, INDX_SCAN_ID * index_scan_id_p,
15050  key_val_range * kv_range, FILTER_INFO * filter, const OID * match_class_oid,
15051  DB_BIGINT * key_limit_upper, DB_BIGINT * key_limit_lower, bool need_to_check_null, void *bts_other)
15052 {
15053  key_val_range inf_key_val_range;
15054  PAGE_PTR root_page = NULL;
15055  VPID root_vpid;
15056  int error_code = NO_ERROR;
15057  DB_MIDXKEY *midxkey = NULL;
15058  DB_VALUE *swap_key = NULL;
15059  int i = 0;
  /* Function-local static: the parameter is read once, on first execution of this declaration. */
15060  static bool oracle_style_empty_string = prm_get_bool_value (PRM_ID_ORACLE_STYLE_EMPTY_STRING);
15061 
15062  /* Assert expected arguments. */
15063  assert (bts != NULL);
15064  /* If b-tree info is valid, then topclass_oid must not be NULL. */
  /* NOTE(review): this listing's own numbering skips a line here (15065), so the check announced by the comment
   * above is not visible; confirm against the original file. */
15066 
15067  if (bts->is_scan_started)
15068  {
15069  /* B-tree scan must have been initialized already. */
15070  return NO_ERROR;
15071  }
15072 
15073  assert (VPID_ISNULL (&bts->C_vpid));
15074 
15075  if (kv_range == NULL)
15076  {
15077  /* NULL kv_range argument means a full range scan */
  /* inf_key_val_range is a local. Both of its keys are made NULL, so the lower/upper key setup further down is
   * expected to store NULL pointers rather than addresses of this local. */
15078  db_make_null (&inf_key_val_range.key1);
15079  db_make_null (&inf_key_val_range.key2);
15080  inf_key_val_range.range = INF_INF;
15081  inf_key_val_range.num_index_term = 0;
15082  inf_key_val_range.is_truncated = false;
15083 
15084  kv_range = &inf_key_val_range;
15085  }
15086 
15087  if (!bts->is_btid_int_valid)
15088  {
  /* NOTE(review): root_vpid is filled in here but is not read anywhere else in the visible function body. */
15089  root_vpid.pageid = btid->root_pageid;
15090  root_vpid.volid = btid->vfid.volid;
15091  root_page = btree_fix_root_with_info (thread_p, btid, PGBUF_LATCH_READ, NULL, NULL, &bts->btid_int);
15092  if (root_page == NULL)
15093  {
15094  ASSERT_ERROR_AND_SET (error_code);
15095  return error_code;
15096  }
15097  /* B-tree info successfully obtained. */
15098 
15099  if (index_scan_id_p != NULL && index_scan_id_p->check_not_vacuumed)
15100  {
15101  /* Not vacuumed check can work properly only after creator MVCCID was vacuumed. Otherwise, checker may find
15102  * older MVCCID's that have been logged with creator MVCCID and it will complain (even crash in debug mode). */
15103  MVCCID creator_mvccid = btree_get_creator_mvccid (thread_p, root_page);
15104  if (MVCCID_IS_VALID (creator_mvccid) && !vacuum_is_mvccid_vacuumed (creator_mvccid))
15105  {
15106  /* Do not allow check for not vacuumed records. */
15107  index_scan_id_p->check_not_vacuumed = false;
15108  }
15109  }
15110 
15111  /* Root page is no longer needed. */
15112  pgbuf_unfix_and_init (thread_p, root_page);
15113 
15114  /* TODO: Why is the below code here? What does constructing btid have to do with the issue described below?
15115  * Shouldn't this be always verified? It doesn't look like belonging here. */
15116  /*
15117  * The asc/desc properties in midxkey from log_applier may be
15118  * inaccurate. therefore, we should use btree header's domain while
15119  * processing btree search request from log_applier.
15120  */
15121  if (DB_VALUE_TYPE (&kv_range->key1) == DB_TYPE_MIDXKEY)
15122  {
15123  midxkey = db_get_midxkey (&kv_range->key1);
15124  if (midxkey->domain == NULL || LOG_CHECK_LOG_APPLIER (thread_p))
15125  {
15126  /*
15127  * The asc/desc properties in midxkey from log_applier may be
15128  * inaccurate. therefore, we should use btree header's domain
15129  * while processing btree search request from log_applier.
15130  */
15131  if (midxkey->domain)
15132  {
15133  tp_domain_free (midxkey->domain);
15134  }
  /* The key's domain now aliases the domain held in bts->btid_int (pointer assignment, no copy). */
15135  midxkey->domain = bts->btid_int.key_type;
15136  }
15137  }
  /* Same domain replacement for the second key of the range. */
15138  if (DB_VALUE_TYPE (&kv_range->key2) == DB_TYPE_MIDXKEY)
15139  {
15140  midxkey = db_get_midxkey (&kv_range->key2);
15141  if (midxkey->domain == NULL || LOG_CHECK_LOG_APPLIER (thread_p))
15142  {
15143  if (midxkey->domain)
15144  {
15145  tp_domain_free (midxkey->domain);
15146  }
15147  midxkey->domain = bts->btid_int.key_type;
15148  }
15149  }
15150 
15151  /* TODO: What does this assert mean? */
15152  /* is from keyval_search; checkdb or find_unique */
15153  assert_release (kv_range->num_index_term == 0);
15154 
15155  /* B-tree scan btid_int is now valid. */
15156  bts->is_btid_int_valid = true;
15157  }
15158 
15159  if (index_scan_id_p)
15160  {
15161  bts->index_scan_idp = index_scan_id_p;
15162  if (index_scan_id_p->indx_info != NULL)
15163  {
15164  bts->use_desc_index = index_scan_id_p->indx_info->use_desc_index != 0;
15165  }
15166  else
15167  {
15168  bts->use_desc_index = false;
15169  }
15170  bts->oid_ptr = bts->index_scan_idp->oid_list != NULL ? bts->index_scan_idp->oid_list->oidp : NULL;
15171 
15172  /* set index key copy_buf info; is allocated at btree_keyval_search() or scan_open_index_scan(). */
15173  /* TODO: Use index_scan_id_p->copy_buf directly. */
15174  bts->btid_int.copy_buf = index_scan_id_p->copy_buf;
15175  bts->btid_int.copy_buf_len = index_scan_id_p->copy_buf_len;
15176  }
15177 
15178  /* initialize the key range with given information */
  /* The switch only validates the range value; every accepted range falls through to the common setup below. */
15179  switch (kv_range->range)
15180  {
15181  case EQ_NA:
15182  case GT_LT:
15183  case GT_LE:
15184  case GE_LT:
15185  case GE_LE:
15186  case GE_INF:
15187  case GT_INF:
15188  case INF_LE:
15189  case INF_LT:
15190  case INF_INF:
15191  break;
15192  default:
15193  assert (false);
  /* NOTE(review): this listing's own numbering skips a line here (15194); a statement may have been lost in
   * extraction - confirm against the original file. */
15195  return ER_BTREE_INVALID_RANGE;
15196  }
15197 
15198  /* Set up the keys and make sure that they have the proper domain (by coercing, if necessary). Open-ended searches
15199  * will have one or both of key1 or key2 set to NULL so that we no longer have to do DB_IS_NULL() tests on them. */
15200  /* TODO: fix multi-column index NULL problem */
15201  /* Only used for multi-column index with PRM_ORACLE_STYLE_EMPTY_STRING, otherwise set as zero */
15202 
15203  /* Set key range. */
15204  bts->key_range.num_index_term = kv_range->num_index_term;
15205 
15206  /* re-check for partial-key domain is desc */
15207  if (!BTREE_IS_PART_KEY_DESC (&(bts->btid_int)))
15208  {
15209  TP_DOMAIN *dom;
15210 
15211  dom = bts->btid_int.key_type;
15212  if (TP_DOMAIN_TYPE (dom) == DB_TYPE_MIDXKEY)
15213  {
15214  dom = dom->setdomain;
15215  }
15216 
15217  /* get the last domain element of partial-key */
  /* Starting at 1 means dom is not advanced at all when num_index_term is 0 or 1. */
15218  for (i = 1; i < kv_range->num_index_term && dom; i++, dom = dom->next)
15219  {
15220  ; /* nop */
15221  }
15222 
  /* The domain list ended before reaching the requested term. */
15223  if (i < kv_range->num_index_term || dom == NULL)
15224  {
15225  assert (false);
15226  return ER_FAILED;
15227  }
15228 
15229  bts->btid_int.part_key_desc = dom->is_desc;
15230  }
15231 
15232 #if !defined(NDEBUG)
15233  if (DB_VALUE_TYPE (&kv_range->key1) == DB_TYPE_MIDXKEY)
15234  {
15235  midxkey = db_get_midxkey (&kv_range->key1);
15236  assert (midxkey->ncolumns == midxkey->domain->precision);
15237  }
15238  if (DB_VALUE_TYPE (&kv_range->key2) == DB_TYPE_MIDXKEY)
15239  {
15240  midxkey = db_get_midxkey (&kv_range->key2);
15241  assert (midxkey->ncolumns == midxkey->domain->precision);
15242  }
15243 #endif
15244 
15245  /* lower bound key and upper bound key */
15246  if (DB_IS_NULL (&kv_range->key1) || btree_multicol_key_is_null (&kv_range->key1))
15247  {
15248  bts->key_range.lower_key = NULL;
15249  }
15250  else
15251  {
15252  bts->key_range.lower_key = &kv_range->key1;
15253  }
15254 
15255  if (DB_IS_NULL (&kv_range->key2) || btree_multicol_key_is_null (&kv_range->key2))
15256  {
15257  bts->key_range.upper_key = NULL;
15258  }
15259  else
15260  {
15261  bts->key_range.upper_key = &kv_range->key2;
15262  }
15263 
15264  /* range type */
15265  bts->key_range.range = kv_range->range;
15266 
15267  /* Swap range if scan is descending. */
  /* The condition holds when exactly one of the two flags is set. */
15268  if ((bts->use_desc_index && !BTREE_IS_PART_KEY_DESC (&bts->btid_int))
15269  || (!bts->use_desc_index && BTREE_IS_PART_KEY_DESC (&bts->btid_int)))
15270  {
15271  /* Reverse scan and its range. */
15272  range_reverse (bts->key_range.range);
15273  swap_key = bts->key_range.lower_key;
15274  bts->key_range.lower_key = bts->key_range.upper_key;
15275  bts->key_range.upper_key = swap_key;
15276  }
15277 
15278  if (oracle_style_empty_string)
15279  {
15280  /* TODO: A comment explaining this would be great. */
15281  int j, ids_size;
15282 
15283  if (filter && (*(filter->num_vstr_ptr) > 0) && filter->vstr_ids != NULL)
15284  {
15285  ids_size = 0; /* init */
  /* For each range term i: vstr_ids[i] becomes btree_attr_ids[i] if that id appears in scan_attrs, else -1.
   * ids_size ends up as 1 + the index of the last matching term. */
15286  for (i = 0; i < kv_range->num_index_term; i++)
15287  {
15288  filter->vstr_ids[i] = -1; /* init to false */
15289  for (j = 0; j < filter->scan_attrs->num_attrs; j++)
15290  {
15291  if (filter->btree_attr_ids[i] == filter->scan_attrs->attr_ids[j])
15292  {
15293  filter->vstr_ids[i] = filter->btree_attr_ids[i];
15294  ids_size = i + 1;
15295  break;
15296  }
15297  }
15298  }
15299 
15300  /* reset num of variable string attr in key range */
15301  *(filter->num_vstr_ptr) = ids_size;
15302  }
15303  }
15304  /* Initialize key filter */
15305  if (filter) /* Valid pointer or NULL */
15306  {
  /* The filter structure is copied by value into bts; key_filter then points at that copy. */
15307  bts->key_filter_storage = *filter;
15308  bts->key_filter = &bts->key_filter_storage;
15309  }
15310  else
15311  {
15312  bts->key_filter = NULL;
15313  }
15314  /* Reset key_range_max_value_equal */
15315  bts->key_range_max_value_equal = false;
15316 
15317  bts->read_keys = 0;
15318  bts->qualified_keys = 0;
15319  bts->n_oids_read = 0;
15320  bts->n_oids_read_last_iteration = 0;
15321 
15322  /* Key limits. */
15323  bts->key_limit_lower = key_limit_lower;
15324  bts->key_limit_upper = key_limit_upper;
15325 
15326  /* Should class OID be matched? (for hierarchical classes). */
  /* When match_class_oid is NULL, bts->match_class_oid is left as it was. */
15327  if (match_class_oid != NULL)
15328  {
15329  COPY_OID (&bts->match_class_oid, match_class_oid);
15330  }
15331 
15332  /* Need to check null? */
15333  bts->need_to_check_null = need_to_check_null;
15334 
15335  /* Set other arguments specific to scan type. */
15336  bts->bts_other = bts_other;
15337 
15338  /* Prepare successful. */
15339  return NO_ERROR;
15340 }
15341 
15342 /*
15343  * btree_scan_update_range () - Update range of b-tree scan.
15344  *
15345  * return : NO_ERROR, or ER_BTREE_INVALID_RANGE if kv_range->range is not a known range type.
15346  * thread_p (in) : Thread entry.
15347  * bts (in/out) : B-tree scan. Its key_range (num_index_term, lower_key, upper_key, range) is overwritten.
15348  * kv_range (in) : New range. bts keeps pointers to its key1/key2; the keys are not copied.
  *
  * NOTE(review): the line carrying the function name and parameter list is absent from this listing (its own
  * numbering skips 15351). The body references bts and kv_range; confirm the signature against the original file.
15349  */
15350 static int
15352 {
15353  DB_MIDXKEY *midxkey = NULL;
15354  DB_VALUE *swap_key = NULL;
15355 
15356  /* Assert expected arguments. */
15357  assert (bts != NULL);
15358  assert (kv_range != NULL);
15359 
15360  /* Check valid range. */
15361  switch (kv_range->range)
15362  {
15363  case EQ_NA:
15364  case GT_LT:
15365  case GT_LE:
15366  case GE_LT:
15367  case GE_LE:
15368  case GE_INF:
15369  case GT_INF:
15370  case INF_LE:
15371  case INF_LT:
15372  case INF_INF:
15373  break;
15374  default:
15375  assert (false);
  /* NOTE(review): this listing's own numbering skips a line here (15376); a statement may have been lost in
   * extraction - confirm against the original file. */
15377  return ER_BTREE_INVALID_RANGE;
15378  }
15379 
15380  /* Set key range. */
15381  bts->key_range.num_index_term = kv_range->num_index_term;
15382 
  /* Debug-only sanity check: a multi-column key must have as many columns as its domain's precision. */
15383 #if !defined(NDEBUG)
15384  if (DB_VALUE_TYPE (&kv_range->key1) == DB_TYPE_MIDXKEY)
15385  {
15386  midxkey = db_get_midxkey (&kv_range->key1);
15387  assert (midxkey->ncolumns == midxkey->domain->precision);
15388  }
15389  if (DB_VALUE_TYPE (&kv_range->key2) == DB_TYPE_MIDXKEY)
15390  {
15391  midxkey = db_get_midxkey (&kv_range->key2);
15392  assert (midxkey->ncolumns == midxkey->domain->precision);
15393  }
15394 #endif
15395 
15396  /* lower bound key and upper bound key */
  /* A NULL key (or a multi-column key reported NULL by btree_multicol_key_is_null) is stored as a NULL pointer. */
15397  if (DB_IS_NULL (&kv_range->key1) || btree_multicol_key_is_null (&kv_range->key1))
15398  {
15399  bts->key_range.lower_key = NULL;
15400  }
15401  else
15402  {
15403  bts->key_range.lower_key = &kv_range->key1;
15404  }
15405 
15406  if (DB_IS_NULL (&kv_range->key2) || btree_multicol_key_is_null (&kv_range->key2))
15407  {
15408  bts->key_range.upper_key = NULL;
15409  }
15410  else
15411  {
15412  bts->key_range.upper_key = &kv_range->key2;
15413  }
15414 
15415  /* range type */
15416  bts->key_range.range = kv_range->range;
15417 
15418  /* Swap range if scan is descending. */
  /* The condition holds when exactly one of use_desc_index and part-key-desc is set. */
15419  if ((bts->use_desc_index && !BTREE_IS_PART_KEY_DESC (&bts->btid_int))
15420  || (!bts->use_desc_index && BTREE_IS_PART_KEY_DESC (&bts->btid_int)))
15421  {
15422  /* Reverse scan and its range. */
15423  range_reverse (bts->key_range.range);
15424  swap_key = bts->key_range.lower_key;
15425  bts->key_range.lower_key = bts->key_range.upper_key;
15426  bts->key_range.upper_key = swap_key;
15427  }
15428 
15429  return NO_ERROR;
15430 }
15431 
15432 /*
15433  * btree_find_next_index_record () - Move the scan to the next index record and release the starting page.
15434  * return: the value returned by btree_find_next_index_record_holding_current ()
15435  * bts(in/out): b-tree scan; its C_page and P_page may be changed
15436  *
15437  * Note: This function finds the next index record (or slot).
15438  * Then, it adjusts the slot_id and oid_pos information about the oid-set contained in the found index slot.
15439  * If the next record is located in the next page, unfix the current page and change C_page to it.
  *
  * NOTE(review): the line carrying the function name and parameter list is absent from this listing (its own
  * numbering skips 15442). The body references thread_p and bts; confirm the signature against the original file.
15440  */
15441 static int
15443 {
15444  PAGE_PTR first_page;
15445  int ret_val = NO_ERROR;
15446 
  /* Remember the page the scan started on; it is compared against bts->C_page and bts->P_page after the move. */
15447  first_page = bts->C_page; /* init */
15448 
15449  ret_val = btree_find_next_index_record_holding_current (thread_p, bts, NULL);
  /* The early exit on error is compiled out, so the page handling below runs whatever ret_val is. */
15450 #if 0 /* TODO - need to check return value */
15451  if (ret_val != NO_ERROR)
15452  {
15453  goto error;
15454  }
15455 #endif
15456 
15457  if (first_page != bts->C_page)
15458  {
15459  /* reset common_prefix to recalculate */
  /* NOTE(review): this listing's own numbering skips a line here (15460), so the statement that performs the
   * reset is not visible; confirm against the original file. */
15461  }
15462 
15463  /*
15464  * unfix first page if fix next page and move to it
15465  *
15466  * case 1: P_page == NULL, C_page == first_page x do not fix 1 next page
15467  * case 2: P_page == first_page, C_page == NULL x can't fix 1 next page
15468  * case 3: P_page == first_page, C_page != first_page o fix 1 next
15469  * case 4: P_page == NULL, C_page == NULL o can't fix N next, unfix N-1 prev
15470  * case 5: P_page == NULL, C_page != first_page o fix N next, unfix N-1 prev
15471  * other case: impossible (assert)
15472  *
15473  * in case of 3, 4, 5, unfix first_page
15474  */
15475 
  /* Debug-only: assert that the page state is one of the five cases listed above. */
15476 #if !defined(NDEBUG)
15477  if ((bts->P_page == NULL && bts->C_page == first_page) || (bts->P_page == first_page && bts->C_page == NULL)
15478  || (bts->P_page == first_page && bts->C_page && bts->C_page != first_page)
15479  || (bts->P_page == NULL && bts->C_page == NULL)
15480  || (bts->P_page == NULL && bts->C_page && bts->C_page != first_page))
15481  {
15482  /* case 1, 2, 3, 4, 5 */
15483  }
15484  else
15485  {
15486  assert (false);
15487  }
15488 #endif
15489 
15490  if ((bts->C_page == NULL && bts->P_page == NULL) /* case 4 */
15491  || (bts->C_page != NULL && bts->C_page != first_page)) /* case 3, 5 */
15492  {
15493  if (first_page == bts->P_page)
15494  {
15495  /* prevent double unfix by caller */
15496  bts->P_page = NULL;
15497  }
15498 
15499  if (first_page != NULL)
15500  {
15501  pgbuf_unfix_and_init (thread_p, first_page);
15502  }
15503  }
15504 
15505  return ret_val;
15506 }
15507 
15508 /*
15509  * btree_find_next_index_record_holding_current () - Advance to the next non-fence index record.
15510  * return: NO_ERROR, ER_FAILED if bts->C_page is NULL on entry, or another error code
15511  * bts(in/out): b-tree scan; C_page/C_vpid, P_page/P_vpid, O_page/O_vpid, slot_id and end_scan may be changed
15512  *
15513  * Note: This function finds & peeks the next index record; this function does not unfix the first page.
  * The page that was current on entry is passed to the helper as first_page. When a record is found and peek_rec
  * is not NULL, the record is fetched into peek_rec with PEEK.
  *
  * NOTE(review): the line carrying the function name and parameter list is absent from this listing (its own
  * numbering skips 15516). The body references thread_p, bts and peek_rec; confirm the signature against the
  * original file.
15514  */
15515 static int
15517 {
  /* NOTE(review): rec is only initialized below and is not otherwise used in the visible body. */
15518  RECDES rec;
15519  int ret = NO_ERROR;
15520  PAGE_PTR first_page = bts->C_page;
15521 
15522  rec.data = NULL;
15523 
15524  /*
15525  * Assumptions : last accessed leaf page is fixed.
15526  * - bts->C_page != NULL
15527  * - bts->O_page : NULL or NOT NULL
15528  * - bts->P_page == NULL
15529  */
15530 
15531  /* unfix the overflow page if it is fixed. */
15532  if (bts->O_page != NULL)
15533  {
15534  pgbuf_unfix_and_init (thread_p, bts->O_page);
15535  VPID_SET_NULL (&(bts->O_vpid));
15536  }
15537 
15538  /* unfix the previous leaf page if it is fixed. */
15539  if (bts->P_page != NULL)
15540  {
15541  pgbuf_unfix_and_init (thread_p, bts->P_page);
15542  VPID_SET_NULL (&(bts->P_vpid));
15543  }
15544 
15545  if (bts->C_page == NULL)
15546  {
15547  return ER_FAILED;
15548  }
15549 
15550  bts->P_vpid = bts->C_vpid; /* save started leaf vpid */
15551 
  /* Step one record at a time until a non-fence record is found or the helper runs out of pages (C_page NULL). */
15552  while (bts->C_page != NULL)
15553  {
15554  ret = btree_find_next_index_record_holding_current_helper (thread_p, bts, first_page);
15555  if (ret != NO_ERROR)
15556  {
15557  goto exit_on_error;
15558  }
15559 
15560  /* filter out fence_key record */
15561  if (bts->C_page != NULL)
15562  {
15563  assert (bts->slot_id > 0);
  /* btree_is_fence_key is only evaluated for the first and the last slot of the page. */
15564  if ((bts->slot_id != 1 && bts->slot_id != btree_node_number_of_keys (thread_p, bts->C_page))
15565  || !btree_is_fence_key (bts->C_page, bts->slot_id))
15566  {
15567  /* Found. */
15568  /* Safe guard: key cannot be fence if between 1 and key count. */
15569  assert (!btree_is_fence_key (bts->C_page, bts->slot_id));
15570  break;
15571  }
15572  /* This is fence key. Continue searching. */
15573  }
15574  }
15575 
  /* Still on the leaf the scan started from. */
15576  if (VPID_EQ (&bts->P_vpid, &bts->C_vpid))
15577  {
15578  /* set bts->P_vpid to null for unconditional lock request handling */
15579  VPID_SET_NULL (&bts->P_vpid);
15580  }
15581 
15582  /* Safe guard: should not stop on fence key. */
15583  assert (bts->C_page == NULL || !btree_is_fence_key (bts->C_page, bts->slot_id));
15584 
15585  if (bts->C_page == NULL)
15586  {
15587  assert (VPID_ISNULL (&bts->C_vpid));
15588  bts->end_scan = true;
  /* first_page is deliberately not unfixed here. */
15589  if (bts->P_page != NULL && bts->P_page != first_page)
15590  {
15591  pgbuf_unfix_and_init (thread_p, bts->P_page);
15592  }
15593  }
15594 
15595  if (bts->C_page != NULL && peek_rec != NULL)
15596  {
15597  if (spage_get_record (thread_p, bts->C_page, bts->slot_id, peek_rec, PEEK) != S_SUCCESS)
15598  {
15599  assert (false);
  /* ret is still NO_ERROR on this path; the return expression at exit_on_error substitutes er_errid () or
   * ER_FAILED. */
15600  goto exit_on_error;
15601  }
15602  }
15603 
15604  return ret;
15605 
15606 exit_on_error:
15607 
15608  assert (ret != NO_ERROR);
15609 
15610  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
15611 }
15612 
15613 /*
15614  * btree_find_next_index_record_holding_current_helper () - Advance the scan position by one slot.
15615  * return: NO_ERROR or error code
15616  * bts(in/out): b-tree scan; slot_id, oid_pos, C_page/C_vpid and P_page may be changed
15617  *
15618  * Note: This function finds the next index record (or slot).
15619  * Then, it adjusts the slot_id and oid_pos information about the oid-set contained in the found index slot.
  * The step stays within the current leaf when possible; otherwise pages are followed through header->next_vpid
  * (or header->prev_vpid when bts->use_desc_index is set) until a page with at least one key is fixed or the
  * link is NULL. A page equal to first_page is never unfixed here.
  *
  * NOTE(review): the line carrying the function name and parameter list is absent from this listing (its own
  * numbering skips 15622). The body references thread_p, bts and first_page; confirm the signature against the
  * original file.
15620  */
15621 static int
15623 {
15624  int key_cnt;
15625  int ret = NO_ERROR;
15626  PGBUF_LATCH_CONDITION latch_condition;
15627  BTREE_NODE_HEADER *header = NULL;
15628 
15629  /* get header information (key_cnt) from the current leaf page */
15630  key_cnt = btree_node_number_of_keys (thread_p, bts->C_page);
15631 
15632 #if !defined(NDEBUG)
15633  header = btree_get_node_header (thread_p, bts->C_page);
15634 
15635  assert (header != NULL);
15636  assert (header->node_level == 1); /* BTREE_LEAF_NODE */
15637 #endif
15638 
15639  /*
15640  * If the next index record exists in the current leaf page,
15641  * the next index record(slot) and OID position can be identified easily.
15642  */
15643  if (key_cnt > 0)
15644  {
15645  if (bts->use_desc_index)
15646  {
15647  if (bts->slot_id > 1)
15648  {
15649  bts->slot_id--;
15650  bts->oid_pos = 0;
15651  goto end; /* OK */
15652  }
15653  }
15654  else
15655  {
15656  if (bts->slot_id < key_cnt)
15657  {
15658 
15659  bts->slot_id++;
15660  bts->oid_pos = 0;
15661  goto end; /* OK */
15662  }
15663  }
15664  }
15665 
  /* No further slot in the current page: follow the leaf links. */
15666  while (bts->C_page != NULL)
15667  {
15668  header = btree_get_node_header (thread_p, bts->C_page);
15669  if (header == NULL)
15670  {
  /* NOTE(review): bts->P_page is not checked for NULL before this unfix - confirm that it cannot be NULL here
   * or that pgbuf_unfix_and_init tolerates NULL. */
15671  if (first_page != bts->P_page)
15672  {
15673  pgbuf_unfix_and_init (thread_p, bts->P_page);
15674  }
15675 
15676  goto exit_on_error;
15677  }
15678 
  /* Descending scans follow prev_vpid with a conditional latch; ascending scans follow next_vpid with an
   * unconditional latch. */
15679  if (bts->use_desc_index)
15680  {
15681  bts->C_vpid = header->prev_vpid;
15682  latch_condition = PGBUF_CONDITIONAL_LATCH;
15683  }
15684  else
15685  {
15686  bts->C_vpid = header->next_vpid;
15687  latch_condition = PGBUF_UNCONDITIONAL_LATCH;
15688  }
15689 
  /* The current page becomes the previous page; it stays fixed until the next page has been fixed. */
15690  bts->P_page = bts->C_page;
15691  bts->C_page = NULL;
15692 
15693  if (!VPID_ISNULL (&(bts->C_vpid)))
15694  {
15695  bts->C_page = pgbuf_fix (thread_p, &bts->C_vpid, OLD_PAGE, PGBUF_LATCH_READ, latch_condition);
15696  if (bts->C_page == NULL)
15697  {
15698  if (bts->use_desc_index)
15699  {
15700  assert (latch_condition == PGBUF_CONDITIONAL_LATCH);
  /* NOTE(review): this listing's own numbering skips two lines here (15701-15702); the line below is the tail
   * of a call whose beginning is not visible - confirm against the original file. */
15703  bts->btid_int.sys_btid->root_pageid);
15704  }
15705 
15706  if (first_page != bts->P_page)
15707  {
15708  pgbuf_unfix_and_init (thread_p, bts->P_page);
15709  }
15710 
15711  goto exit_on_error;
15712  }
15713 
15714  (void) pgbuf_check_page_ptype (thread_p, bts->C_page, PAGE_BTREE);
15715 
15716  /* unfix the previous leaf page */
15717  assert (bts->P_page != NULL);
15718 
15719  if (first_page != bts->P_page)
15720  {
15721  pgbuf_unfix_and_init (thread_p, bts->P_page);
15722  }
15723 
15724  /* do not clear bts->P_vpid for UNCONDITIONAL lock request handling */
15725 
15726  key_cnt = btree_node_number_of_keys (thread_p, bts->C_page);
15727 
  /* Enter the new page at its last slot (descending) or its first slot (ascending). A page without keys is
   * skipped by the next loop iteration. */
15728  if (key_cnt > 0)
15729  {
15730  if (bts->use_desc_index)
15731  {
15732  bts->slot_id = key_cnt;
15733  bts->oid_pos = 0;
15734  }
15735  else
15736  {
15737  bts->slot_id = 1;
15738  bts->oid_pos = 0;
15739  }
15740 
15741  goto end; /* OK */
15742  }
15743  }
15744  else
15745  {
  /* No further leaf in scan direction: C_page stays NULL, which ends the loop. */
15746  if (first_page != bts->P_page)
15747  {
15748  pgbuf_unfix_and_init (thread_p, bts->P_page);
15749  }
15750  }
15751  }
15752 
15753 end:
15754  return ret;
15755 
15756 exit_on_error:
15757 
  /* ret is NO_ERROR on every path that reaches this label, so the result is er_errid (), or ER_FAILED when no
   * error id is set. */
15758  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
15759 }
15760 
15761 /*
15762  * btree_apply_key_range_and_filter () - Apply key range and key filter condition
15763  * return: NO_ERROR or error code
  * thread_p(in) : thread entry
15764  * bts(in/out) : pointer to B+-tree scan structure; key_range_max_value_equal is updated
15765  * is_iss(in) : true if this is an index skip scan
15766  * is_key_range_satisfied(out): true, or false
15767  * is_key_filter_satisfied(out): true, or false
  * need_to_check_null(in) : true if the last range column of a multi-column key must be checked for NULL
15768  *
15769  * Note: This function applies key range condition and key filter condition to the current key value saved
15770  * in B+-tree scan structure. The results of the evaluation of the given conditions are returned through
15771  * is_key_range_satisfied and is_key_filter_satisfied. Only the upper bound of the range is compared here, and
  * the key filter is evaluated only when the range is satisfied.
15772  */
15773 static int
15774 btree_apply_key_range_and_filter (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, bool is_iss, bool * is_key_range_satisfied,
15775  bool * is_key_filter_satisfied, bool need_to_check_null)
15776 {
15777  int c; /* comparison result */
15778  DB_LOGICAL ev_res; /* evaluation result */
15779  DB_MIDXKEY *mkey; /* midxkey ptr */
15780  DB_VALUE ep; /* element ptr */
15781  bool allow_null_in_midxkey = false;
15782  DB_TYPE type;
15783  int ret = NO_ERROR;
15784 
15785  *is_key_range_satisfied = *is_key_filter_satisfied = false;
15786  bts->key_range_max_value_equal = false; /* init as false */
15787 
15788  /* Key Range Checking */
15789  if (bts->key_range.upper_key == NULL)
15790  {
  /* No upper bound: handled like "upper key greater than current key". */
15791  c = DB_GT;
15792  }
15793  else
15794  {
15795  c = btree_compare_key (bts->key_range.upper_key, &bts->cur_key, bts->btid_int.key_type, 1, 1, NULL);
15796 
15797  if (c == DB_UNK)
15798  {
15799  /* error should have been set */
15800  goto exit_on_error;
15801  }
15802 
15803  /* when using descending index the comparison should be changed again */
15804  if (bts->use_desc_index)
15805  {
15806  c = -c;
15807  }
15808  }
15809 
  /* c < 0: out of range. c == 0: in range only for ranges whose upper bound is inclusive. c > 0: in range. */
15810  if (c < 0)
15811  {
15812  *is_key_range_satisfied = false;
15813  }
15814  else if (c == 0)
15815  {
15816  if (bts->key_range.range == GT_LE || bts->key_range.range == GE_LE || bts->key_range.range == INF_LE)
15817  {
15818  *is_key_range_satisfied = true;
15819  bts->key_range_max_value_equal = true;
15820  }
15821  else
15822  {
15823  *is_key_range_satisfied = false;
15824  }
15825  }
15826  else
15827  {
15828  *is_key_range_satisfied = true;
15829  }
15830 
15831  if (*is_key_range_satisfied)
15832  {
15833  if (need_to_check_null && DB_VALUE_DOMAIN_TYPE (&bts->cur_key) == DB_TYPE_MIDXKEY
15834  && bts->key_range.num_index_term > 0)
15835  {
15836  mkey = db_get_midxkey (&(bts->cur_key));
15837  /* get the last element from key range elements */
15838  ret = pr_midxkey_get_element_nocopy (mkey, bts->key_range.num_index_term - 1, &ep, NULL, NULL);
15839  if (ret != NO_ERROR)
15840  {
15841  goto exit_on_error;
15842  }
15843 
15844  if (DB_IS_NULL (&ep))
15845  {
15846  bool is_desc = false;
15847 
15848  allow_null_in_midxkey = false; /* init */
15849 
15850  /*
15851  * assert_release (bts->key_range.num_index_term == 1);
15852  * todo: We need to understand what this part of the code does, as it is quite ambiguous.
15853  * Also, it should cover the other cases for bts->key_range.num_index_term as well.
15854  * This needs thoroughly checking.
15855  */
15856 
  /* NOTE(review): this listing's own numbering skips a line here (15857); the condition that presumably guards
   * the block below is not visible - confirm against the original file. */
15858  {
15859  if (ep.need_clear)
15860  { /* need to check */
15861  type = DB_VALUE_DOMAIN_TYPE (&ep);
15862  if (QSTR_IS_ANY_CHAR_OR_BIT (type) && ep.data.ch.medium.buf != NULL)
15863  {
15864  allow_null_in_midxkey = true; /* is Empty-string */
15865  }
15866  }
15867  }
15868 
  /* Effective direction: the scan direction, inverted when the first key column is declared descending. */
15869  is_desc = (bts->use_desc_index ? true : false);
15870  if (bts->btid_int.key_type && bts->btid_int.key_type->setdomain
15871  && bts->btid_int.key_type->setdomain->is_desc)
15872  {
15873  is_desc = !is_desc;
15874  }
15875 
15876  if (is_iss && is_desc && bts->key_range.num_index_term == 1)
15877  {
15878  /* We're inside an INDEX SKIP SCAN doing a descending scan. We allow the first term of a MIDXKEY to
15879  * be NULL since ISS has to return the results for which the first column of the index is NULL. */
15880  allow_null_in_midxkey = true;
15881  }
15882  if (!allow_null_in_midxkey)
15883  {
  /* Range is satisfied but the key is rejected; the key filter is not evaluated. */
15884  *is_key_filter_satisfied = false;
15885  goto end; /* give up */
15886  }
15887  }
  /* NOTE(review): ep is cleared only when it is not NULL; a NULL ep with need_clear set (the empty-string case
   * above) is not cleared here - confirm this is intended. */
15888  if (!DB_IS_NULL (&ep) && ep.need_clear == true)
15889  {
15890  pr_clear_value (&ep);
15891  }
15892  }
15893 
15894  /*
15895  * Only in case that key_range_satisfied is true,
15896  * the key filter can be applied to the current key value.
15897  */
15898  *is_key_filter_satisfied = true;
15899  if (bts->key_filter && bts->key_filter->scan_pred->regu_list)
15900  {
15901  ev_res = eval_key_filter (thread_p, &bts->cur_key, bts->key_filter);
15902  if (ev_res != V_TRUE)
15903  {
15904  *is_key_filter_satisfied = false;
15905  }
15906 
15907  if (ev_res == V_ERROR)
15908  {
15909  goto exit_on_error;
15910  }
15911  }
15912  }
15913 
15914 end:
  /* The filter can never be satisfied while the range is not. */
15915  assert ((*is_key_range_satisfied == false && *is_key_filter_satisfied == false)
15916  || (*is_key_range_satisfied == true && *is_key_filter_satisfied == false)
15917  || (*is_key_range_satisfied == true && *is_key_filter_satisfied == true));
15918 
15919  return ret;
15920 
15921 exit_on_error:
15922  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
15923 }
15924 
15925 /*
15926  * btree_attrinfo_read_dbvalues () -
15927  * Find db_values of desired attributes of given key
15928  *
15929  * curr_key(in): the current key
15930  * btree_att_ids(in): the btree attributes ids
15931  * btree_num_att(in): the btree attributes count
15932  * attr_info(in/out): The attribute information structure which describe the desired attributes
15933  *
15934  * Note: Find DB_VALUES of desired attributes of given key.
15935  * The attr_info structure must have already been initialized with the desired attributes.
15936  */
15937 int
15938 btree_attrinfo_read_dbvalues (THREAD_ENTRY * thread_p, DB_VALUE * curr_key, int *btree_att_ids, int btree_num_att,
15939  HEAP_CACHE_ATTRINFO * attr_info, int func_index_col_id)
15940 {
15941  int i, j, error = NO_ERROR;
15942  HEAP_ATTRVALUE *attr_value;
15943  bool found;
15944 
15945  if (curr_key == NULL || btree_att_ids == NULL || btree_num_att < 0 || attr_info == NULL)
15946  {
15947  return ER_FAILED;
15948  }
15949 
15950  if (DB_VALUE_TYPE (curr_key) != DB_TYPE_MIDXKEY)
15951  {
15952  if (attr_info->num_values != 1 || btree_num_att != 1 || attr_info->values->attrid != btree_att_ids[0])
15953  {
15954  return ER_FAILED;
15955  }
15956 
15957  if (pr_clear_value (&(attr_info->values->dbvalue)) != NO_ERROR)
15958  {
15959  attr_info->values->state = HEAP_UNINIT_ATTRVALUE;
15960  return ER_FAILED;
15961  }
15962 
15963  if (pr_clone_value (curr_key, &(attr_info->values->dbvalue)) != NO_ERROR)
15964  {
15965  attr_info->values->state = HEAP_UNINIT_ATTRVALUE;
15966  return ER_FAILED;
15967  }
15968 
15969  attr_info->values->state = HEAP_WRITTEN_ATTRVALUE;
15970  }
15971  else
15972  {
15973  attr_value = attr_info->values;
15974  for (i = 0; i < attr_info->num_values; i++)
15975  {
15976  found = false;
15977  for (j = 0; j < btree_num_att; j++)
15978  {
15979  if (attr_value->attrid == btree_att_ids[j])
15980  {
15981  found = true;
15982  break;
15983  }
15984  }
15985 
15986  if (found == false)
15987  {
15988  error = ER_FAILED;
15989  goto error;
15990  }
15991 
15992  if (pr_clear_value (&(attr_value->dbvalue)) != NO_ERROR)
15993  {
15994  error = ER_FAILED;
15995  goto error;
15996  }
15997 
15998  if (func_index_col_id != -1)
15999  {
16000  /* consider that in the midxkey resides the function result, which must be skipped if we are interested
16001  * in attributes */
16002  if (j >= func_index_col_id)
16003  {
16004  j++;
16005  }
16006  }
16007  if (pr_midxkey_get_element_nocopy (db_get_midxkey (curr_key), j, &(attr_value->dbvalue), NULL, NULL) !=
16008  NO_ERROR)
16009  {
16010  error = ER_FAILED;
16011  goto error;
16012  }
16013 
16014  attr_value->state = HEAP_WRITTEN_ATTRVALUE;
16015  attr_value++;
16016  }
16017  }
16018 
16019  return NO_ERROR;
16020 
16021 error:
16022 
16023  attr_value = attr_info->values;
16024  for (i = 0; i < attr_info->num_values; i++)
16025  {
16026  attr_value->state = HEAP_UNINIT_ATTRVALUE;
16027  }
16028 
16029  return error;
16030 }
16031 
16032 /*
16033  * btree_dump_curr_key () -
16034  * Dump the current key
16035  *
16036  * bts(in): pointer to B+-tree scan structure
16037  * filter(in): key filter
16038  * oid(in): the current oid
16039  * iscan_id(in): index scan id
16040  */
16041 static int
16042 btree_dump_curr_key (THREAD_ENTRY * thread_p, BTREE_SCAN * bts, FILTER_INFO * filter, OID * oid,
16043  INDX_SCAN_ID * iscan_id)
16044 {
16045  HEAP_CACHE_ATTRINFO *attr_info;
16046  REGU_VARIABLE_LIST regu_list;
16047  int error;
16048 
16049  if (bts == NULL || iscan_id == NULL || iscan_id->indx_cov.list_id == NULL || iscan_id->indx_cov.val_descr == NULL
16050  || iscan_id->indx_cov.output_val_list == NULL || iscan_id->indx_cov.tplrec == NULL)
16051  {
16052  return ER_FAILED;
16053  }
16054 
16055  if (iscan_id->rest_attrs.num_attrs > 0)
16056  {
16057  /* normal index scan or join index scan */
16058  attr_info = iscan_id->rest_attrs.attr_cache;
16059  regu_list = iscan_id->rest_regu_list;
16060  }
16061  else if (iscan_id->pred_attrs.num_attrs > 0)
16062  {
16063  /* rest_attrs.num_attrs == 0 if index scan term is join index scan with always-true condition. example: SELECT
16064  * ... FROM X inner join Y on 1 = 1; */
16065  attr_info = iscan_id->pred_attrs.attr_cache;
16066  regu_list = iscan_id->scan_pred.regu_list;
16067  }
16068  else
16069  {
16070  assert_release (false);
16071  attr_info = NULL;
16072  regu_list = NULL;
16073  }
16074 
16075  error =
16076  btree_attrinfo_read_dbvalues (thread_p, &(bts->cur_key), filter->btree_attr_ids, filter->btree_num_attrs, attr_info,
16077  iscan_id->indx_cov.func_index_col_id);
16078  if (error != NO_ERROR)
16079  {
16080  return error;
16081  }
16082 
16083  error = fetch_val_list (thread_p, regu_list, iscan_id->indx_cov.val_descr, NULL, oid, NULL, PEEK);
16084  if (error != NO_ERROR)
16085  {
16086  return error;
16087  }
16088 
16089  error =
16090  qexec_insert_tuple_into_list (thread_p, iscan_id->indx_cov.list_id, iscan_id->indx_cov.output_val_list,
16091  iscan_id->indx_cov.val_descr, iscan_id->indx_cov.tplrec);
16092  if (error != NO_ERROR)
16093  {
16094  return error;
16095  }
16096 
16097  return NO_ERROR;
16098 }
16099 
16100 /*
16101  * btree_get_next_key_info () - Advance to next key in b-tree and obtain information.
16102  *
16103  * return : Scan code.
16104  * thread_p (in) : Thread entry.
16105  * btid (in) : B-tree identifier.
16106  * bts (in) : B-tree scan.
16107  * num_classes (in) : Number of classes in class_oids_ptr.
16108  * class_oids_ptr (in) : Class Object identifiers.
16109  * index_scan_id_p (in) : Index scan data.
16110  * key_info (out) : Array of value pointers to store key information.
16111  *
16112  * TODO: Handle unique on hierarchy indexes.
16113  */
16114 SCAN_CODE
16115 btree_get_next_key_info (THREAD_ENTRY * thread_p, BTID * btid, BTREE_SCAN * bts, int num_classes, OID * class_oids_ptr,
16116  INDX_SCAN_ID * index_scan_id_p, DB_VALUE ** key_info)
16117 {
  /* NOTE(review): this listing skips several numbered lines inside this function (16127, 16169, 16204, 16207,
   * 16209, 16212, 16225, 16240), so a few statements below are truncated. The notes added here describe only
   * what the visible lines show. num_classes and class_oids_ptr are not referenced by any visible statement. */
16118  int error_code = NO_ERROR;
16119  SCAN_CODE result = S_SUCCESS;
16120  int oid_size;
16121  OID class_oid, oid;
16122  BTREE_SEARCH search_result = BTREE_KEY_NOTFOUND;
16123 
16124 #if defined(BTREE_DEBUG)
16125  if (BTREE_INVALID_INDEX_ID (btid))
16126  {
16128  btid->root_pageid);
  /* NOTE(review): returns a bare -1 from a function whose return type is SCAN_CODE. */
16129  return -1;
16130  }
16131 #endif /* BTREE_DEBUG */
16132 
16133  OID_SET_NULL (&class_oid);
16134 
16135  /* initialize key filter */
16136  bts->key_filter = NULL;
16137 
16138  /* copy use desc index information in the BTS to have it available in the b-tree functions. */
16139  if (index_scan_id_p->indx_info)
16140  {
16141  bts->use_desc_index = index_scan_id_p->indx_info->use_desc_index != 0;
16142  }
16143  else
16144  {
16145  bts->use_desc_index = 0;
16146  }
16147 
  /* A NULL page id in C_vpid marks the first call; it is set back to NULL at the exit below when S_END is reached. */
16148  if (bts->C_vpid.pageid == NULL_PAGEID)
16149  {
16150  /* first btree_get_next_key_info call, initialize bts */
16151  error_code =
16152  btree_prepare_bts (thread_p, bts, btid, index_scan_id_p, NULL, NULL, &oid_Null_oid, NULL, NULL, false, NULL);
16153  if (error_code != NO_ERROR)
16154  {
16155  ASSERT_ERROR ();
16156  goto error;
16157  }
16158  error_code = btree_range_scan_start (thread_p, bts);
16159  if (error_code != NO_ERROR)
16160  {
16161  ASSERT_ERROR ();
16162  goto error;
16163  }
16164  /* search is positioned on the first key */
16165  }
16166  else
16167  {
16168  /* resume search */
16170 
16171  error_code = btree_range_scan_resume (thread_p, bts);
16172  if (error_code != NO_ERROR)
16173  {
16174  ASSERT_ERROR ();
16175  goto error;
16176  }
16177  }
16178 
16179  if (bts->end_scan)
16180  {
16181  /* Reached the end of leaf level */
16182  result = S_END;
16183  goto end;
16184  }
16185 
  /* NOTE(review): oid_size is assigned here but is not read by any visible statement below. */
16186  oid_size = BTREE_IS_UNIQUE (bts->btid_int.unique_pk) ? 2 * OR_OID_SIZE : OR_OID_SIZE;
16187 
16188  /* C_page should be already loaded */
16189  assert (bts->C_page != NULL);
16190 
16191  /* TODO: Fill the rest of key information here */
16192  /* TODO: Do we have to get all oids or should we just count them ? Or maybe select only the first OID? Or a maximum
16193  * number of OIDs... */
16194 
  /* Position of the current key: volume id, page id and slot id of the current leaf record. */
16195  db_make_int (key_info[BTREE_KEY_INFO_VOLUMEID], bts->C_vpid.volid);
16196  db_make_int (key_info[BTREE_KEY_INFO_PAGEID], bts->C_vpid.pageid);
16197  db_make_int (key_info[BTREE_KEY_INFO_SLOTID], bts->slot_id);
16198 
16199  /* Get key */
  /* The return values of pr_clear_value and pr_clone_value are not checked here. */
16200  pr_clear_value (key_info[BTREE_KEY_INFO_KEY]);
16201  pr_clone_value (&bts->cur_key, key_info[BTREE_KEY_INFO_KEY]);
16202 
16203  /* Get overflow key and overflow oids */
16205  db_make_string (key_info[BTREE_KEY_INFO_OVERFLOW_KEY],
16206  btree_leaf_is_flaged (&bts->key_record, BTREE_LEAF_RECORD_OVERFLOW_KEY) ? "true" : "false");
16208  db_make_string (key_info[BTREE_KEY_INFO_OVERFLOW_OIDS],
16210 
16211  /* Get OIDs count -> For now ignore the overflow OIDs */
16213  btree_record_get_num_oids (thread_p, &bts->btid_int, &bts->key_record, bts->offset, BTREE_LEAF_NODE));
16214 
16215  /* Get OIDs -> For now just the first OID */
16216  search_result =
16217  btree_key_find_first_visible_row (thread_p, &bts->btid_int, &bts->key_record, bts->offset, BTREE_LEAF_NODE, &oid,
16218  &class_oid, -1);
  /* When no visible OID is found, oid ends up NULL unless the overflow search below finds one. */
16219  if (search_result == BTREE_KEY_NOTFOUND)
16220  {
16221  if (!VPID_ISNULL (&(bts->leaf_rec_info.ovfl)))
16222  {
16223  /* search for visible OID into OID overflow page */
16224  search_result =
16226  &class_oid);
16227  if (search_result == BTREE_KEY_NOTFOUND)
16228  {
16229  OID_SET_NULL (&oid);
16230  }
16231  }
16232  else
16233  {
16234  OID_SET_NULL (&oid);
16235  }
16236  }
16237  db_make_oid (key_info[BTREE_KEY_INFO_FIRST_OID], &oid);
16238 
16239  /* Key was consumed. */
16241 
  /* Common exit for success, end of scan and error: remember the leaf page LSA, release pages that are still
   * fixed, and clear the scan key on S_END / S_ERROR. */
16242 end:
16243  if (bts->C_page != NULL)
16244  {
16245  LSA_COPY (&bts->cur_leaf_lsa, pgbuf_get_lsa (bts->C_page));
16246  pgbuf_unfix_and_init (thread_p, bts->C_page);
16247  }
16248 
  /* NOTE(review): the guard tests O_page but the call releases C_page, which the block above has already passed
   * to pgbuf_unfix_and_init; O_page itself is never released in this function. Presumably bts->O_page was
   * intended here - confirm and fix. */
16249  if (bts->O_page != NULL)
16250  {
16251  pgbuf_unfix_and_init (thread_p, bts->C_page);
16252  }
16253 
16254  if (bts->P_page != NULL)
16255  {
16256  pgbuf_unfix_and_init (thread_p, bts->P_page);
16257  }
16258 
16259  if (result == S_END || result == S_ERROR)
16260  {
16261  btree_scan_clear_key (bts);
16262  }
16263 
16264  if (result == S_END)
16265  {
16266  VPID_SET_NULL (&bts->C_vpid);
16267  }
16268 
16269  return result;
16270 
  /* Error path: report S_ERROR and share the cleanup above. */
16271 error:
16272  result = S_ERROR;
16273  goto end;
16274 }
16275 
16276 /*
16277  * btree_find_min_or_max_key () -
16278  * return: NO_ERROR
16279  * btid(in):
16280  * key(in):
16281  * find_min_key(in):
16282  */
16283 int
16284 btree_find_min_or_max_key (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, int find_min_key)
16285 {
  /* Stores a clone of the smallest (find_min_key != 0) or largest key of the index into *key; *key is left as a
   * NULL value when the lower-bound search ends the scan without a record. Returns NO_ERROR or an error code.
   * NOTE(review): listing line 16335 is absent from this function; the notes here cover visible lines only. */
16286  VPID root_vpid;
16287  PAGE_PTR root_page_ptr = NULL;
16288  int offset;
16289  bool clear_key = false;
16290  DB_VALUE key_value;
16291  BTREE_ROOT_HEADER *root_header = NULL;
16292  RECDES rec;
16293  LEAF_REC leaf_pnt;
16294  BTREE_SCAN btree_scan, *BTS;
16295  int ret = NO_ERROR;
16296 
  /* Without an output value there is nothing to do; this is reported as success. */
16297  if (key == NULL)
16298  {
16299  return NO_ERROR;
16300  }
16301 
16302  db_make_null (key);
16303  btree_init_temp_key_value (&clear_key, &key_value);
16304 
16305  BTS = &btree_scan;
16306  BTREE_INIT_SCAN (BTS);
16307 
16308  BTS->btid_int.sys_btid = btid;
16309 
16310  root_vpid.pageid = btid->root_pageid;
16311  root_vpid.volid = btid->vfid.volid;
16312 
  /* The root page is read-latched only while the header information is copied into BTS->btid_int. */
16313  root_page_ptr = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
16314  if (root_page_ptr == NULL)
16315  {
16316  goto exit_on_error;
16317  }
16318 
16319  (void) pgbuf_check_page_ptype (thread_p, root_page_ptr, PAGE_BTREE);
16320 
16321  root_header = btree_get_root_header (thread_p, root_page_ptr);
16322  if (root_header == NULL)
16323  {
16324  goto exit_on_error;
16325  }
16326 
16327  ret = btree_glean_root_header_info (thread_p, root_header, &BTS->btid_int);
16328  if (ret != NO_ERROR)
16329  {
16330  goto exit_on_error;
16331  }
16332 
16333  pgbuf_unfix_and_init (thread_p, root_page_ptr);
16334 
16336 
16337  /*
16338  * in case of desc domain index,
16339  * we have to find the min/max key in opposite order.
16340  */
16341  if (BTS->btid_int.key_type->is_desc)
16342  {
16343  find_min_key = !find_min_key;
16344  }
16345 
  /* The scan direction selects which end of the leaf level the lower-bound search below lands on. */
16346  if (find_min_key)
16347  {
16348  BTS->use_desc_index = 0;
16349  }
16350  else
16351  {
16352  BTS->use_desc_index = 1;
16353  }
16354 
16355  ret = btree_find_lower_bound_leaf (thread_p, BTS, NULL);
16356  if (ret != NO_ERROR)
16357  {
16358  goto exit_on_error;
16359  }
16360 
16361  if (!BTREE_END_OF_SCAN (BTS))
16362  {
16363  assert (BTS->slot_id > 0);
16364  if (spage_get_record (thread_p, BTS->C_page, BTS->slot_id, &rec, PEEK) != S_SUCCESS)
16365  {
16366  goto exit_on_error;
16367  }
16368 
16369  if (btree_read_record (thread_p, &BTS->btid_int, BTS->C_page, &rec, &key_value, (void *) &leaf_pnt,
16370  BTREE_LEAF_NODE, &clear_key, &offset, PEEK_KEY_VALUE, NULL) != NO_ERROR)
16371  {
16372  goto exit_on_error;
16373  }
16374 
  /* The clone result is deliberately discarded: a failed clone is not reported to the caller. */
16375  (void) pr_clone_value (&key_value, key);
16376 
16377  btree_clear_key_value (&clear_key, &key_value);
16378  }
16379 
  /* Common exit: release every page that is still fixed and clear the temporary key value. */
16380 end:
16381 
16382  if (BTS->P_page != NULL)
16383  {
16384  pgbuf_unfix_and_init (thread_p, BTS->P_page);
16385  }
16386 
16387  if (BTS->C_page != NULL)
16388  {
16389  pgbuf_unfix_and_init (thread_p, BTS->C_page);
16390  }
16391 
16392  if (BTS->O_page != NULL)
16393  {
16394  pgbuf_unfix_and_init (thread_p, BTS->O_page);
16395  }
16396 
16397  if (root_page_ptr)
16398  {
16399  pgbuf_unfix_and_init (thread_p, root_page_ptr);
16400  }
16401 
  /* On the success path the key value was already cleared right after cloning, so this is the second clear call. */
16402  btree_clear_key_value (&clear_key, &key_value);
16403 
16404  return ret;
16405 
16406 exit_on_error:
16407 
  /* Keep ret when it already holds an error; otherwise use er_errid (), falling back to ER_FAILED when that
   * reports NO_ERROR as well. */
16408  ret = (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
16409 
16410  goto end;
16411 }
16412 
16413 /*
16414  * Recovery functions
16415  */
16416 
16417 /*
16418  * btree_rv_util_save_page_records () - Save a set of page records
16419  * return: int
16420  * page_ptr(in): Page Pointer
16421  * first_slotid(in): First Slot identifier to be saved
16422  * rec_cnt(in): Number of slots to be saved
16423  * ins_slotid(in): First Slot identifier to reinsert set of records
16424  * data(in): Data area where the records will be stored
16425  * (Enough space(DB_PAGESIZE) must have been allocated by caller
16426  * length(in): Effective length of the data area after save is completed
16427  *
16428  * Note: Copy the set of records to designated data area.
16429  *
16430  * Note: This is a UTILITY routine, but not an actual recovery routine
16431  */
16432 int
16433 btree_rv_util_save_page_records (THREAD_ENTRY * thread_p, PAGE_PTR page_ptr, INT16 first_slotid, int rec_cnt,
16434  INT16 ins_slotid, char *data, int *length)
16435 {
16436  RECDES rec;
16437  int i, offset, wasted;
16438  char *datap;
16439  int ret = NO_ERROR;
16440 
16441  *length = 0;
16442  datap = (char *) data + sizeof (RECSET_HEADER);
16443  offset = sizeof (RECSET_HEADER);
16444  wasted = DB_WASTED_ALIGN (offset, BTREE_MAX_ALIGN);
16445  datap += wasted;
16446  offset += wasted;
16447 
16448  for (i = 0; i < rec_cnt; i++)
16449  {
16450  assert (first_slotid + i > 0);
16451  if (spage_get_record (thread_p, page_ptr, first_slotid + i, &rec, PEEK) != S_SUCCESS)
16452  {
16453  return ((ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
16454  }
16455 
16456  *(INT16 *) datap = rec.length;
16457  datap += 2;
16458  offset += 2;
16459 
16460  *(INT16 *) datap = rec.type;
16461  datap += 2;
16462  offset += 2;
16463 
16464  memcpy (datap, rec.data, rec.length);
16465  datap += rec.length;
16466  offset += rec.length;
16467  wasted = DB_WASTED_ALIGN (offset, BTREE_MAX_ALIGN);
16468  datap += wasted;
16469  offset += wasted;
16470  }
16471 
16472  datap = data;
16473  ((RECSET_HEADER *) datap)->rec_cnt = rec_cnt;
16474  ((RECSET_HEADER *) datap)->first_slotid = ins_slotid;
16475  *length = offset;
16476 
16477  return NO_ERROR;
16478 }
16479 
16480 /*
16481  * btree_rv_save_keyval_for_undo () - Save a < key, value > pair and other information for undo logical log purposes.
16482  *
16483  * return : Error code.
16484  * btid (in) : B+tree index identifier.
16485  * key (in) : Key to be saved.
16486  * cls_oid (in) : Class identifier.
16487  * oid (in) : Object identifier.
16488  * mvcc_id (in) : MVCCID for operation (NULL if it is not an MVCC operation).
16489  * data (out) : Data area where the above fields will be stored
16490  * (Note: The caller should FREE the allocated area.)
16491  * length (out) : Length of the data area after save is completed.
16492  *
16493  * Note: Copy the adequate key-value information to the data area and return this data area.
16494  * The MVCCID is stored in buffer only if is not null. In this case, an area at the beginning of recovery data
16495  * is reserved for the log lsa of previous MVCC operation (used by vacuum).
16496  *
16497  * Note: This is a UTILITY routine, but not an actual recovery routine
16498  *
16499  * Warning: This routine assumes that the keyval is from a leaf page and not a non-leaf page. Because of this assumption,
16500  * we use the index domain and not the non-leaf domain to write out the key value.
16501  * Currently all calls to this routine are from leaf pages. Be careful if you add a call to this routine.
16502  */
16503 int
16505  BTREE_OP_PURPOSE purpose, char *preallocated_buffer, char **data, int *capacity,
16506  int *length)
16507 {
  /* NOTE(review): this listing omits some numbered lines of this function: the line holding the function name and
   * the leading parameters, the case labels of the switch on purpose, and several statements/conditions in the
   * MVCC packing code. The notes added below describe only what the visible lines show. */
16508  char *datap;
16509  int key_len;
16510  OR_BUF buf;
16511  PR_TYPE *pr_type;
16512  int ret = NO_ERROR;
16513  int size;
16514  OID oid_and_flags;
16515 
16516  assert (key != NULL);
16517  assert (cls_oid != NULL);
16518  assert (oid != NULL);
16519 
16520  *length = 0;
16521 
16522  key_len = (int) btree_get_disk_size_of_key (key);
16523 
  /* Estimated upper bound of the packed data; asserted against the real length at the end of the function. */
16524  size = (OR_BTID_ALIGNED_SIZE /* btid */
16525  + BTREE_OBJECT_MAX_SIZE /* Object OID and all its info. */
16526  + key_len /* key length */
16527  + (2 * INT_ALIGNMENT)); /* extra space for alignment */
16528 
  /* Buffer selection. With *data == NULL: use the preallocated buffer when it is large enough, otherwise allocate.
   * With *data set but too small: allocate a new buffer when *data is the preallocated one (which is not freed
   * here), otherwise reallocate. *capacity is updated whenever a buffer is (re)allocated. */
16529  /* Allocate enough memory to handle estimated size. */
16530  if (*data == NULL)
16531  {
16532  /* Initialize data */
16533  if (preallocated_buffer == NULL || *capacity < (int) size)
16534  {
16535  /* No preallocated buffer or not enough capacity. */
16536  *data = (char *) db_private_alloc (NULL, size);
16537  if (*data == NULL)
16538  {
16539  ASSERT_ERROR_AND_SET (ret);
16540  return ret;
16541  }
16542  *capacity = size;
16543  }
16544  else
16545  {
16546  *data = preallocated_buffer;
16547  }
16548  }
16549  else if (*capacity < (int) size)
16550  {
16551  if (*data == preallocated_buffer)
16552  {
16553  /* Allocate a new buffer. */
16554  *data = (char *) db_private_alloc (NULL, size);
16555  if (*data == NULL)
16556  {
16557  ASSERT_ERROR_AND_SET (ret);
16558  return ret;
16559  }
16560  *capacity = size;
16561  }
16562  else
16563  {
16564  /* Reallocate buffer. */
  /* *data is overwritten only after the reallocation succeeded, so the old pointer is kept on failure. */
16565  char *new_data = (char *) db_private_realloc (NULL, *data, size);
16566  if (new_data == NULL)
16567  {
16568  ASSERT_ERROR_AND_SET (ret);
16569  return ret;
16570  }
16571  *capacity = size;
16572  *data = new_data;
16573  }
16574  }
16575  else
16576  {
16577  /* Current data buffer has enough space. */
16578  }
16579 
16580  /* Start packing recovery data. */
16581  datap = (char *) (*data);
16582 
16583  ASSERT_ALIGN (datap, INT_ALIGNMENT);
16584 
16585  datap = or_pack_btid (datap, btid->sys_btid);
16586 
  /* Work on a copy of the OID so that the caller's oid is not modified. */
16587  COPY_OID (&oid_and_flags, oid);
16588 
16589  /* Based on the purpose of recovery, some MVCC information may require packing. */
16590  switch (purpose)
16591  {
16593  /* Pack delete MVCCID or insert MVCCID. Vacuum will then visit this object and remove it or remove its insert
16594  * MVCCID. */
16595  assert (mvcc_info != NULL);
16596  if (BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info))
16597  {
16598  /* Save delete MVCCID. */
16600  }
16601  else
16602  {
16603  /* Save insert MVCCID. */
16606  }
16607  break;
16608 
16611  /* Object is being physically removed. Since on rollback we should also recover MVCC information, it must be
16612  * packed. */
16613  assert (mvcc_info != NULL);
16615  {
16616  /* Do not pack insert MVCCID if it is all visible. */
16618  }
16619  if (BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info))
16620  {
16621  /* Do not pack delete MVCCID if it is NULL. */
16623  }
16624  break;
16625 
16627  /* We need delete MVCCID to log data since the log record will not be MVCC type and will not include transaction
16628  * MVCCID. */
16629  assert (mvcc_info != NULL);
16631  && mvcc_info->delete_mvccid == logtb_get_current_mvccid (NULL));
16633  break;
16634 
16635  default:
16636  /* No MVCC information needs packing. It doesn't exist or it can be recovered in other ways. */
16637  break;
16638  }
16639 
16640  /* Pack class OID for unique indexes, if it is not the same with top class. Undo function should know to treat null
16641  * class OID as top class. */
16642  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (cls_oid, &btid->topclass_oid))
16643  {
16645  }
16646 
  /* Write order from here on: OID with flags, optional class OID, optional MVCC ids, then the key. */
16647  /* Save OID and all flags. */
16648  OR_PUT_OID (datap, &oid_and_flags);
16649  datap += OR_OID_SIZE;
16650 
16651  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (cls_oid, &btid->topclass_oid))
16652  {
16653  /* Save class OID. */
16654  OR_PUT_OID (datap, cls_oid);
16655  datap += OR_OID_SIZE;
16656  }
16657 
16658  /* Add required MVCC information. */
16660  {
16661  assert (mvcc_info != NULL && MVCCID_IS_NOT_ALL_VISIBLE (mvcc_info->insert_mvccid));
16662  OR_PUT_MVCCID (datap, &mvcc_info->insert_mvccid);
16663  datap += OR_MVCCID_SIZE;
16664  }
16666  {
16667  assert (mvcc_info != NULL);
16668  assert (MVCCID_IS_VALID (mvcc_info->delete_mvccid));
16669  OR_PUT_MVCCID (datap, &mvcc_info->delete_mvccid);
16670  datap += OR_MVCCID_SIZE;
16671  }
16672 
16673  ASSERT_ALIGN (datap, INT_ALIGNMENT);
16674 
16675  /* Save key. */
16676  or_init (&buf, datap, key_len);
16677  pr_type = btid->key_type->type;
16678  ret = pr_type->index_writeval (&buf, key);
16679  if (ret != NO_ERROR)
16680  {
16681  ASSERT_ERROR ();
  /* Only a buffer other than the caller's preallocated one is released here. NOTE(review): *capacity is left
   * unchanged on this path - confirm callers do not reuse it together with the released buffer. */
16682  if (*data != preallocated_buffer)
16683  {
16684  db_private_free_and_init (NULL, *data);
16685  }
16686  return ret;
16687  }
16688  datap += key_len;
16689 
16690  *length = CAST_BUFLEN (datap - *data);
16691 
16692  /* Safe guard. */
16693  assert (0 < *length);
16694  assert (*length <= (int) size);
16695 
16696  /* Success. */
16697  return NO_ERROR;
16698 }
16699 
16700 /*
16701  * btree_rv_save_keyval_for_undo_two_objects () - Create undo data by storing two objects.
16702  *
16703  * return : Error code.
16704  * btid (in) : B-tree info.
16705  * key (in) : Key value.
16706  * first_version (in) : First object info.
16707  * second_version (in) : Second object info.
16708  * preallocated_buffer (in) : Preallocated buffer to store undo data.
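16709  * data (in/out) : Pointer to the buffer that stores the undo data; may be set to a newly allocated buffer.
16710  * capacity (in/out) : Capacity of data buffer; updated when a buffer is allocated or reallocated.
16711  * length (out) : Length of undo data.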
16712  */
16713 int
16715  BTREE_OBJECT_INFO * second_version, BTREE_OP_PURPOSE purpose,
16716  char *preallocated_buffer, char **data, int *capacity, int *length)
16717 {
  /* NOTE(review): this listing omits some numbered lines of this function: the line holding the function name and
   * the leading parameters, the case label of the switch on purpose, the flag-setting statements and the
   * conditions guarding the MVCCID packing. The notes added below describe only what the visible lines show. */
16718  int size;
16719  int key_len;
16720  int error_code = NO_ERROR;
16721  char *datap = NULL;
16722  OR_BUF buf;
16723  PR_TYPE *pr_type;
16724  OID oid_and_flags;
16725 
16726  key_len = (int) btree_get_disk_size_of_key (key);
16727 
  /* Estimated size of the packed data. NOTE(review): the estimate reserves two OID-sized slots per object and
   * nothing explicit for the MVCCIDs that may be packed for the second object below - confirm it is sufficient. */
16728  size = OR_BTID_ALIGNED_SIZE + /* btid */
16729  2 * OR_OID_SIZE + /* first_version */
16730  2 * OR_OID_SIZE + /* second_version */
16731  key_len + /* key_length */
16732  2 * INT_ALIGNMENT; /* extra space for alignment */
16733 
  /* Buffer selection. With *data == NULL: use the preallocated buffer when it is large enough, otherwise allocate.
   * With *data set but too small: allocate a new buffer when *data is the preallocated one (which is not freed
   * here), otherwise reallocate. *capacity is updated whenever a buffer is (re)allocated. */
16734  /* Allocate enough memory to handle estimated size. */
16735  if (*data == NULL)
16736  {
16737  /* Initialize data */
16738  if (preallocated_buffer == NULL || *capacity < (int) size)
16739  {
16740  /* No preallocated buffer or not enough capacity. */
16741  *data = (char *) db_private_alloc (NULL, size);
16742  if (*data == NULL)
16743  {
16744  ASSERT_ERROR_AND_SET (error_code);
16745  return error_code;
16746  }
16747  *capacity = size;
16748  }
16749  else
16750  {
16751  *data = preallocated_buffer;
16752  }
16753  }
16754  else if (*capacity < size)
16755  {
16756  if (*data == preallocated_buffer)
16757  {
16758  /* Allocate a new buffer. */
16759  *data = (char *) db_private_alloc (NULL, size);
16760  if (*data == NULL)
16761  {
16762  ASSERT_ERROR_AND_SET (error_code);
16763  return error_code;
16764  }
16765  *capacity = size;
16766  }
16767  else
16768  {
16769  /* Reallocate buffer. */
  /* *data is overwritten only after the reallocation succeeded, so the old pointer is kept on failure. */
16770  char *new_data = (char *) db_private_realloc (NULL, *data, size);
16771  if (new_data == NULL)
16772  {
16773  ASSERT_ERROR_AND_SET (error_code);
16774  return error_code;
16775  }
16776  *capacity = size;
16777  *data = new_data;
16778  }
16779  }
16780  else
16781  {
16782  /* Current data buffer has enough space. */
16783  }
16784 
16785  /* Start packing recovery data. */
16786  datap = (char *) (*data);
16787 
16788  ASSERT_ALIGN (datap, INT_ALIGNMENT);
16789 
16790  datap = or_pack_btid (datap, btid->sys_btid);
16791 
  /* For a unique index the class OID is packed after the object OID only when it differs from the top class. */
16792  /* Save first object. */
16793  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (&first_version->class_oid, &btid->topclass_oid))
16794  {
16795  /* Mark object OID that class OID is also packed. */
16796  COPY_OID (&oid_and_flags, &first_version->oid);
16798  /* Pack OID. */
16799  datap = or_pack_oid (datap, &oid_and_flags);
16800  /* Pack class OID. */
16801  datap = or_pack_oid (datap, &first_version->class_oid);
16802  }
16803  else
16804  {
16805  /* Pack OID. */
16806  datap = or_pack_oid (datap, &first_version->oid);
16807  }
16808 
16809  /* Save second object. */
16810  COPY_OID (&oid_and_flags, &second_version->oid);
16811  /* What MVCC info to save. */
16812  switch (purpose)
16813  {
16815  /* We need to save delete MVCCID of object being relocated to be able to find it. */
16816  if (BTREE_MVCC_INFO_IS_DELID_VALID (&second_version->mvcc_info))
16817  {
16819  }
16820  break;
16821  default:
16822  /* Unexpected. */
16823  assert (false);
16824  break;
16825  }
16826 
16827  if (BTREE_IS_UNIQUE (btid->unique_pk) && !OID_EQ (&second_version->class_oid, &btid->topclass_oid))
16828  {
16829  /* Mark object OID that class OID is also packed. */
16831  /* Pack OID. */
16832  datap = or_pack_oid (datap, &oid_and_flags);
16833  /* Pack class OID. */
16834  datap = or_pack_oid (datap, &second_version->class_oid);
16835  }
16836  else
16837  {
16838  /* Pack OID. */
16839  datap = or_pack_oid (datap, &oid_and_flags);
16840  }
16841 
16843  {
16844  /* Pack insert MVCCID. */
16845  datap = or_pack_mvccid (datap, second_version->mvcc_info.insert_mvccid);
16846  }
16848  {
16849  /* Pack delete MVCCID. */
16850  datap = or_pack_mvccid (datap, second_version->mvcc_info.delete_mvccid);
16851  }
16852 
16853  ASSERT_ALIGN (datap, INT_ALIGNMENT);
16854 
16855  /* Save key. */
16856  or_init (&buf, datap, key_len);
16857  pr_type = btid->key_type->type;
16858  error_code = pr_type->index_writeval (&buf, key);
16859  if (error_code != NO_ERROR)
16860  {
16861  ASSERT_ERROR ();
  /* Only a buffer other than the caller's preallocated one is released here; *capacity is left unchanged. */
16862  if (*data != preallocated_buffer)
16863  {
16864  db_private_free_and_init (NULL, *data);
16865  }
16866  return error_code;
16867  }
16868  datap += key_len;
16869 
16870  *length = CAST_BUFLEN (datap - *data);
16871 
16872  /* Safe guard. */
16873  assert (0 < *length);
16874  assert (*length <= size);
16875 
16876  /* Success. */
16877  return NO_ERROR;
16878 }
16879 
#if defined(ENABLE_UNUSED_FUNCTION)
/*
 * btree_rv_util_dump_leafrec () - Dump a tree leaf node record.
 *
 * return : nothing
 * thread_p (in) : Thread entry.
 * fp (in) : Output stream.
 * btid (in) : B-tree info handed to the record dumper.
 * rec (in) : Leaf record.
 *
 * Note: This is a UTILITY routine, but not an actual recovery routine. It only forwards to
 *       btree_dump_leaf_record with a fixed last argument of 2.
 */
void
btree_rv_util_dump_leafrec (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec)
{
  /* NOTE(review): the trailing 2 is presumably a dump depth - confirm against btree_dump_leaf_record. */
  const int leaf_dump_arg = 2;

  btree_dump_leaf_record (thread_p, fp, btid, rec, leaf_dump_arg);
}

/*
 * btree_rv_util_dump_nleafrec () - Dump a tree non-leaf node record.
 *
 * return : nothing
 * thread_p (in) : Thread entry.
 * fp (in) : Output stream.
 * btid (in) : B-tree info handed to the record dumper.
 * rec (in) : Non-leaf record.
 *
 * Note: This is a UTILITY routine, but not an actual recovery routine. It only forwards to
 *       btree_dump_non_leaf_record with fixed trailing arguments 2 and 1.
 */
void
btree_rv_util_dump_nleafrec (THREAD_ENTRY * thread_p, FILE * fp, BTID_INT * btid, RECDES * rec)
{
  /* NOTE(review): meaning of the two trailing arguments is not visible here - confirm against
   * btree_dump_non_leaf_record. */
  const int nleaf_dump_arg1 = 2;
  const int nleaf_dump_arg2 = 1;

  btree_dump_non_leaf_record (thread_p, fp, btid, rec, nleaf_dump_arg1, nleaf_dump_arg2);
}
#endif
16913 
16914 /*
16915  * btree_rv_update_tran_stats () -
16916  * return: int
16917  * recv(in): Recovery structure
16918  *
16919  * Note: Recover the in-memory unique statistics.
16920  */
16921 int
16923 {
  /* NOTE(review): the line with the function name and parameter list (listing line 16922) is absent from this
   * listing; the body uses thread_p and recv. */
16924  char *datap;
16925  int num_nulls, num_oids, num_keys;
16926  BTID btid;
16927 
  /* The payload size is verified by this assert only. */
16928  assert (recv->length >= (3 * OR_INT_SIZE) + OR_BTID_ALIGNED_SIZE);
16929 
16930  /* unpack the root statistics */
  /* Read order: BTID, then key count, OID count and null count (one int each). */
16931  datap = (char *) recv->data;
16932 
16933  OR_GET_BTID (datap, &btid);
16934  datap += OR_BTID_ALIGNED_SIZE;
16935 
16936  num_keys = OR_GET_INT (datap);
16937  datap += OR_INT_SIZE;
16938 
16939  num_oids = OR_GET_INT (datap);
16940  datap += OR_INT_SIZE;
16941 
16942  num_nulls = OR_GET_INT (datap);
  /* datap is not read again after this final advance. */
16943  datap += OR_INT_SIZE;
16944 
16945  if (logtb_tran_update_unique_stats (thread_p, &btid, num_keys, num_oids, num_nulls, false) != NO_ERROR)
16946  {
16947  goto error;
16948  }
16949 
16950  return NO_ERROR;
16951 
  /* Any failure of the statistics update is reported as ER_GENERIC_ERROR; the specific code is not propagated.
   * NOTE(review): listing line 16953 is absent here. */
16952 error:
16954 
16955  return ER_GENERIC_ERROR;
16956 }
16957 
16958 /*
16959  * btree_rv_roothdr_undo_update () -
16960  * return: int
16961  * recv(in): Recovery structure
16962  *
16963  * Note: Recover the root header statistics for undo purposes.
16964  */
16965 int
16967 {
  /* NOTE(review): the line with the function name and parameter list (listing line 16966) is absent from this
   * listing; the body uses thread_p and recv. */
16968  char *datap;
16969  BTREE_ROOT_HEADER *root_header = NULL;
16970 
  /* The payload must hold at least the three int deltas read below. */
16971  if (recv->length < 3 * OR_INT_SIZE)
16972  {
16973  assert (false);
16974  goto error;
16975  }
16976 
16977  root_header = btree_get_root_header (thread_p, recv->pgptr);
16978  assert (root_header != NULL);
16979 
16980  if (root_header != NULL)
16981  {
16982  /* unpack the root statistics */
  /* The logged values are added to the header counters, in the order nulls, OIDs, keys. */
16983  datap = (char *) recv->data;
16984  root_header->num_nulls += OR_GET_INT (datap);
16985  datap += OR_INT_SIZE;
16986  root_header->num_oids += OR_GET_INT (datap);
16987  datap += OR_INT_SIZE;
16988  root_header->num_keys += OR_GET_INT (datap);
16989  }
16990 
  /* The page is marked dirty and NO_ERROR is returned even when root_header was NULL (non-assert builds). */
16991  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
16992 
16993  return NO_ERROR;
16994 
  /* NOTE(review): listing line 16997 is absent here. */
16995 error:
16996 
16998 
16999  return ER_GENERIC_ERROR;
17000 }
17001 
17002 /*
17003  * btree_rv_roothdr_dump () -
17004  * return:
17005  * length(in):
17006  * data(in):
17007  *
17008  * Note: Dump the root header statistics recovery information.
17009  */
17010 void
17011 btree_rv_roothdr_dump (FILE * fp, int length, void *data)
17012 {
17013  char *datap;
17014  int max_key_len, null_delta, oid_delta, key_delta;
17015 
17016  /* unpack the root statistics */
17017  datap = (char *) data;
17018  max_key_len = OR_GET_INT (datap);
17019  datap += OR_INT_SIZE;
17020  null_delta = OR_GET_INT (datap);
17021  datap += OR_INT_SIZE;
17022  oid_delta = OR_GET_INT (datap);
17023  datap += OR_INT_SIZE;
17024  key_delta = OR_GET_INT (datap);
17025  datap += OR_INT_SIZE;
17026 
17027  fprintf (fp, "\nMAX_KEY_LEN: %d NUM NULLS DELTA: %d NUM OIDS DELTA: %d NUM KEYS DELTA: %d\n\n", max_key_len,
17028  null_delta, oid_delta, key_delta);
17029 }
17030 
17031 /*
17032  * btree_rv_ovfid_undoredo_update () -
17033  * return: int
17034  * recv(in): Recovery structure
17035  *
17036  * Note: Recover the overflow VFID in the root header
17037  */
17038 int
17040 {
  /* NOTE(review): the line with the function name and parameter list (listing line 17039) is absent from this
   * listing; the body uses thread_p and recv. */
17041  VFID ovfid;
17042  BTREE_ROOT_HEADER *root_header = NULL;
17043 
  /* The payload must be large enough to hold a VFID. */
17044  if (recv->length < (int) sizeof (VFID))
17045  {
17046  assert (false);
17047  goto error;
17048  }
17049 
17050  root_header = btree_get_root_header (thread_p, recv->pgptr);
17051  assert (root_header != NULL);
17052 
17053  if (root_header != NULL)
17054  {
  /* The logged VFID is read in place through a pointer cast and overwrites the header's ovfid. */
17055  ovfid = *((VFID *) recv->data); /* structure copy */
17056  root_header->ovfid = ovfid;
17057  }
17058 
  /* The page is marked dirty and NO_ERROR is returned even when root_header was NULL (non-assert builds). */
17059  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17060 
17061  return NO_ERROR;
17062 
  /* NOTE(review): listing line 17065 is absent here. */
17063 error:
17064 
17066 
17067  return ER_GENERIC_ERROR;
17068 }
17069 
17070 /*
17071  * btree_rv_ovfid_dump () -
17072  * return:
17073  * length(in):
17074  * data(in):
17075  *
17076  * Note: Dump the overflow VFID for the root header.
17077  */
17078 void
17079 btree_rv_ovfid_dump (FILE * fp, int length, void *data)
17080 {
17081  VFID ovfid;
17082 
17083  ovfid = *((VFID *) data); /* structure copy */
17084 
17085  fprintf (fp, "\nOverflow key file VFID: %d|%d\n\n", ovfid.fileid, ovfid.volid);
17086 }
17087 
17088 /*
17089  * btree_rv_nodehdr_undoredo_update () - Recover an update to a node header. used either for undo or redo
17090  * return: int
17091  * recv(in): Recovery structure
17092  *
17093  * Note: Recover the update to a node header
17094  */
17095 int
17097 {
  /* NOTE(review): the line with the function name and parameter list (listing line 17096) is absent from this
   * listing; the body uses thread_p and recv. */
17098  RECDES rec;
17099 #if !defined(NDEBUG)
17100  RECDES peek_rec;
17101 #endif
17102  int sp_success;
17103 
  /* Wrap the logged bytes in a record descriptor; the whole payload is the new header record. */
17104  rec.area_size = rec.length = recv->length;
17105  rec.type = REC_HOME;
17106  rec.data = (char *) recv->data;
17107 
17108 #if !defined(NDEBUG)
  /* Debug builds only: the record currently in the HEADER slot must have the same length as the logged one. */
17109  if (spage_get_record (thread_p, recv->pgptr, HEADER, &peek_rec, PEEK) != S_SUCCESS)
17110  {
17111  return ER_FAILED;
17112  }
17113 
17114  assert (rec.length == peek_rec.length);
17115 #endif
17116 
17117  sp_success = spage_update (thread_p, recv->pgptr, HEADER, &rec);
17118  if (sp_success != SP_SUCCESS)
17119  {
  /* NOTE(review): listing line 17122 is absent inside this branch. */
17120  if (sp_success != SP_ERROR)
17121  {
17123  }
17124  ASSERT_ERROR ();
17125  assert (false);
17126  return er_errid ();
17127  }
17128 
17129  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17130 
17131  return NO_ERROR;
17132 }
17133 
17134 /*
17135  * btree_rv_nodehdr_redo_insert () - Recover a node header insertion. used for redo
17136  * return: int
17137  * recv(in): Recovery structure
17138  *
17139  * Note: Recover a node header insertion by reinserting the node header for redo purposes.
17140  */
17141 int
17143 {
  /* NOTE(review): the line with the function name and parameter list (listing line 17142) is absent from this
   * listing; the body uses thread_p and recv. */
17144  RECDES rec;
17145  int sp_success;
17146 
  /* Wrap the logged bytes in a record descriptor and insert it at the HEADER slot. */
17147  rec.area_size = rec.length = recv->length;
17148  rec.type = REC_HOME;
17149  rec.data = (char *) recv->data;
17150  sp_success = spage_insert_at (thread_p, recv->pgptr, HEADER, &rec);
17151  if (sp_success != SP_SUCCESS)
17152  {
  /* NOTE(review): listing line 17155 is absent inside this branch. */
17153  if (sp_success != SP_ERROR)
17154  {
17156  }
17157  ASSERT_ERROR ();
17158  assert (false);
17159  return er_errid ();
17160  }
17161 
17162  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17163 
17164  return NO_ERROR;
17165 }
17166 
17167 /*
17168  * btree_rv_nodehdr_undo_insert () - Recover a node header insertion. used for undo
17169  * return: int
17170  * recv(in): Recovery structure
17171  *
17172  * Note: Recover a node header insertion by deleting the node header for undo purposes.
17173  */
17174 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17176 {
17177  PGSLOTID pg_slotid;
17178 
/* Delete the record in slot HEADER of the page being recovered. */
17179  pg_slotid = spage_delete (thread_p, recv->pgptr, HEADER);
17180 
/* A NULL_SLOTID result is only caught by this assert; the function still returns NO_ERROR when asserts are off. */
17181  assert (pg_slotid != NULL_SLOTID);
17182 
17183  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17184 
17185  return NO_ERROR;
17186 }
17187 
17188 /*
17189  * btree_rv_noderec_undoredo_update () - Recover an update to a node record. used either for undo or redo
17191  * return: int
17192  * recv(in): Recovery structure
17193  *
17194  * Note: Recover the update to a node record
17195  */
17196 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17198 {
17199  RECDES rec;
17200  INT16 slotid;
17201  int sp_success;
17202 
/* The target slot comes from recv->offset. The logged data carries the record type as an INT16 at OFFS2 and the
 * record bytes from OFFS3 onward; rec.data points into recv->data (no copy is made). */
17203  slotid = recv->offset;
17204  rec.type = *(INT16 *) ((char *) recv->data + OFFS2);
17205  rec.area_size = rec.length = recv->length - OFFS3;
17206  rec.data = (char *) (recv->data) + OFFS3;
17207 
/* Slot ids handled here must be positive (HEADER is handled by the node header routines). */
17208  assert (slotid > 0);
17209  sp_success = spage_update (thread_p, recv->pgptr, slotid, &rec);
17210  if (sp_success != SP_SUCCESS)
17211  {
17212  if (sp_success != SP_ERROR)
17213  {
/* NOTE(review): this branch is empty in the listing; a statement appears to be missing here - confirm. */
17215  }
17216  ASSERT_ERROR ();
17217  assert (false);
17218  return er_errid ();
17219  }
17220 
/* Success: flag the page dirty and keep it fixed (DONT_FREE). */
17221  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17222 
17223  return NO_ERROR;
17224 }
17225 
17226 /*
17227  * btree_rv_noderec_redo_insert () - Recover a node record insertion. used for redo
17228  * return: int
17229  * recv(in): Recovery structure
17230  *
17231  * Note: Recover a node record insertion by reinserting the record for redo purposes
17232  */
17233 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17235 {
17236  RECDES rec;
17237  INT16 slotid;
17238  int sp_success;
17239 
/* The target slot comes from recv->offset. The logged data carries the record type as an INT16 at OFFS2 and the
 * record bytes from OFFS3 onward; rec.data points into recv->data (no copy is made). */
17240  slotid = recv->offset;
17241  rec.type = *(INT16 *) ((char *) recv->data + OFFS2);
17242  rec.area_size = rec.length = recv->length - OFFS3;
17243  rec.data = (char *) (recv->data) + OFFS3;
17244 
17245  assert (slotid > 0);
/* Re-insert the logged record at exactly the slot it was logged for. */
17246  sp_success = spage_insert_at (thread_p, recv->pgptr, slotid, &rec);
17247  if (sp_success != SP_SUCCESS)
17248  {
17249  if (sp_success != SP_ERROR)
17250  {
/* NOTE(review): this branch is empty in the listing; a statement appears to be missing here - confirm. */
17252  }
17253  ASSERT_ERROR ();
17254  assert (false);
17255  return er_errid ();
17256  }
17257 
/* Success: flag the page dirty and keep it fixed (DONT_FREE). */
17258  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17259 
17260  return NO_ERROR;
17261 }
17262 
17263 /*
17264  * btree_rv_noderec_undo_insert () - Recover a node record insertion. used for undo
17265  * return: int
17266  * recv(in): Recovery structure
17267  *
17268  * Note: Recover a node record insertion by deleting the record for undo purposes
17269  */
17270 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17272 {
17273  INT16 slotid;
17274  PGSLOTID pg_slotid;
17275 
/* The slot to remove comes from recv->offset; this routine uses spage_delete_for_recovery rather than spage_delete. */
17276  slotid = recv->offset;
17277  assert (slotid > 0);
17278  pg_slotid = spage_delete_for_recovery (thread_p, recv->pgptr, slotid);
17279 
/* A NULL_SLOTID result is only caught by this assert; the function still returns NO_ERROR when asserts are off. */
17280  assert (pg_slotid != NULL_SLOTID);
17281 
17282  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17283 
17284  return NO_ERROR;
17285 }
17286 
/*
 * btree_rv_noderec_dump () - Dump node record recovery information
 *   return: void
 *   fp(in): Output stream (currently unused)
 *   length(in): Length of Recovery Data (currently unused)
 *   data(in): The data being logged (currently unused)
 *
 * Note: Intentionally prints nothing for now.
 *       TODO: decoding a node record needs the index BTID, which is not part of the logged data. The easiest way
 *       is for the btid to be packed and sent, but this increases the log record. Alternatively this routine could
 *       be taught the layout of a node record. The previously disabled sketch read the node type as an INT16 at
 *       OFFS1, the record type as an INT16 at OFFS2 and the record bytes from OFFS3, then dispatched to
 *       btree_rv_util_dump_leafrec () or btree_rv_util_dump_nleafrec (). It was removed because it could not be
 *       enabled as written: it referenced an undeclared btid, used rec.length before setting it and did not check
 *       its allocation.
 */
void
btree_rv_noderec_dump (FILE * fp, int length, void *data)
{
  /* Nothing is dumped; the arguments are deliberately ignored. */
  (void) fp;
  (void) length;
  (void) data;
}
17323 
17324 /*
17325  * btree_rv_noderec_dump_slot_id () -
17326  * return: int
17327  * length(in): Length of Recovery Data
17328  * data(in): The data being logged
17329  *
17330  * Note: Dump the slot id for the slot to be deleted for undo purposes
17331  */
17332 
17333 void
17334 btree_rv_noderec_dump_slot_id (FILE * fp, int length, void *data)
17335 {
17336  fprintf (fp, " Slot_id: %d \n", *(INT16 *) data);
17337 }
17338 
17339 /*
17340  * btree_rv_pagerec_insert () -
17341  * return: int
17342  * recv(in): Recovery structure
17343  *
17344  * Note: Put a set of records to the page
17345  */
17346 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17348 {
17349  RECDES rec;
/* NOTE(review): the declaration of recset_header is absent from this listing; it is used below as a
 * RECSET_HEADER pointer. */
17351  char *datap;
17352  int i, offset, wasted;
17353  int sp_success;
17354 
17355  /* initialization */
17356  recset_header = (RECSET_HEADER *) recv->data;
17357 
17358  /* insert back saved records */
/* datap walks the logged bytes; offset tracks the same position relative to recv->data so that alignment padding
 * (BTREE_MAX_ALIGN) can be computed. The first record starts after the aligned RECSET_HEADER. */
17359  datap = (char *) recv->data + sizeof (RECSET_HEADER);
17360  offset = sizeof (RECSET_HEADER);
17361  wasted = DB_WASTED_ALIGN (offset, BTREE_MAX_ALIGN);
17362  datap += wasted;
17363  offset += wasted;
17364  for (i = 0; i < recset_header->rec_cnt; i++)
17365  {
/* Each packed entry is: INT16 length, INT16 type, 'length' record bytes, then padding up to BTREE_MAX_ALIGN. */
17366  rec.area_size = rec.length = *(INT16 *) datap;
17367  datap += 2;
17368  offset += 2;
17369  rec.type = *(INT16 *) datap;
17370  datap += 2;
17371  offset += 2;
17372  rec.data = datap;
17373  datap += rec.length;
17374  offset += rec.length;
17375  wasted = DB_WASTED_ALIGN (offset, BTREE_MAX_ALIGN);
17376  datap += wasted;
17377  offset += wasted;
17378 
/* Records go to consecutive slots starting at first_slotid. */
17379  assert (recset_header->first_slotid + i > 0);
17380  sp_success = spage_insert_at (thread_p, recv->pgptr, recset_header->first_slotid + i, &rec);
17381  if (sp_success != SP_SUCCESS)
17382  {
17383  if (sp_success != SP_ERROR)
17384  {
/* NOTE(review): this branch is empty in the listing; a statement appears to be missing here - confirm. */
17386  }
17387  assert (false);
17388  goto error;
17389  } /* if */
17390  } /* for */
17391 
17392  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17393 
17394  return NO_ERROR;
17395 
17396 error:
17397 
/* Failure exit: the page is not flagged dirty on this path; the registered error id is returned. */
17398  ASSERT_ERROR ();
17399  return er_errid ();
17400 }
17401 
17402 /*
17403  * btree_rv_pagerec_delete () -
17404  * return: int
17405  * recv(in): Recovery structure
17406  *
17407  * Note: Delete a set of records from the page for undo or redo purpose
17408  */
17409 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17411 {
/* NOTE(review): the declaration of recset_header is absent from this listing; it is used below as a
 * RECSET_HEADER pointer. */
17413  int i;
17414 
17415  recset_header = (RECSET_HEADER *) recv->data;
17416 
17417  /* delete all specified records from the page */
/* The same slot id (first_slotid) is deleted rec_cnt times - presumably spage_delete shifts the following records
 * down so the next one lands in that slot; TODO confirm against spage_delete. */
17418  for (i = 0; i < recset_header->rec_cnt; i++)
17419  {
17420  assert (recset_header->first_slotid > 0);
17421  if (spage_delete (thread_p, recv->pgptr, recset_header->first_slotid) != recset_header->first_slotid)
17422  {
17423  ASSERT_ERROR ();
17424  assert (false);
17425  return er_errid ();
17426  }
17427  }
17428 
17429  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
17430 
17431  return NO_ERROR;
17432 }
17433 
17434 /*
17435  * btree_rv_newpage_redo_init () -
17436  * return: int
17437  * recv(in): Recovery structure
17438  *
17439  * Note: Initialize a B+tree page.
17440  */
17441 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17443 {
/* Tag the recovered page as a B-tree page; the call's result is deliberately discarded. */
17444  (void) pgbuf_set_page_ptype (thread_p, recv->pgptr, PAGE_BTREE);
17445 
/* NOTE(review): one statement appears to be missing from the listing at this point - confirm against the real
 * source before relying on this routine's description. */
17447 
17448  return NO_ERROR;
17449 }
17450 
17451 /*
17452  * btree_rv_read_keyval_info_nocopy () - Recover key value and other information on b-tree operation.
17453  *
17454  * return : Void.
17455  * thread_p (in) : Thread entry.
17456  * datap (in) : Buffer containing recovery data.
17457  * data_size (in) : Size of recovery data.
17458  * btid (out) : B-tree identifier.
17459  * cls_oid (out) : Class identifier.
17460  * oid (out) : Object identifier.
17461  * mvcc_id (in/out) : Operation MVCCID. It must be NULL for non-MVCC operations and not NULL for MVCC operations.
17462  * key (out) : Key value.
17463  *
17464  * Note: If it is an MVCC operation recovery (mvcc_id is not NULL), data will
17465  * start with a log lsa (of a previous MVCC operation in log), which
17466  * is used my vacuum only and must be skipped.
17467  *
17468  * Warning: This assumes that the key value has the index's domain and not the
17469  * non-leaf domain. This should be the case since this is a logical
17470  * operation and not a physical one.
17471  */
17472 int
17473 btree_rv_read_keyval_info_nocopy (THREAD_ENTRY * thread_p, char *datap, int data_size, BTID_INT * btid, OID * cls_oid,
17474  OID * oid, BTREE_MVCC_INFO * mvcc_info, DB_VALUE * key)
17475 {
17476  OR_BUF buf;
17477  PR_TYPE *pr_type;
17478  VPID root_vpid;
17479  PAGE_PTR root = NULL;
17480  BTREE_ROOT_HEADER *root_header = NULL;
17481  int key_size = -1;
17482  int error_code = NO_ERROR;
17483 
17484  assert (mvcc_info != NULL);
17485 
17486  btree_rv_read_keybuf_nocopy (thread_p, datap, data_size, btid, cls_oid, oid, mvcc_info, &buf);
17487 
17488  root_vpid.pageid = btid->sys_btid->root_pageid; /* read root page */
17489  root_vpid.volid = btid->sys_btid->vfid.volid;
17490 
17491  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
17492  if (root == NULL)
17493  {
17494  ASSERT_ERROR_AND_SET (error_code);
17495  goto error;
17496  }
17497 
17498  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
17499 
17500  root_header = btree_get_root_header (thread_p, root);
17501  if (root_header == NULL)
17502  {
17503  assert_release (false);
17504  error_code = ER_FAILED;
17505  goto error;
17506  }
17507 
17508  error_code = btree_glean_root_header_info (thread_p, root_header, btid);
17509  if (error_code != NO_ERROR)
17510  {
17511  ASSERT_ERROR ();
17512  goto error;
17513  }
17514 
17515  pgbuf_unfix_and_init (thread_p, root);
17516 
17517  pr_type = btid->key_type->type;
17518 
17519  /* Do not copy the string--just use the pointer. The pr_ routines for strings and sets have different semantics for
17520  * length. */
17521  if (pr_type->id == DB_TYPE_MIDXKEY)
17522  {
17523  key_size = CAST_BUFLEN (buf.endptr - buf.ptr);
17524  }
17525 
17526  error_code = pr_type->index_readval (&buf, key, btid->key_type, key_size, false /* not copy */ , NULL, 0);
17527  if (error_code != NO_ERROR)
17528  {
17529  ASSERT_ERROR ();
17530  goto error;
17531  }
17532 
17533  return NO_ERROR;
17534 
17535 error:
17536 
17537  assert (error_code != NO_ERROR);
17538  if (root != NULL)
17539  {
17540  pgbuf_unfix_and_init (thread_p, root);
17541  }
17542  return error_code;
17543 }
17544 
17545 /*
17546  * btree_rv_read_keybuf_nocopy () - Initializes a buffer from recovery info
17547  *
17548  * return : Void.
17549  * thread_p (in) : Thread entry.
17550  * datap (in) : Buffer containing recovery data.
17551  * data_size (in) : Size of recovery data.
17552  * btid (out) : B-tree identifier.
17553  * cls_oid (out) : Class identifier.
17554  * oid (out) : Object identifier.
17555  * mvcc_info (out) : MVCC information read from the data (flags, insert MVCCID, delete MVCCID). Must not be NULL.
17556  * key_buf (out) : buffer for packed key.
17557  *
17558  * Note: this should be preferred to btree_rv_read_keyval_info_nocopy
17559  * which performs an additional root page latch to retrieve key type.
17560  * Use this, if key type is already available.
17561  *
17562  * Warning: This assumes that the key value has the index's domain and not the
17563  * non-leaf domain. This should be the case since this is a logical
17564  * operation and not a physical one.
17565  */
17566 void
17567 btree_rv_read_keybuf_nocopy (THREAD_ENTRY * thread_p, char *datap, int data_size, BTID_INT * btid, OID * cls_oid,
17568  OID * oid, BTREE_MVCC_INFO * mvcc_info, OR_BUF * key_buf)
17569 {
/* Remember where the data begins so the number of consumed bytes can be subtracted from data_size at the end. */
17570  char *start = datap;
17571 
17572  assert (mvcc_info != NULL);
17573 
17574  /* extract the stored btid, key, oid data */
/* btid->sys_btid must already point to writable storage; it is filled here. */
17575  datap = or_unpack_btid (datap, btid->sys_btid);
17576 
17577  OR_GET_OID (datap, oid);
17578  datap += OR_OID_SIZE;
17579 
/* NOTE(review): the condition that selects between the two branches below is absent from this listing. A class OID
 * is read from the data only in the first branch. */
17581  {
17582  /* Read class OID from record. */
17583  OR_GET_OID (datap, cls_oid);
17584  datap += OR_OID_SIZE;
17585  }
17586  else
17587  {
17588  /* Set class OID NULL. */
17589  OID_SET_NULL (cls_oid);
17590  }
17591 
/* The MVCC flags are taken from the OID just read; each flag says whether the corresponding MVCCID follows in the
 * data. Absent ids get defaults: MVCCID_ALL_VISIBLE for insert, MVCCID_NULL for delete. */
17592  mvcc_info->flags = BTREE_OID_GET_MVCC_FLAGS (oid);
17593  if (mvcc_info->flags & BTREE_OID_HAS_MVCC_INSID)
17594  {
17595  OR_GET_MVCCID (datap, &mvcc_info->insert_mvccid);
17596  datap += OR_MVCCID_SIZE;
17597  }
17598  else
17599  {
17600  mvcc_info->insert_mvccid = MVCCID_ALL_VISIBLE;
17601  }
17602  if (mvcc_info->flags & BTREE_OID_HAS_MVCC_DELID)
17603  {
17604  OR_GET_MVCCID (datap, &mvcc_info->delete_mvccid);
17605  datap += OR_MVCCID_SIZE;
17606  }
17607  else
17608  {
17609  mvcc_info->delete_mvccid = MVCCID_NULL;
17610  }
17611 
/* NOTE(review): one statement appears to be missing from the listing at this point - confirm against the real
 * source. */
17613 
/* The packed key starts at the next INT_ALIGNMENT boundary; key_buf covers the remaining bytes of the data. */
17614  datap = PTR_ALIGN (datap, INT_ALIGNMENT);
17615  or_init (key_buf, datap, (data_size - CAST_BUFLEN (datap - start)));
17616 }
17617 
17618 /*
17619  * btree_rv_read_keybuf_two_objects () - Read undo buffer packed which contains two objects.
17620  *
17621  * return : Void.
17622  * thread_p (in) : Thread entry.
17623  * datap (in) : Packed data.
17624  * data_size (in) : Packed data size.
17625  * btid_int (out) : Output BTID for b-tree info.
17626  * first_version (out) : First object version.
17627  * second_version (out) : Second object version.
17628  * key_buf (out) : Buffer containing packed key value.
17629  */
17630 void
17631 btree_rv_read_keybuf_two_objects (THREAD_ENTRY * thread_p, char *datap, int data_size, BTID_INT * btid_int,
17632  BTREE_OBJECT_INFO * first_version, BTREE_OBJECT_INFO * second_version,
17633  OR_BUF * key_buf)
17634 {
/* Remember where the data begins so the number of consumed bytes can be subtracted from data_size at the end. */
17635  char *start = datap;
17636 
17637  assert (datap != NULL);
17638  assert (data_size > 0);
17639  assert (btid_int != NULL);
17640  assert (btid_int->sys_btid != NULL);
17641  assert (first_version != NULL);
17642  assert (second_version != NULL);
17643  assert (key_buf != NULL);
17644 
17645  /* extract the stored btid */
17646  datap = or_unpack_btid (datap, btid_int->sys_btid);
17647 
17648  /* extract first object. */
/* The first object is expected to carry no MVCC flags (asserted), so no MVCCIDs are read for it. */
17649  datap = or_unpack_oid (datap, &first_version->oid);
17650  assert (BTREE_OID_GET_MVCC_FLAGS (&first_version->oid) == 0);
/* NOTE(review): the condition guarding the class OID read below is absent from this listing. */
17652  {
17653  datap = or_unpack_oid (datap, &first_version->class_oid);
17654  }
17655  else
17656  {
17657  OID_SET_NULL (&first_version->class_oid);
17658  }
17659  BTREE_OID_CLEAR_ALL_FLAGS (&first_version->oid);
17660 
17661  /* extract second object. */
17662  datap = or_unpack_oid (datap, &second_version->oid);
/* NOTE(review): the condition guarding the class OID read below is absent from this listing. */
17664  {
17665  datap = or_unpack_oid (datap, &second_version->class_oid);
17666  }
17667  else
17668  {
17669  OID_SET_NULL (&second_version->class_oid);
17670  }
17671 
/* NOTE(review): the conditions of the next two blocks are absent from this listing. Each block reads one MVCCID and
 * ORs the matching flag into second_version->mvcc_info.flags; the flags are not reset in the visible code, so the
 * caller presumably passes an initialized BTREE_OBJECT_INFO - confirm. */
17673  {
17674  datap = or_unpack_mvccid (datap, &second_version->mvcc_info.insert_mvccid);
17675  second_version->mvcc_info.flags |= BTREE_OID_HAS_MVCC_INSID;
17676  }
17678  {
17679  datap = or_unpack_mvccid (datap, &second_version->mvcc_info.delete_mvccid);
17680  second_version->mvcc_info.flags |= BTREE_OID_HAS_MVCC_DELID;
17681  }
17682  BTREE_OID_CLEAR_ALL_FLAGS (&second_version->oid);
17683 
/* The packed key starts at the next INT_ALIGNMENT boundary; key_buf covers the remaining bytes of the data. */
17684  datap = PTR_ALIGN (datap, INT_ALIGNMENT);
17685  or_init (key_buf, datap, (data_size - CAST_BUFLEN (datap - start)));
17686 }
17687 
17688 /*
17689  * btree_rv_keyval_undo_insert () - Undo insert operation.
17690  *
17691  * return : Error code.
17692  * thread_p (in) : Thread entry.
17693  * recv (in) : Recovery data.
17694  */
17695 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17697 {
17698  BTID_INT btid;
17699  BTID sys_btid;
17700  OR_BUF key_buf;
17701  OID cls_oid;
17702  OID oid;
17703  char *datap;
17704  int datasize;
17705  BTREE_MVCC_INFO dummy_mvcc_info;
17706  int err = NO_ERROR;
17707  MVCCID insert_mvccid;
17708 
17709  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
17710  btid.sys_btid = &sys_btid;
17711 
17712  /* extract the stored btid, key, oid data */
/* The MVCC info unpacked here is not used afterwards (hence "dummy"); the insert MVCCID comes from recv below. */
17713  datap = (char *) recv->data;
17714  datasize = recv->length;
17715  btree_rv_read_keybuf_nocopy (thread_p, datap, datasize, &btid, &cls_oid, &oid, &dummy_mvcc_info, &key_buf);
17716 
17717  assert (!OID_ISNULL (&oid));
17718 
/* Use the MVCCID carried by the recovery structure when it is valid; otherwise fall back to MVCCID_ALL_VISIBLE. */
17719  if (MVCCID_IS_VALID (recv->mvcc_id))
17720  {
17721  insert_mvccid = recv->mvcc_id;
17722  }
17723  else
17724  {
17725  insert_mvccid = MVCCID_ALL_VISIBLE;
17726  }
17727 
17728  /* Undo insert: just delete object and all its information. */
17729  err =
17730  btree_undo_insert_object (thread_p, btid.sys_btid, &key_buf, &oid, &cls_oid, insert_mvccid, &recv->reference_lsa);
17731  if (err != NO_ERROR)
17732  {
/* Only ER_BTREE_UNKNOWN_KEY and ER_INTERRUPTED are considered expected failures here (debug assert). */
17733  ASSERT_ERROR ();
17734  assert (err == ER_BTREE_UNKNOWN_KEY || err == NO_ERROR || err == ER_INTERRUPTED);
17735  return err;
17736  }
17737 
17738  return NO_ERROR;
17739 }
17740 
17741 /*
17742  * btree_rv_keyval_undo_insert_unique () - Undo insert operation. Additional to regular insert, must make sure visible
17743  * object is returned to first position.
17744  *
17745  * return : Error code.
17746  * thread_p (in) : Thread entry.
17747  * recv (in) : Recovery data.
17748  */
17749 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17751 {
17752  BTID_INT btid;
17753  BTID sys_btid;
17754  OR_BUF key_buf;
17755  char *datap;
17756  int datasize;
17757  int err = NO_ERROR;
17758  MVCCID insert_mvccid = MVCCID_NULL;
17759  BTREE_OBJECT_INFO undo_insert_object = BTREE_OBJECT_INFO_INITIALIZER;
/* NOTE(review): the declaration of second_object is absent from this listing; it is passed below as the second
 * BTREE_OBJECT_INFO. */
17761 
17762  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
17763  btid.sys_btid = &sys_btid;
17764 
17765  /* extract the stored btid, key, oid's data */
17766  datap = (char *) recv->data;
17767  datasize = recv->length;
17768  btree_rv_read_keybuf_two_objects (thread_p, datap, datasize, &btid, &undo_insert_object, &second_object, &key_buf);
17769 
/* Use the MVCCID carried by the recovery structure when it is valid; otherwise fall back to MVCCID_ALL_VISIBLE. */
17770  if (MVCCID_IS_VALID (recv->mvcc_id))
17771  {
17772  insert_mvccid = recv->mvcc_id;
17773  }
17774  else
17775  {
17776  insert_mvccid = MVCCID_ALL_VISIBLE;
17777  }
17778 
17779  /* Undo insert. */
17780  err =
17781  btree_undo_insert_object_unique_multiupd (thread_p, btid.sys_btid, &key_buf, &undo_insert_object, &second_object,
17782  insert_mvccid, &recv->reference_lsa);
17783  if (err != NO_ERROR)
17784  {
/* Any failure is treated as unexpected: the specific error code is dropped and ER_FAILED is returned instead. */
17785  assert_release (false);
17786  return ER_FAILED;
17787  }
17788 
17789  return NO_ERROR;
17790 }
17791 
17792 /*
17793  * btree_rv_keyval_undo_insert_mvcc_delid () - Recovery function for undo MVCC delete.
17794  * return : Error code.
17795  * recv (in): Recovery data.
17796  *
17797  * Note: Undo the insertion of a delete MVCCID by looking up <key, oid, delete_mvccid> pair in B+tree. Sometimes we
17798  * may need to also match the insert MVCCID (if the object was also inserted by this transaction).
17799  */
17800 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17802 {
17803  BTID_INT btid;
17804  BTID sys_btid;
17805  OR_BUF key_buf;
17806  OID cls_oid;
17807  OID oid;
17808  char *datap;
17809  int datasize;
/* NOTE(review): the declarations of mvcc_info and match_mvccinfo are absent from this listing (one before and one
 * after the 'err' declaration); both are used below as BTREE_MVCC_INFO objects. */
17811  int err = NO_ERROR;
17813 
17814  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
17815  btid.sys_btid = &sys_btid;
17816 
17817  /* extract the stored btid, key, oid data */
17818  datap = (char *) recv->data;
17819  datasize = recv->length;
17820 
17821  btree_rv_read_keybuf_nocopy (thread_p, datap, datasize, &btid, &cls_oid, &oid, &mvcc_info, &key_buf);
17822  assert (!OID_ISNULL (&oid));
17823 
/* Choose the delete MVCCID to match: the one packed in the log data when recv carries none, else recv->mvcc_id. */
17824  if (recv->mvcc_id == MVCCID_NULL)
17825  {
17826  /* Not a MVCC log record. MVCCID should be saved in log data. */
17827  /* TODO: Is this acceptable? I don't think this code is ever touched. */
17828  assert (BTREE_MVCC_INFO_IS_DELID_VALID (&mvcc_info));
17829  BTREE_MVCC_INFO_SET_DELID (&match_mvccinfo, mvcc_info.delete_mvccid);
17830  }
17831  else
17832  {
17833  BTREE_MVCC_INFO_SET_DELID (&match_mvccinfo, recv->mvcc_id);
17834  }
17835  assert (MVCCID_IS_VALID (BTREE_MVCC_INFO_DELID (&match_mvccinfo)));
17836 
/* NOTE(review): the condition guarding the block below is absent from this listing. */
17838  {
17839  /* We also need to match insert MVCCID, which is the same as delete_mvccid. */
17840  BTREE_MVCC_INFO_SET_INSID (&match_mvccinfo, match_mvccinfo.delete_mvccid);
17841  }
17842 
17843  err =
17844  btree_undo_mvcc_delete (thread_p, btid.sys_btid, &key_buf, &oid, &cls_oid, &match_mvccinfo, &recv->reference_lsa);
17845  if (err != NO_ERROR)
17846  {
/* Only ER_BTREE_UNKNOWN_KEY and ER_INTERRUPTED are considered expected failures here (debug assert). */
17847  ASSERT_ERROR ();
17848  assert (err == ER_BTREE_UNKNOWN_KEY || err == NO_ERROR || err == ER_INTERRUPTED);
17849  return err;
17850  }
17851 
17852  return NO_ERROR;
17853 }
17854 
17855 /*
17856  * btree_rv_keyval_undo_delete () -
17857  * return: int
17858  * recv(in): Recovery structure
17859  *
17860  * Note: undo the deletion of a <key, val> pair to the B+tree,
17861  * by inserting the <key, val> pair to the tree.
17862  */
17863 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17865 {
17866  BTID_INT btid;
17867  BTID sys_btid;
17868  DB_VALUE key;
17869  OID cls_oid;
17870  OID oid;
17871  char *datap;
17872  int datasize;
/* NOTE(review): the declaration of mvcc_info is absent from this listing; it is used below as a BTREE_MVCC_INFO. */
17874  int error_code = NO_ERROR;
17875 
17876  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
17877  btid.sys_btid = &sys_btid;
17878 
17879  /* extract the stored btid, key, oid data */
/* Unlike the other undo routines here, this one needs the unpacked key value (not just the packed buffer), so it
 * calls the variant that also reads the root page to learn the key type. */
17880  datap = (char *) recv->data;
17881  datasize = recv->length;
17882  error_code = btree_rv_read_keyval_info_nocopy (thread_p, datap, datasize, &btid, &cls_oid, &oid, &mvcc_info, &key);
17883  if (error_code != NO_ERROR)
17884  {
17885  ASSERT_ERROR ();
17886  return error_code;
17887  }
17888 
17889  assert (!OID_ISNULL (&oid));
17890 
17891  /* Insert object and all its info. */
17892  error_code =
17893  btree_undo_delete_physical (thread_p, btid.sys_btid, &key, &cls_oid, &oid, &mvcc_info, &recv->reference_lsa);
17894  if (error_code != NO_ERROR)
17895  {
/* Only ER_BTREE_DUPLICATE_OID and ER_INTERRUPTED are considered expected failures here (debug assert). */
17896  ASSERT_ERROR ();
17897  assert (error_code == ER_BTREE_DUPLICATE_OID || error_code == ER_INTERRUPTED);
17898  return error_code;
17899  }
17900 
17901  return NO_ERROR;
17902 }
17903 
17904 /*
17905  * btree_rv_remove_marked_for_delete () - Part of run postpone to remove an object which was previously marked
17906  * for delete.
17907  *
17908  *
17909  * return : Error code.
17910  * thread_p (in) : Thread entry.
17911  * rcv (in) : Recovery data.
17912  */
17913 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and rcv, so those are presumably the parameters - confirm against the real source. */
17915 {
17916  BTID_INT btree_info;
17917  BTID sys_btid;
17918  OR_BUF key_buf;
/* NOTE(review): the declaration of object_info is absent from this listing; its class_oid, oid and mvcc_info
 * members are filled below. */
17920  int error_code = NO_ERROR;
17921 
17922  assert (!LSA_ISNULL (&rcv->reference_lsa));
17923 
/* Unpack the logged BTID, object and MVCC info; btree_info.sys_btid points at stack storage for the BTID. */
17924  btree_info.sys_btid = &sys_btid;
17925  btree_rv_read_keybuf_nocopy (thread_p, (char *) rcv->data, rcv->length, &btree_info, &object_info.class_oid,
17926  &object_info.oid, &object_info.mvcc_info, &key_buf);
17927 
/* The delete MVCCID read from the log data is passed on as its own argument. */
17928  error_code =
17929  btree_delete_postponed (thread_p, btree_info.sys_btid, &key_buf, &object_info, object_info.mvcc_info.delete_mvccid,
17930  &rcv->reference_lsa);
17931  if (error_code != NO_ERROR)
17932  {
/* Failure is treated as unexpected, but the error code is still returned to the caller. */
17933  assert_release (false);
17934  }
17935  return error_code;
17936 }
17937 
17938 /*
17939  * btree_rv_keyval_dump () - Dump undo information <key-value> insertion.
17940  *
17941  * return : Void.
17942  * fp (in) : File pointer.
17943  * length (in) : Data length.
17944  * data (in) : Recovery data.
17945  */
17946 void
17947 btree_rv_keyval_dump (FILE * fp, int length, void *data)
17948 {
17949  BTID btid;
17950  OID oid, class_oid;
17951  short mvcc_flags;
17952  MVCCID mvccid;
17953  char *datap = (char *) data;
17954 
/* The fields are unpacked in the order they are stored: BTID, OID, optional class OID, optional MVCCIDs, key. */
17955  datap = or_unpack_btid (datap, &btid);
17956  fprintf (fp, " BTID = { { %d , %d }, %d} \n ", btid.vfid.volid, btid.vfid.fileid, btid.root_pageid);
17957 
/* The MVCC flags are read from the OID before it is printed. */
17958  datap = or_unpack_oid (datap, &oid);
17959  mvcc_flags = BTREE_OID_GET_MVCC_FLAGS (&oid);
17960 
/* NOTE(review): the condition selecting between the two branches below is absent from this listing, and one
 * statement appears to be missing in each branch just before the OID is printed - confirm against the real
 * source. Only the first branch reads and prints a class OID. */
17962  {
17963  datap = or_unpack_oid (datap, &class_oid);
17965  fprintf (fp, " OID = { %d, %d, %d } \n", oid.volid, oid.pageid, oid.slotid);
17966  fprintf (fp, " CLASS_OID = { %d, %d, %d } \n", class_oid.volid, class_oid.pageid, class_oid.slotid);
17967  }
17968  else
17969  {
17971  fprintf (fp, " OID = { %d, %d, %d } \n", oid.volid, oid.pageid, oid.slotid);
17972  }
17973 
/* The same local is reused for both ids; each is printed right after it is unpacked. */
17974  if (mvcc_flags & BTREE_OID_HAS_MVCC_INSID)
17975  {
17976  datap = or_unpack_mvccid (datap, &mvccid);
17977  fprintf (fp, " INSERT MVCCID = %llu \n", (long long unsigned int) mvccid);
17978  }
17979  if (mvcc_flags & BTREE_OID_HAS_MVCC_DELID)
17980  {
17981  datap = or_unpack_mvccid (datap, &mvccid);
17982  fprintf (fp, " DELETE MVCCID = %llu \n", (long long unsigned int) mvccid);
17983  }
17984  /* Print key as hexa. */
/* Whatever remains after the unpacked fields is dumped as the key; no alignment is applied to datap here. */
17985  log_rv_dump_hexa (fp, length - CAST_BUFLEN (datap - (char *) data), datap);
17986 }
17987 
17988 /*
17989  * btree_rv_undoredo_copy_page () -
17990  * return: int
17991  * recv(in): Recovery structure
17992  *
17993  * Note: Copy a whole page back for undo or redo purposes
17994  */
17995 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * thread_p and recv, so those are presumably the parameters - confirm against the real source. */
17997 {
17998  (void) pgbuf_set_page_ptype (thread_p, recv->pgptr, PAGE_BTREE); /* redo */
17999 
/* Overwrite the page with DB_PAGESIZE bytes of logged data; recv->length is not consulted, so the log record is
 * assumed to hold a full page image - TODO confirm. */
18000  (void) memcpy (recv->pgptr, recv->data, DB_PAGESIZE);
18001 
18002  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
18003 
18004  return NO_ERROR;
18005 }
18006 
18007 /*
18008  * btree_rv_nop () -
18009  * return: int
18010  * recv(in): Recovery structure
18011  *
18012  *
18013  * Note: Does nothing. This routine is used for to accompany some compensating redo logs which are supposed
18014  * to do nothing.
18015  */
18016 int
18017 btree_rv_nop (THREAD_ENTRY * thread_p, LOG_RCV * recv)
18018 {
18019  assert (recv->pgptr != NULL);
18020  pgbuf_set_dirty (thread_p, recv->pgptr, DONT_FREE);
18021  return NO_ERROR;
18022 }
18023 
18024 /*
18025  * btree_multicol_key_is_null () -
18026  * return: Return true if DB_VALUE is a NULL multi-column key and false otherwise.
18027  * key(in): Pointer to multi-column key
18028  *
18029  * Note: Check the multi-column key for a NULL value. In terms of the B-tree,
18030  * a NULL multi-column key is a sequence in which each element is NULL.
18031  */
18032 bool
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * a single value named key - confirm the exact signature against the real source. */
18034 {
18035  bool status = false;
18036  DB_MIDXKEY *midxkey;
18037  unsigned char *bits;
18038  int nbytes, i;
18039 
/* Anything that is not a multi-column key yields false. */
18040  if (DB_VALUE_TYPE (key) == DB_TYPE_MIDXKEY)
18041  {
18042  assert (!DB_IS_NULL (key));
18043 
18044  midxkey = db_get_midxkey (key);
18045  assert (midxkey != NULL);
18046 
18047  /* ncolumns == -1 means already constructing step */
18048  if (midxkey && midxkey->ncolumns != -1)
18049  {
/* The key buffer starts with the bound bits; the key counts as NULL only when every one of those bytes is zero. */
18050  bits = (unsigned char *) midxkey->buf;
18051  nbytes = OR_MULTI_BOUND_BIT_BYTES (midxkey->ncolumns);
18052  for (i = 0; i < nbytes; i++)
18053  {
18054  if (bits[i] != (unsigned char) 0)
18055  {
18056  return false;
18057  }
18058  }
18059 
18060  status = true;
18061  }
/* A key with min_max_val.position set (anything other than -1) is never reported as NULL.
 * NOTE(review): midxkey is dereferenced here without the NULL test used a few lines above; with asserts disabled a
 * NULL midxkey would be dereferenced. */
18062  if (midxkey->min_max_val.position != -1)
18063  {
18064  return false;
18065  }
18066  }
18067 
18068  return status;
18069 }
18070 
18071 /*
18072  * btree_multicol_key_has_null () -
18073  * return: Return true if DB_VALUE is a multi-column key and has a NULL element in it and false otherwise.
18074  * key(in): Pointer to multi-column key
18075  *
18076  * Note: Check the multi-column key has a NULL element.
18077  */
18078 int
/* NOTE(review): the line carrying the function name and parameter list is absent from this listing. The body reads
 * a single value named key - confirm the exact signature against the real source. */
18080 {
18081  int status = 0;
18082  DB_MIDXKEY *midxkey;
18083  int i;
18084 
/* Anything that is not a multi-column key yields 0. */
18085  if (DB_VALUE_TYPE (key) == DB_TYPE_MIDXKEY)
18086  {
18087  assert (!DB_IS_NULL (key));
18088 
18089  midxkey = db_get_midxkey (key);
18090  assert (midxkey != NULL);
18091 
18092  /* ncolumns == -1 means already constructing step */
18093  if (midxkey && midxkey->ncolumns != -1)
18094  {
/* Return 1 as soon as any column is unbound; 0 when all ncolumns columns are bound. */
18095  for (i = 0; i < midxkey->ncolumns; i++)
18096  {
18097  if (OR_MULTI_ATT_IS_UNBOUND (midxkey->buf, i))
18098  {
18099  return 1;
18100  }
18101  }
18102 
18103  return 0;
18104  }
18105  }
18106 
/* Reached for non-midxkey values, and for midxkeys that are NULL or still under construction; status is still 0. */
18107  return status;
18108 }
18109 
18110 /*
18111  * btree_find_key_from_leaf () -
18112  * return:
18113  * btid(in):
18114  * pg_ptr(in):
18115  * key_cnt(in):
18116  * oid(in):
18117  * key(in):
18118  * clear_key(in):
18119  */
18120 static DISK_ISVALID
18121 btree_find_key_from_leaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, int key_cnt, OID * oid,
18122  DB_VALUE * key, bool * clear_key)
18123 {
18124  RECDES rec;
18125  LEAF_REC leaf_pnt;
18126  VPID ovfl_vpid;
18127  int i, offset;
18128  int error_code;
18129  PAGE_PTR found_page = NULL;
18130  int offset_to_object = NOT_FOUND;
18131 
18132  VPID_SET_NULL (&leaf_pnt.ovfl);
18133 
/* Scan the record slots 1..key_cnt of the leaf page in order, looking for the one that contains oid. */
18134  for (i = 1; i <= key_cnt; i++)
18135  {
18136  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
18137  {
18138  return DISK_ERROR;
18139  }
18140 
/* NOTE(review): the end of the btree_read_record call (remaining arguments and the comparison closing the 'if')
 * is absent from this listing. */
18141  if (btree_read_record (thread_p, btid, pg_ptr, &rec, key, &leaf_pnt, BTREE_LEAF_NODE, clear_key, &offset,
18143  {
18144  return DISK_ERROR;
18145  }
/* ovfl_vpid is assigned but not read again in the visible code. */
18146  ovfl_vpid = leaf_pnt.ovfl;
18147 
18148  error_code =
18149  btree_find_oid_and_its_page (thread_p, btid, oid, pg_ptr, BTREE_OP_DELETE_OBJECT_PHYSICAL, NULL, &rec,
18150  &leaf_pnt, offset, &found_page, NULL, &offset_to_object, NULL);
18151  if (error_code != NO_ERROR)
18152  {
18153  assert (found_page == NULL);
18154  return DISK_ERROR;
18155  }
18156  if (offset_to_object != NOT_FOUND)
18157  {
18158  /* key will be cleared by caller */
18159  assert (found_page != NULL);
/* found_page may be a different page than pg_ptr; only that extra page is unfixed here, pg_ptr stays with the
 * caller. */
18160  if (found_page != pg_ptr)
18161  {
18162  pgbuf_unfix_and_init (thread_p, found_page);
18163  }
18164  return DISK_VALID;
18165  }
18166 
/* Not in this record: release the key read for it before moving to the next slot. */
18167  btree_clear_key_value (clear_key, key);
18168  }
18169 
/* No record of this leaf contains oid. */
18170  return DISK_INVALID;
18171 }
18172 
18173 /*
18174  * btree_find_key_from_nleaf () -
18175  * return:
18176  * btid(in):
18177  * pg_ptr(in):
18178  * key_cnt(in):
18179  * oid(in):
18180  * key(in):
18181  * clear_key(in):
18182  */
18183 static DISK_ISVALID
18184 btree_find_key_from_nleaf (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, int key_cnt, OID * oid,
18185  DB_VALUE * key, bool * clear_key)
18186 {
18187  int i;
18188  NON_LEAF_REC nleaf_ptr;
18189  VPID page_vpid;
18190  PAGE_PTR page = NULL;
18191  RECDES rec;
18192  DISK_ISVALID status = DISK_INVALID;
18193 
/* Visit the child page referenced by each of the records 1..key_cnt until one reports DISK_VALID. */
18194  for (i = 1; i <= key_cnt; i++)
18195  {
18196  if (spage_get_record (thread_p, pg_ptr, i, &rec, PEEK) != S_SUCCESS)
18197  {
18198  return DISK_ERROR;
18199  }
18200 
/* NOTE(review): the statement that fills nleaf_ptr from rec is absent from this listing; nleaf_ptr.pnt is read on
 * the next line. */
18202  page_vpid = nleaf_ptr.pnt;
18203 
18204  page = pgbuf_fix (thread_p, &page_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
18205  if (page == NULL)
18206  {
18207  return DISK_ERROR;
18208  }
18209 
18210  (void) pgbuf_check_page_ptype (thread_p, page, PAGE_BTREE);
18211 
/* Search the child page (this goes back through btree_find_key_from_page); the child is unfixed before the result
 * is examined. */
18212  status = btree_find_key_from_page (thread_p, btid, page, oid, key, clear_key);
18213  pgbuf_unfix_and_init (thread_p, page);
18214 
/* Only DISK_VALID stops the scan: a DISK_ERROR from one child does not end the loop and is returned only if it
 * came from the last child visited. */
18215  if (status == DISK_VALID)
18216  {
18217  break;
18218  }
18219  }
18220 
18221  return status;
18222 }
18223 
18224 /*
18225  * btree_find_key_from_page () -
18226  * return:
18227  * btid(in):
18228  * pg_ptr(in):
18229  * oid(in):
18230  * key(in):
18231  * clear_key(in):
18232  */
18233 static DISK_ISVALID
18234 btree_find_key_from_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, OID * oid, DB_VALUE * key,
18235  bool * clear_key)
18236 {
18237  BTREE_NODE_HEADER *header = NULL;
18238  BTREE_NODE_TYPE node_type;
18239  int key_cnt;
18240  DISK_ISVALID status;
18241 
18242  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
18243 
18244  header = btree_get_node_header (thread_p, pg_ptr);
18245  if (header == NULL)
18246  {
18247  return DISK_ERROR;
18248  }
18249 
18250  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
18251 
18252  if (node_type == BTREE_NON_LEAF_NODE)
18253  {
18254  status = btree_find_key_from_nleaf (thread_p, btid, pg_ptr, key_cnt, oid, key, clear_key);
18255  }
18256  else
18257  {
18258  status = btree_find_key_from_leaf (thread_p, btid, pg_ptr, key_cnt, oid, key, clear_key);
18259  }
18260 
18261  return status;
18262 }
18263 
18264 /*
18265  * btree_find_key () -
18266  * return:
18267  * btid(in):
18268  * oid(in):
18269  * key(in):
18270  * clear_key(in):
18271  */
18273 btree_find_key (THREAD_ENTRY * thread_p, BTID * btid, OID * oid, DB_VALUE * key, bool * clear_key)
/* NOTE(review): the return-type line above this signature is absent from this listing; the body returns
 * DISK_ISVALID values. */
18274 {
18275  VPID root_vpid;
18276  PAGE_PTR root = NULL;
18277  BTREE_ROOT_HEADER *root_header = NULL;
/* NOTE(review): the declaration of btid_int is absent from this listing; it is used below as a BTID_INT. */
18279  DISK_ISVALID status;
18280 
/* Fix the root page with a read latch; it stays fixed for the whole search and is released at 'end'. */
18281  root_vpid.pageid = btid->root_pageid; /* read root page */
18282  root_vpid.volid = btid->vfid.volid;
18283  root = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
18284  if (root == NULL)
18285  {
18286  return DISK_ERROR;
18287  }
18288 
18289  (void) pgbuf_check_page_ptype (thread_p, root, PAGE_BTREE);
18290 
18291  root_header = btree_get_root_header (thread_p, root);
18292  if (root_header == NULL)
18293  {
18294  status = DISK_ERROR;
18295  goto end;
18296  }
18297 
/* Build the internal b-tree info from the root header, then search starting at the root page.
 * NOTE(review): the result of btree_glean_root_header_info is not checked here, although
 * btree_rv_read_keyval_info_nocopy treats a non-NO_ERROR result from it as a failure. */
18298  btid_int.sys_btid = btid;
18299  btree_glean_root_header_info (thread_p, root_header, &btid_int);
18300  status = btree_find_key_from_page (thread_p, &btid_int, root, oid, key, clear_key);
18301 
18302 end:
18303 
18304  assert (root != NULL);
18305  pgbuf_unfix_and_init (thread_p, root);
18306 
18307  return status;
18308 }
18309 
18310 
18311 int
18312 btree_set_error (THREAD_ENTRY * thread_p, const DB_VALUE * key, const OID * obj_oid, const OID * class_oid,
18313  const BTID * btid, const char *bt_name, int severity, int err_id, const char *filename, int lineno)
18314 {
18315  char btid_msg_buf[OID_MSG_BUF_SIZE];
18316  char class_oid_msg_buf[OID_MSG_BUF_SIZE];
18317  char oid_msg_buf[OID_MSG_BUF_SIZE];
18318  char *index_name;
18319  char *class_name = NULL;
18320  char *keyval;
18321 
18322  assert (btid != NULL);
18323 
18324  /* init as empty string */
18325  btid_msg_buf[0] = class_oid_msg_buf[0] = oid_msg_buf[0] = 0;
18326  index_name = class_name = keyval = NULL;
18327 
18328  /* fetch index name from the class representation */
18329  if (class_oid != NULL && !OID_ISNULL (class_oid))
18330  {
18331  if (heap_get_indexinfo_of_btid (thread_p, class_oid, btid, NULL, NULL, NULL, NULL, &index_name, NULL) != NO_ERROR)
18332  {
18333  er_clear ();
18334  index_name = NULL;
18335  }
18336  }
18337 
18338  if (index_name && btid)
18339  {
18340  /* print valid btid */
18341  snprintf (btid_msg_buf, OID_MSG_BUF_SIZE, "(B+tree: %d|%d|%d)", btid->vfid.volid, btid->vfid.fileid,
18342  btid->root_pageid);
18343  }
18344 
18345  if (class_oid != NULL && !OID_ISNULL (class_oid))
18346  {
18347  int save_old_wait;
18348 
18349  snprintf (class_oid_msg_buf, OID_MSG_BUF_SIZE, "(CLASS_OID: %d|%d|%d)", class_oid->volid, class_oid->pageid,
18350  class_oid->slotid);
18351 
18352  /* we have latch on b-tree page. although unlikely, trying to get class name can lead to a dead latch. that is
18353  * undesirable, so we'll force no wait for latch here. if the latch fails, the notification will miss class name,
18354  * but it will have class OID. */
18355 
18356  /* We don't provide classname for VACUUM operations, since it may prevent other vacuums from fixing a page. */
18357  if (!VACUUM_IS_THREAD_VACUUM (thread_p))
18358  {
18359  save_old_wait = xlogtb_reset_wait_msecs (thread_p, LK_FORCE_ZERO_WAIT);
18360  if (heap_get_class_name (thread_p, class_oid, &class_name) != NO_ERROR)
18361  {
18362  /* ignore */
18363  er_clear ();
18364  }
18365  (void) xlogtb_reset_wait_msecs (thread_p, save_old_wait);
18366  }
18367  }
18368 
18369  if (key && obj_oid)
18370  {
18371  keyval = pr_valstring (key);
18372  if (keyval)
18373  {
18374  snprintf (oid_msg_buf, OID_MSG_BUF_SIZE, "(OID: %d|%d|%d)", obj_oid->volid, obj_oid->pageid, obj_oid->slotid);
18375  }
18376  }
18377 
18378  er_set (severity, filename, lineno, err_id, 6, (index_name) ? index_name : ((bt_name) ? bt_name : "*UNKNOWN-INDEX*"),
18379  btid_msg_buf, (class_name) ? class_name : "*UNKNOWN-CLASS*", class_oid_msg_buf,
18380  (keyval) ? keyval : "*UNKNOWN-KEY*", oid_msg_buf);
18381 
18382  if (keyval)
18383  {
18384  db_private_free (thread_p, keyval);
18385  }
18386  if (class_name)
18387  {
18388  free_and_init (class_name);
18389  }
18390  if (index_name)
18391  {
18392  free_and_init (index_name);
18393  }
18394 
18395  return NO_ERROR;
18396 }
18397 
18398 /*
18399  * btree_get_asc_desc - get asc/desc for column index from BTREE
18400  *
18401  * return: error code
18402  * thread_p(in): THREAD_ENTRY
18403  * btid(in): BTID
18404  * col_idx(in): column index
18405  * asc_desc(out): asc/desc for column index
18406  */
18407 int
18408 btree_get_asc_desc (THREAD_ENTRY * thread_p, BTID * btid, int col_idx, int *asc_desc)
18409 {
18410  VPID r_vpid; /* root page identifier */
18411  PAGE_PTR r_pgptr = NULL; /* root page pointer */
18413  BTREE_ROOT_HEADER *root_header = NULL;
18414  TP_DOMAIN *domain;
18415  int k, ret = NO_ERROR;
18416 
18417  if (btid == NULL || asc_desc == NULL)
18418  {
18419  return ER_FAILED;
18420  }
18421 
18422  ret = NO_ERROR;
18423  *asc_desc = 0;
18424 
18425  r_vpid.pageid = btid->root_pageid;
18426  r_vpid.volid = btid->vfid.volid;
18427 
18428  r_pgptr = pgbuf_fix (thread_p, &r_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
18429  if (r_pgptr == NULL)
18430  {
18431  goto exit_on_error;
18432  }
18433 
18434  (void) pgbuf_check_page_ptype (thread_p, r_pgptr, PAGE_BTREE);
18435 
18436  root_header = btree_get_root_header (thread_p, r_pgptr);
18437  if (root_header == NULL)
18438  {
18439  goto exit_on_error;
18440  }
18441 
18442  btid_int.sys_btid = btid;
18443 
18444  ret = btree_glean_root_header_info (thread_p, root_header, &btid_int);
18445  if (ret != NO_ERROR)
18446  {
18447  goto exit_on_error;
18448  }
18449 
18450  if (btid_int.key_type->setdomain)
18451  {
18452  domain = btid_int.key_type->setdomain;
18453  for (k = 1; k <= col_idx; k++)
18454  {
18455  domain = domain->next;
18456  if (domain == NULL)
18457  {
18458  goto exit_on_error;
18459  }
18460  }
18461  }
18462  else
18463  {
18464  domain = btid_int.key_type;
18465  if (col_idx != 0)
18466  {
18467  return ER_FAILED;
18468  }
18469  }
18470 
18471  *asc_desc = domain->is_desc;
18472  pgbuf_unfix_and_init (thread_p, r_pgptr);
18473 
18474  return NO_ERROR;
18475 
18476 exit_on_error:
18477 
18478  if (r_pgptr != NULL)
18479  {
18480  pgbuf_unfix_and_init (thread_p, r_pgptr);
18481  }
18482 
18483  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
18484 }
18485 
18486 static void
18487 btree_set_unknown_key_error (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, const char *debug_msg)
18488 {
18489  int severity;
18490  PR_TYPE *pr_type;
18491  char *err_key;
18492 
18493  assert (btid != NULL);
18494  assert (key != NULL);
18495 
18496  /* If this is vacuum worker, we can expect many such error. Don't spam the log with them. */
18497  if (VACUUM_IS_THREAD_VACUUM_WORKER (thread_p))
18498  {
18499  return;
18500  }
18501 
18502  if (log_is_in_crash_recovery ())
18503  {
18504  severity = ER_WARNING_SEVERITY;
18505  }
18506  else
18507  {
18508  severity = ER_ERROR_SEVERITY;
18509  }
18510 
18511  err_key = pr_valstring (key);
18512  pr_type = pr_type_from_id (DB_VALUE_DOMAIN_TYPE (key));
18513 
18514  er_set (severity, ARG_FILE_LINE, ER_BTREE_UNKNOWN_KEY, 5, (err_key != NULL) ? err_key : "_NULL_KEY",
18515  btid->vfid.fileid, btid->vfid.volid, btid->root_pageid,
18516  (pr_type != NULL) ? pr_type->name : "INVALID KEY TYPE");
18517 
18518  er_log_debug (ARG_FILE_LINE, debug_msg);
18519 
18520  if (err_key != NULL)
18521  {
18522  db_private_free (thread_p, err_key);
18523  }
18524 }
18525 
18526 /*
18527  * btree_get_next_page_vpid () - Get VPID of next leaf node in b-tree.
18528  *
18529  * return : Error code.
18530  * thread_p (in) : Thread entry.
18531  * leaf_page (in) : Leaf node.
18532  * next_vpid (out) : Outputs VPID of next leaf node.
18533  */
18534 static int
18535 btree_get_next_page_vpid (THREAD_ENTRY * thread_p, PAGE_PTR leaf_page, VPID * next_vpid)
18536 {
18537  BTREE_NODE_HEADER *header = NULL;
18538 
18539  assert (leaf_page != NULL);
18540  assert (btree_get_node_level (thread_p, leaf_page) == 1);
18541  assert (next_vpid != NULL);
18542 
18543  header = btree_get_node_header (thread_p, leaf_page);
18544  if (header == NULL)
18545  {
18546  assert (false);
18547  return ER_FAILED;
18548  }
18549  VPID_COPY (next_vpid, &header->next_vpid);
18550  return NO_ERROR;
18551 }
18552 
18553 /*
18554  * btree_get_next_page () - Fix the page that the node header of page_p names as its next page.
18555  * return: the next page, fixed with PGBUF_LATCH_WRITE, or NULL when page_p is NULL, its node header cannot be
 *         obtained, the header's next_vpid is null, or pgbuf_fix fails.
18556  *
18557  * page_p(in): page whose node header supplies next_vpid.
 *
 * NOTE(review): the line carrying the function name and parameter list is absent from this listing; the body
 *               uses thread_p and page_p.
18558  */
18559 static PAGE_PTR
18561 {
18562  BTREE_NODE_HEADER *header = NULL;
18563  PAGE_PTR next_page = NULL;
18564  VPID next_vpid;
18565 
18566  if (page_p == NULL)
18567  {
 /* debug builds stop here; release builds just report "no next page" */
18568  assert (page_p != NULL);
18569  return NULL;
18570  }
18571 
18572  header = btree_get_node_header (thread_p, page_p);
18573  if (header == NULL)
18574  {
18575  return NULL;
18576  }
18577 
18578  next_vpid = header->next_vpid;
18579  if (VPID_ISNULL (&next_vpid))
18580  {
 /* no next page is recorded in the header */
18581  goto exit_on_error;
18582  }
18583 
 /* the next page is taken with a write latch, waiting unconditionally */
18584  next_page = pgbuf_fix (thread_p, &next_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
18585  if (next_page == NULL)
18586  {
18587  goto exit_on_error;
18588  }
18589 
18590  (void) pgbuf_check_page_ptype (thread_p, next_page, PAGE_BTREE);
18591 
18592  return next_page;
18593 
18594 exit_on_error:
18595 
 /* next_page is still NULL on both jumps to this label, so this unfix is only defensive */
18596  if (next_page)
18597  {
18598  pgbuf_unfix_and_init (thread_p, next_page);
18599  }
18600  return NULL;
18601 }
18602 
18603 /*
18604  * btree_set_vpid_previous_vpid () - Sets the prev VPID of a page
18605  * return: NO_ERROR (also when page_p is NULL and nothing is done), ER_FAILED if the node header of page_p
 *         cannot be obtained
18606  * btid(in): BTID; btid->sys_btid->vfid is passed to the undo/redo logging calls
18607  * page_p(in): page whose node header is updated; may be NULL
18608  * prev(in): a vpid to be set as previous for the input page
 *
 * NOTE(review): the line carrying the function name and parameter list is absent from this listing; the body
 *               uses thread_p, btid, page_p and prev.
18609  */
18610 static int
18612 {
18613  BTREE_NODE_HEADER *header = NULL;
18614 
18615  if (page_p == NULL)
18616  {
 /* a missing page is not treated as an error */
18617  return NO_ERROR;
18618  }
18619 
18620  header = btree_get_node_header (thread_p, page_p);
18621  if (header == NULL)
18622  {
18623  return ER_FAILED;
18624  }
18625 
 /* the undo log call precedes the header change and the redo log call follows it */
18626  btree_node_header_undo_log (thread_p, &btid->sys_btid->vfid, page_p);
18627  header->prev_vpid = *prev;
18628  btree_node_header_redo_log (thread_p, &btid->sys_btid->vfid, page_p);
18629 
 /* mark the page dirty; DONT_FREE is passed, the caller keeps its own handle on page_p */
18630  pgbuf_set_dirty (thread_p, page_p, DONT_FREE);
18631 
18632  return NO_ERROR;
18633 }
18634 
/*
 * btree_compare_key () - Compare two index keys under the given key domain.
 *   return: DB_LT / DB_GT when exactly one key is NULL (NULL key1 -> DB_LT, NULL key2 -> DB_GT); DB_UNK when
 *           both keys are NULL, when a key type does not match a MIDXKEY / non-MIDXKEY domain, or when the
 *           values are not comparable; otherwise the comparison result, with DB_LT and DB_GT swapped for
 *           descending domains.
 *   key1(in), key2(in): keys to compare (must not be NULL pointers)
 *   key_domain(in): domain of the index key
 *   do_coercion(in): forwarded to the comparison routines; forced to 2 when the domain's cmpval is used
 *   total_order(in): forwarded to the comparison routines
 *   start_colp(in): forwarded to pr_midxkey_compare () for MIDXKEY domains
 *
 * NOTE(review): several lines of this definition are absent from this listing (the return type, the
 *               declaration of c, the head of each call ending in `pr_type_name (dom_type));` and the
 *               second line of the string collation condition). The comments describe visible code only.
 */
18636 btree_compare_key (DB_VALUE * key1, DB_VALUE * key2, TP_DOMAIN * key_domain, int do_coercion, int total_order,
18637  int *start_colp)
18638 {
18640  DB_TYPE key1_type, key2_type;
18641  DB_TYPE dom_type;
18642  int dummy_diff_column;
18643  bool dom_is_desc = false, dummy_next_dom_is_desc;
18644  bool comparable = true;
18645 
18646  assert (key1 != NULL && key2 != NULL && key_domain != NULL);
18647 
18648  key1_type = DB_VALUE_DOMAIN_TYPE (key1);
18649  key2_type = DB_VALUE_DOMAIN_TYPE (key2);
18650  dom_type = TP_DOMAIN_TYPE (key_domain);
18651 
 /* NULL handling: a NULL key1 sorts before a non-NULL key2; two NULL keys are not expected */
18652  if (DB_IS_NULL (key1))
18653  {
18654  if (DB_IS_NULL (key2))
18655  {
18656  assert (false);
18657  return DB_UNK;
18658  }
18659 
18660  return DB_LT;
18661  }
18662 
18663  if (DB_IS_NULL (key2))
18664  {
 /* key1 was already found to be non-NULL above, so this inner check cannot hold here */
18665  if (DB_IS_NULL (key1))
18666  {
18667  assert (false);
18668  return DB_UNK;
18669  }
18670 
18671  return DB_GT;
18672  }
18673 
18674  if (dom_type == DB_TYPE_MIDXKEY)
18675  {
 /* multi-column domain: both keys must be MIDXKEY values */
18676  /* safe code */
18677  if (key1_type != DB_TYPE_MIDXKEY)
18678  {
18680  pr_type_name (dom_type));
18681  assert (false);
18682  return DB_UNK;
18683  }
18684  if (key2_type != DB_TYPE_MIDXKEY)
18685  {
18687  pr_type_name (dom_type));
18688  assert (false);
18689  return DB_UNK;
18690  }
18691 
18692  c = pr_midxkey_compare (db_get_midxkey (key1), db_get_midxkey (key2), do_coercion, total_order, -1, start_colp,
18693  NULL, NULL, &dummy_diff_column, &dom_is_desc, &dummy_next_dom_is_desc);
18694  assert_release (c == DB_UNK || (DB_LT <= c && c <= DB_GT));
18695 
 /* dom_is_desc is an output of pr_midxkey_compare; when set, DB_LT and DB_GT are swapped */
18696  if (dom_is_desc)
18697  {
18698  c = ((c == DB_GT) ? DB_LT : (c == DB_LT) ? DB_GT : c);
18699  }
18700  }
18701  else
18702  {
 /* single-column domain: neither key may be a MIDXKEY value */
18703  assert (key1_type != DB_TYPE_MIDXKEY);
18704  assert (key2_type != DB_TYPE_MIDXKEY);
18705 
18706  assert (tp_valid_indextype (key1_type));
18707  assert (tp_valid_indextype (key2_type));
18708 
18709  /* safe code */
18710  if (key1_type == DB_TYPE_MIDXKEY)
18711  {
18713  pr_type_name (dom_type));
18714  assert (false);
18715  return DB_UNK;
18716  }
18717  if (key2_type == DB_TYPE_MIDXKEY)
18718  {
18720  pr_type_name (dom_type));
18721  assert (false);
18722  return DB_UNK;
18723  }
18724 
 /* the domain's own comparison is used only when key1, key2 and the domain are pairwise comparable */
18725  bool are_types_comparable = (TP_ARE_COMPARABLE_KEY_TYPES (key1_type, key2_type)
18726  && TP_ARE_COMPARABLE_KEY_TYPES (key1_type, dom_type)
18727  && TP_ARE_COMPARABLE_KEY_TYPES (key2_type, dom_type));
18728  if (are_types_comparable)
18729  {
18730  // check strings collation
18731  if (TP_IS_STRING_TYPE (key1_type) && TP_IS_STRING_TYPE (key2_type)
18733  {
18734  // not comparable
18735  are_types_comparable = false;
18736  }
18737  }
18738 
18739  if (are_types_comparable)
18740  {
18741  /*
18742  * for do_coercion = 2, we need to process key comparing as char-type
18743  * in case that one of two arguments has varchar-type
18744  * if the other argument has char-type
18745  */
18746  do_coercion = 2;
18747  c = key_domain->type->cmpval (key1, key2, do_coercion, total_order, NULL, key_domain->collation_id);
18748  }
18749  else
18750  {
 /* fall back to the generic value comparison, which reports through `comparable` whether it could compare */
18751  c = tp_value_compare_with_error (key1, key2, do_coercion, total_order, &comparable);
18752 
18753  if (!comparable)
18754  {
18755  return DB_UNK;
18756  }
18757  }
18758 
18759  assert_release (c == DB_UNK || (DB_LT <= c && c <= DB_GT));
18760 
18761  /* for single-column desc index */
18762  if (key_domain->is_desc)
18763  {
18764  c = ((c == DB_GT) ? DB_LT : (c == DB_LT) ? DB_GT : c);
18765  }
18766  }
18767 
18768  assert_release (c == DB_UNK || (DB_LT <= c && c <= DB_GT));
18769 
18770  return c;
18771 }
18772 
18773 /*
18774  * btree_compare_individual_key_value - Compare individual key values
18775  *
18776  * return : comparison result: DB_EQ when both keys are NULL; for exactly one NULL key, the NULL key is the
 *          smaller one in an ascending domain and the greater one in a descending domain; otherwise the
 *          result of the domain's cmpval, with DB_LT and DB_GT swapped for a descending domain
18777  * key1 (in) : first key value (may be a NULL value)
18778  * key2 (in) : second key value (may be a NULL value)
18779  * key_domain (in) : domain supplying is_desc, the cmpval routine and collation_id
18780  *
18781  * Function expects that both keys are not MIDXKEY. Please also look at btree_compare_key_value.
 *
 * NOTE(review): the line carrying the function name and parameter list, and the two statements that follow
 *               the "should not be MIDXKEY" comment, are absent from this listing.
18782  */
18783 static int
18785 {
18786  int c;
18787  bool key1_is_null, key2_is_null;
18788 
18789  /* should not be MIDXKEY */
18792 
18793  key1_is_null = DB_IS_NULL (key1);
18794  key2_is_null = DB_IS_NULL (key2);
18795 
18796  if (key1_is_null)
18797  {
18798  if (key2_is_null)
18799  {
 /* unlike btree_compare_key, two NULL values compare as equal here */
18800  return DB_EQ;
18801  }
18802  else
18803  {
18804  /* NULL vs. key2 */
18805  return key_domain->is_desc ? DB_GT : DB_LT;
18806  }
18807  }
18808  else
18809  {
18810  if (key2_is_null)
18811  {
18812  /* key1 vs. NULL */
18813  return key_domain->is_desc ? DB_LT : DB_GT;
18814  }
18815  }
18816 
18817  /* both are not null values */
18818  /*
18819  * for do_coercion = 2, we need to process key comparing as char-type
18820  * in case that one of two arguments has varchar-type
18821  * if the other argument has char-type
18822  */
 /* the literal arguments 2 and 1 are do_coercion and total_order */
18823  c = key_domain->type->cmpval (key1, key2, 2, 1, NULL, key_domain->collation_id);
18824 
18825  if (key_domain->is_desc)
18826  {
18827  c = ((c == DB_GT) ? DB_LT : (c == DB_LT) ? DB_GT : c);
18828  }
18829 
18830  assert (DB_LT <= c && c <= DB_GT);
18831  return c;
18832 }
18833 
18834 /*
18835  * btree_range_opt_check_add_index_key () - Add key in the array of top N keys for multiple range search optimization.
18836  *
18837  * return : Error code. ER_FAILED when the current key is not a MIDXKEY, sort_att_idx is NULL or num_attrs is 0.
18838  * thread_p (in) : Thread entry.
18839  * bts (in) : B-tree scan structure; bts->cur_key is the candidate key.
18840  * multi_range_opt (in/out) : Multiple range optimization structure.
18841  * p_new_oid (in) : New candidate OID for top N keys.
18842  * key_added (out) : Outputs true if object made it to top N keys.
 *
 * While the array is not full the candidate is appended; once it is full the candidate is compared with the
 * last item and either rejected or written over it. In both cases the item is then moved to the position
 * reported by btree_top_n_items_binary_search ().
 *
 * NOTE(review): the line carrying the function name and first parameters, and the head of the call that
 *               reports the allocation failure of new_key_value, are absent from this listing.
18843  */
18844 static int
18846  OID * p_new_oid, bool * key_added)
18847 {
18848  DB_MIDXKEY *new_mkey = NULL;
18849  DB_VALUE *new_key_value = NULL;
18850  int error = NO_ERROR, i = 0;
18851  TP_DOMAIN *domain;
18852  bool has_null_domain;
18853 
18854  assert (multi_range_opt->use == true);
18855 
18856  if (DB_VALUE_DOMAIN_TYPE (&(bts->cur_key)) != DB_TYPE_MIDXKEY || multi_range_opt->sort_att_idx == NULL)
18857  {
18858  return ER_FAILED;
18859  }
18860 
 /* optimistic default; reset to false only on the rejection path below */
18861  *key_added = true;
18862 
18863  assert (multi_range_opt->num_attrs != 0);
18864  if (multi_range_opt->num_attrs == 0)
18865  {
18866  return ER_FAILED;
18867  }
18868 
 /* extract the sort columns of the candidate key into new_key_value[] */
18869  new_mkey = db_get_midxkey (&(bts->cur_key));
18870  new_key_value = (DB_VALUE *) db_private_alloc (thread_p, multi_range_opt->num_attrs * sizeof (DB_VALUE));
18871  if (new_key_value == NULL)
18872  {
18874  sizeof (DB_VALUE *) * multi_range_opt->num_attrs);
18875  return ER_OUT_OF_VIRTUAL_MEMORY;
18876  }
18877 
 /* every slot is made NULL first so that the cleanup at exit can clear all of them on any path */
18878  for (i = 0; i < multi_range_opt->num_attrs; i++)
18879  {
18880  db_make_null (&new_key_value[i]);
18881  }
18882 
18883  for (i = 0; i < multi_range_opt->num_attrs; i++)
18884  {
18885  error = pr_midxkey_get_element_nocopy (new_mkey, multi_range_opt->sort_att_idx[i], &new_key_value[i], NULL, NULL);
18886  if (error != NO_ERROR)
18887  {
18888  goto exit;
18889  }
18890  }
18891 
18892  /* resolve domains */
18893  if (multi_range_opt->sort_col_dom == NULL)
18894  {
 /* first use: allocate the per-column domain array and mark every column as unresolved */
18895  multi_range_opt->sort_col_dom =
18896  (TP_DOMAIN **) db_private_alloc (thread_p, multi_range_opt->num_attrs * sizeof (TP_DOMAIN *));
18897  if (multi_range_opt->sort_col_dom == NULL)
18898  {
18899  error = ER_OUT_OF_VIRTUAL_MEMORY;
18900  goto exit;
18901  }
18902 
18903  for (i = 0; i < multi_range_opt->num_attrs; i++)
18904  {
18905  multi_range_opt->sort_col_dom[i] = &tp_Null_domain;
18906  }
18907  multi_range_opt->has_null_domain = true;
18908  }
18909 
18910  if (multi_range_opt->has_null_domain)
18911  {
 /* try to resolve still-unresolved column domains from the values of this key */
18912  has_null_domain = false;
18913  for (i = 0; i < multi_range_opt->num_attrs; i++)
18914  {
18915  assert (multi_range_opt->sort_col_dom[i] != NULL);
18916  if (multi_range_opt->sort_col_dom[i] == &tp_Null_domain)
18917  {
18918  domain = tp_domain_resolve_value (&new_key_value[i], NULL);
18919  if (domain != &tp_Null_domain)
18920  {
18921  multi_range_opt->sort_col_dom[i] = domain;
18922  }
18923  else
18924  {
18925  has_null_domain = true;
18926  }
18927  }
18928  }
18929  multi_range_opt->has_null_domain = has_null_domain;
18930  }
18931 
18932  if (multi_range_opt->cnt == multi_range_opt->size)
18933  {
 /* the array is full: the candidate competes with the item in the last slot */
18934  int c = 0;
18935  DB_MIDXKEY *comp_mkey = NULL;
18936  DB_VALUE comp_key_value;
18937  bool reject_new_elem = false;
18938  RANGE_OPT_ITEM *last_item = NULL;
18939 
18940  last_item = multi_range_opt->top_n_items[multi_range_opt->size - 1];
18941  assert (last_item != NULL);
18942 
18943  comp_mkey = db_get_midxkey (&(last_item->index_value));
18944 
18945  /* if all keys are equal, the new element is rejected */
18946  reject_new_elem = true;
18947  for (i = 0; i < multi_range_opt->num_attrs; i++)
18948  {
18949  db_make_null (&comp_key_value);
18950  error = pr_midxkey_get_element_nocopy (comp_mkey, multi_range_opt->sort_att_idx[i], &comp_key_value, NULL,
18951  NULL);
18952  if (error != NO_ERROR)
18953  {
18954  goto exit;
18955  }
18956 
18957  c = btree_compare_individual_key_value (&comp_key_value, &new_key_value[i], multi_range_opt->sort_col_dom[i]);
18958 
18959  pr_clear_value (&comp_key_value);
18960  if (c != 0)
18961  {
18962  /* see if new element should be rejected or accepted and stop checking keys */
18963  reject_new_elem = (multi_range_opt->is_desc_order[i]) ? (c > 0) : (c < 0);
18964  break;
18965  }
18966  }
18967 
18968  if (reject_new_elem)
18969  {
18970  /* do not add */
18971  *key_added = false;
18972 
 /* same cleanup as at the exit label, repeated here because this path returns directly */
18973  if (new_key_value != NULL)
18974  {
18975  for (i = 0; i < multi_range_opt->num_attrs; i++)
18976  {
18977  pr_clear_value (&new_key_value[i]);
18978  }
18979  db_private_free_and_init (thread_p, new_key_value);
18980  }
18981 
18982  return NO_ERROR;
18983  }
18984 
18985  /* overwrite the last item with the new key and OIDs */
18986  pr_clear_value (&(last_item->index_value));
18987  pr_clone_value (&(bts->cur_key), &(last_item->index_value));
18988  COPY_OID (&(last_item->inst_oid), p_new_oid);
18989  }
18990  else
18991  {
18992  RANGE_OPT_ITEM *curr_item = NULL;
18993  /* just insert on last position available */
18994  assert (multi_range_opt->cnt < multi_range_opt->size);
18995 
18996  curr_item = (RANGE_OPT_ITEM *) db_private_alloc (thread_p, sizeof (RANGE_OPT_ITEM));
18997  if (curr_item == NULL)
18998  {
18999  error = ER_OUT_OF_VIRTUAL_MEMORY;
19000  goto exit;
19001  }
19002 
19003  multi_range_opt->top_n_items[multi_range_opt->cnt] = curr_item;
19004  pr_clone_value (&(bts->cur_key), &(curr_item->index_value));
19005 
19006  COPY_OID (&(curr_item->inst_oid), p_new_oid);
19007 
19008  multi_range_opt->cnt++;
19009  }
19010 
 /* at this point the new item occupies slot cnt - 1 in both branches above */
19011  /* find the position for this element */
19012  /* if there is only one element => nothing to do */
19013  if (multi_range_opt->cnt > 1)
19014  {
19015  int pos = 0;
19016  error =
19017  btree_top_n_items_binary_search (multi_range_opt->top_n_items, multi_range_opt->sort_att_idx,
19018  multi_range_opt->sort_col_dom, multi_range_opt->is_desc_order, new_key_value,
19019  multi_range_opt->num_attrs, 0, multi_range_opt->cnt - 1, &pos);
19020  if (error != NO_ERROR)
19021  {
19022  goto exit;
19023  }
19024  if (pos != multi_range_opt->cnt - 1)
19025  {
19026  RANGE_OPT_ITEM *temp_item;
19027  int mem_size = (multi_range_opt->cnt - 1 - pos) * sizeof (RANGE_OPT_ITEM *);
19028 
19029  /* copy last item to temp */
19030  temp_item = multi_range_opt->top_n_items[multi_range_opt->cnt - 1];
19031 
19032  /* move all items one position to the right in order to free the position for the new item */
 /* source and destination ranges overlap, hence the copy through multi_range_opt->buffer */
19033  memcpy (multi_range_opt->buffer, &multi_range_opt->top_n_items[pos], mem_size);
19034  memcpy (&multi_range_opt->top_n_items[pos + 1], multi_range_opt->buffer, mem_size);
19035 
19036  /* put new item at its designated position */
19037  multi_range_opt->top_n_items[pos] = temp_item;
19038  }
19039  else
19040  {
19041  /* the new item is already in the correct position */
19042  }
19043  }
19044 
19045 exit:
 /* common cleanup for success and failure: clear and free the extracted column values */
19046  if (new_key_value != NULL)
19047  {
19048  for (i = 0; i < multi_range_opt->num_attrs; i++)
19049  {
19050  pr_clear_value (&new_key_value[i]);
19051  }
19052 
19053  db_private_free_and_init (thread_p, new_key_value);
19054  }
19055  return error;
19056 }
19057 
19058 /*
19059  * btree_top_n_items_binary_search () - searches for the right position for the keys in new_key_values in top N items
19060  *
19061  * return : error code
19062  * top_n_items (in) : current top N item list
19063  * att_idxs (in) : indexes for midxkey attributes
19064  * domains (in) : domains for midxkey attributes
19065  * desc_order (in) : is descending order for midxkey attributes if NULL, ascending order will be considered
19066  * new_key_values (in) : key values for the new item
19067  * num_keys (in) : number of keys that are compared
19068  * first (in) : position of the first item in current range
19069  * last (in) : position of the last item in current range
19070  * new_pos (out) : the position where the new item fits
19071  *
19072  * NOTE : At each step, split current range in half and compare with the
19073  * middle item. If all keys are equal save the position of middle item.
19074  * If middle item is better, look between middle and last, otherwise
19075  * look between first and middle.
19076  * The recursion stops when the range cannot be split anymore
19077  * (first + 1 <= last), when normally first is better and last is worse
19078  * and the new item should replace last. There is a special case when
19079  * the new item is better than all items in top N. In this case,
19080  * first must be 0 and an extra compare is made (to see if new item
19081  * should in fact replace first).
19082  */
19083 static int
19084 btree_top_n_items_binary_search (RANGE_OPT_ITEM ** top_n_items, int *att_idxs, TP_DOMAIN ** domains, bool * desc_order,
19085  DB_VALUE * new_key_values, int num_keys, int first, int last, int *new_pos)
19086 {
19087  DB_MIDXKEY *comp_mkey = NULL;
19088  DB_VALUE comp_key_value;
19089  RANGE_OPT_ITEM *comp_item;
19090  int i, c, error = NO_ERROR;
19091 
19092  int middle;
19093 
19094  assert (last >= first && new_pos != NULL);
19095  if (last <= first + 1)
19096  {
19097  if (first == 0)
19098  {
19099  /* need to check if the new key is smaller than the first */
19100  comp_item = top_n_items[0];
19101  comp_mkey = db_get_midxkey (&(comp_item->index_value));
19102 
19103  for (i = 0; i < num_keys; i++)
19104  {
19105  db_make_null (&comp_key_value);
19106  error = pr_midxkey_get_element_nocopy (comp_mkey, att_idxs[i], &comp_key_value, NULL, NULL);
19107  if (error != NO_ERROR)
19108  {
19109  return error;
19110  }
19111 
19112  c = btree_compare_individual_key_value (&comp_key_value, &new_key_values[i], domains[i]);
19113 
19114  pr_clear_value (&comp_key_value);
19115  if (c != 0)
19116  {
19117  if ((desc_order != NULL && desc_order[i] ? c > 0 : c < 0))
19118  {
19119  /* new value is not better than the first */
19120  break;
19121  }
19122  else
19123  {
19124  /* new value is better than the first */
19125  new_pos = 0;
19126  return NO_ERROR;
19127  }
19128  }
19129  }
19130  /* new value is equal to first, fall through */
19131  }
19132  /* here: the new values should be between first and last */
19133  *new_pos = last;
19134  return NO_ERROR;
19135  }
19136 
19137  /* compare new value with the value in the middle of the current range */
19138  middle = (last + first) / 2;
19139  comp_item = top_n_items[middle];
19140  comp_mkey = db_get_midxkey (&(comp_item->index_value));
19141 
19142  for (i = 0; i < num_keys; i++)
19143  {
19144  db_make_null (&comp_key_value);
19145  error = pr_midxkey_get_element_nocopy (comp_mkey, att_idxs[i], &comp_key_value, NULL, NULL);
19146  if (error != NO_ERROR)
19147  {
19148  return error;
19149  }
19150 
19151  c = btree_compare_individual_key_value (&comp_key_value, &new_key_values[i], domains[i]);
19152 
19153  pr_clear_value (&comp_key_value);
19154  if (c != 0)
19155  {
19156  if ((desc_order != NULL && desc_order[i] ? c > 0 : c < 0))
19157  {
19158  /* the new value is worse than the one in the middle */
19159  first = middle;
19160  }
19161  else
19162  {
19163  /* the new value is better than the one in the middle */
19164  last = middle;
19165  }
19166  return btree_top_n_items_binary_search (top_n_items, att_idxs, domains, desc_order, new_key_values, num_keys,
19167  first, last, new_pos);
19168  }
19169  }
19170  /* all keys were equal, the new item can be put in current position */
19171  *new_pos = middle;
19172  return NO_ERROR;
19173 }
19174 
19175 /*
19176  * btree_iss_set_key () - save the current key
19177  *
19178  * return: error code: ER_FAILED when the current key is not a MIDXKEY, iss or its skipped_range is NULL, or
 *         the selected range key is not a function operand holding a DB_VALUE; otherwise the result of
 *         cloning the current key
19179  * bts(in): scan whose cur_key is saved
19180  * iss(in): index skip scan info; the key of iss->skipped_range is overwritten with a clone of bts->cur_key
 *
 * NOTE(review): the line carrying the function name and parameter list, and one statement following the first
 *               assert_release, are absent from this listing.
19181  */
19182 static int
19184 {
19185  regu_variable_node *key = NULL;
19186  int ret = NO_ERROR;
19187 
19188  /* check environment */
19189  if (DB_VALUE_DOMAIN_TYPE (&bts->cur_key) != DB_TYPE_MIDXKEY || iss == NULL || iss->skipped_range == NULL)
19190  {
19191  assert_release (false);
19193 
19194  return ER_FAILED;
19195  }
19196 
19197  /* get correct key to update value to (key1 for normal scan or key2 for reverse scan); the fetch range will have one
19198  * of the keys NULLed */
19199  if (iss->skipped_range->key1 == NULL)
19200  {
19201  key = iss->skipped_range->key2;
19202  }
19203  else
19204  {
19205  key = iss->skipped_range->key1;
19206  }
19207 
19208  /* check the key */
 /* the target must be a function regu variable whose first operand stores a DB_VALUE (TYPE_DBVAL) */
19209  if (key == NULL || key->value.funcp == NULL || key->value.funcp->operand == NULL
19210  || key->value.funcp->operand->value.type != TYPE_DBVAL)
19211  {
19212  assert_release (false);
19213  return ER_FAILED;
19214  }
19215 
19216  /* save the found key as bound for next fetch */
 /* the previous bound is cleared before the clone overwrites it */
19217  pr_clear_value (&key->value.funcp->operand->value.value.dbval);
19218  ret = pr_clone_value (&bts->cur_key, &key->value.funcp->operand->value.value.dbval);
19219  if (ret != NO_ERROR)
19220  {
19221  return ret;
19222  }
19223 
19224  return NO_ERROR;
19225 }
19226 
19227 /*****************************************************************************/
19228 /* For migrate_90beta_to_91 */
19229 /*****************************************************************************/
/* MIGRATE_90BETA_TO_91 is defined unconditionally here, so the conditional section below is always compiled. */
19230 #define MIGRATE_90BETA_TO_91
19231 
19232 #if defined(MIGRATE_90BETA_TO_91)
19233 
/* forward declarations of the static helpers of this section */
19234 static int btree_fix_ovfl_oid_pages_by_btid (THREAD_ENTRY * thread_p, BTID * btid);
19235 static int btree_fix_ovfl_oid_pages_tree (THREAD_ENTRY * thread_p, BTID * btid, char *btname);
19236 static int btree_fix_ovfl_oid_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, char *btname);
19237 static int btree_compare_oid (const void *oid_mem1, const void *oid_mem2);
19238 
/* file-scope counter; btree_fix_ovfl_oid_pages_tree resets it to 0 and prints it next to the index name */
19239 static int fixed_pages;
19240 
19241 static int
19243 {
19244  char *btname;
19245  FILE_DESCRIPTORS fdes;
19246  int ret = NO_ERROR;
19247 
19248  assert (!BTID_IS_NULL (btid));
19249  assert (btid->root_pageid != NULL_PAGEID);
19250 
19251  ret = file_descriptor_get (thread_p, &btid->vfid, &fdes);
19252  if (ret != NO_ERROR)
19253  {
19254  ASSERT_ERROR ();
19255  goto exit_on_end;
19256  }
19257 
19258  /* get the index name of the index key */
19259  ret = heap_get_indexinfo_of_btid (thread_p, &fdes.btree.class_oid, btid, NULL, NULL, NULL, NULL, &btname, NULL);
19260  if (ret != NO_ERROR)
19261  {
19262  ASSERT_ERROR ();
19263  goto exit_on_end;
19264  }
19265 
19266  ret = btree_fix_ovfl_oid_pages_tree (thread_p, btid, btname);
19267  if (ret != NO_ERROR)
19268  {
19269  ASSERT_ERROR ();
19270  goto exit_on_end;
19271  }
19272 
19273 exit_on_end:
19274 
19275  if (btname)
19276  {
19277  free_and_init (btname);
19278  }
19279 
19280  return ret;
19281 }
19282 
19283 static int
19284 btree_fix_ovfl_oid_pages_tree (THREAD_ENTRY * thread_p, BTID * btid, char *btname)
19285 {
19286  VPID vpid;
19287  PAGE_PTR pgptr = NULL;
19288  BTREE_ROOT_HEADER *root_header = NULL;
19289  BTREE_NODE_HEADER *header = NULL;
19291 
19292  /* fetch the root page */
19293 
19294  vpid.pageid = btid->root_pageid;
19295  vpid.volid = btid->vfid.volid;
19296 
19297  pgptr = pgbuf_fix (thread_p, &vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
19298  if (pgptr == NULL)
19299  {
19300  return ER_FAILED;
19301  }
19302 
19303  (void) pgbuf_check_page_ptype (thread_p, pgptr, PAGE_BTREE);
19304 
19305  root_header = btree_get_root_header (thread_p, pgptr);
19306  if (root_header == NULL)
19307  {
19308  pgbuf_unfix_and_init (thread_p, pgptr);
19309  return ER_FAILED;
19310  }
19311 
19312  btid_int.sys_btid = btid;
19313  if (btree_glean_root_header_info (thread_p, root_header, &btid_int) != NO_ERROR)
19314  {
19315  pgbuf_unfix_and_init (thread_p, pgptr);
19316  return ER_FAILED;
19317  }
19318 
19319  pgbuf_unfix_and_init (thread_p, pgptr);
19320 
19321  if (BTREE_IS_UNIQUE (btid_int.unique_pk))
19322  {
19323  return NO_ERROR;
19324  }
19325 
19326  pgptr = btree_find_leftmost_leaf (thread_p, btid, &vpid, NULL);
19327  if (pgptr == NULL)
19328  {
19329  return ER_FAILED;
19330  }
19331 
19332  fixed_pages = 0;
19333  fprintf (stdout, "Index: %-50s %8d", btname, fixed_pages);
19334 
19335  /* traverse leaf page links */
19336 
19337  while (true)
19338  {
19339  if (btree_fix_ovfl_oid_page (thread_p, &btid_int, pgptr, btname) != NO_ERROR)
19340  {
19341  pgbuf_unfix_and_init (thread_p, pgptr);
19342  fprintf (stdout, "\n");
19343  return ER_FAILED;
19344  }
19345 
19346  header = btree_get_node_header (thread_p, pgptr);
19347  if (header == NULL)
19348  {
19349  pgbuf_unfix_and_init (thread_p, pgptr);
19350  fprintf (stdout, "\n");
19351  return ER_FAILED;
19352  }
19353 
19354  vpid = header->next_vpid;
19355 
19356  pgbuf_unfix_and_init (thread_p, pgptr);
19357 
19358  if (VPID_ISNULL (&vpid))
19359  {
19360  break;
19361  }
19362 
19363  pgptr = pgbuf_fix (thread_p, &vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
19364  if (pgptr == NULL)
19365  {
19366  fprintf (stdout, "\n");
19367  return ER_FAILED;
19368  }
19369 
19370  (void) pgbuf_check_page_ptype (thread_p, pgptr, PAGE_BTREE);
19371  }
19372 
19373  fprintf (stdout, "\n");
19374 
19375  return NO_ERROR;
19376 }
19377 
/*
 * btree_fix_ovfl_oid_page () - sort the objects of every overflow OID page reachable from one leaf page
 *   return: NO_ERROR, or ER_FAILED when a record cannot be read or an overflow page cannot be fixed
 *   thread_p(in): thread entry
 *   btid(in): b-tree info; supplies the fixed object size and the VFID written with the undo log record
 *   pg_ptr(in): leaf page to walk (node_level is asserted to be 1)
 *   btname(in): index name, used only in the progress line written to stdout
 *
 * Note: fixed_pages is not declared in this function; it is incremented once per sorted overflow page.
 */
19378  static int
19379  btree_fix_ovfl_oid_page (THREAD_ENTRY * thread_p, BTID_INT * btid, PAGE_PTR pg_ptr, char *btname)
19380  {
19381  RECDES leaf_rec, ovfl_rec;
19382  int key_cnt, i, offset;
19383  LEAF_REC leaf_pnt;
19384  bool dummy;
19385  VPID ovfl_vpid;
19386  PAGE_PTR ovfl_page = NULL;
19387  char *rv_data = NULL;
19388  int rv_data_len;
19389  char rv_data_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
19390  BTREE_NODE_HEADER *header = NULL;
/* element size handed to qsort below */
19391  int size = BTREE_OBJECT_FIXED_SIZE (btid);
19392 
/* recovery data is built in an aligned window of the local buffer */
19393  rv_data = PTR_ALIGN (rv_data_buf, BTREE_MAX_ALIGN);
19394 
19395  key_cnt = btree_node_number_of_keys (thread_p, pg_ptr);
19396 
19397  header = btree_get_node_header (thread_p, pg_ptr);
19398 
19399  assert_release (header != NULL);
19400  assert_release (header->node_level == 1); /* BTREE_LEAF_NODE */
19401 
/* visit record slots 1..key_cnt of the leaf page */
19402  for (i = 1; i <= key_cnt; i++)
19403  {
19404  if (spage_get_record (thread_p, pg_ptr, i, &leaf_rec, PEEK) != S_SUCCESS)
19405  {
19406  return ER_FAILED;
19407  }
19408 
/* reset the overflow link before reading the record; presumably btree_read_record fills leaf_pnt - confirm */
19409  VPID_SET_NULL (&leaf_pnt.ovfl);
19410  if (btree_read_record (thread_p, btid, pg_ptr, &leaf_rec, NULL, &leaf_pnt, BTREE_LEAF_NODE, &dummy, &offset,
19412  {
19413  return ER_FAILED;
19414  }
19415 
19416  ovfl_vpid = leaf_pnt.ovfl;
19417 
/* follow the chain of overflow pages that belongs to this record */
19418  while (!VPID_ISNULL (&ovfl_vpid))
19419  {
19420  ovfl_page = pgbuf_fix (thread_p, &ovfl_vpid, OLD_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
19421  if (ovfl_page == NULL)
19422  {
19423  return ER_FAILED;
19424  }
19425 
19426  (void) pgbuf_check_page_ptype (thread_p, ovfl_page, PAGE_BTREE);
19427 
/* read the link to the next overflow page now; ovfl_vpid drives the loop condition */
19428  btree_get_next_overflow_vpid (thread_p, ovfl_page, &ovfl_vpid);
19429 
19430  if (spage_get_record (thread_p, ovfl_page, 1, &ovfl_rec, PEEK) != S_SUCCESS)
19431  {
19432  pgbuf_unfix_and_init (thread_p, ovfl_page);
19433  return ER_FAILED;
19434  }
19435 
19436  /* undo log only */
19437  btree_rv_write_log_record (rv_data, &rv_data_len, &ovfl_rec, BTREE_LEAF_NODE);
19438  log_append_undo_data2 (thread_p, RVBT_NDRECORD_UPD, &btid->sys_btid->vfid, ovfl_page, 1, rv_data_len,
19439  rv_data);
19440 
/* sort the fixed-size objects of record 1 with btree_compare_oid (the record was obtained with PEEK) */
19441  qsort (ovfl_rec.data, CEIL_PTVDIV (ovfl_rec.length, size), size, btree_compare_oid);
19442 
/* no separate unfix follows on this path; presumably the FREE argument releases the page - confirm */
19443  pgbuf_set_dirty (thread_p, ovfl_page, FREE);
19444 
/* the progress line is rewritten with '\r'; stdout is flushed every 100 pages */
19445  fprintf (stdout, "\rIndex: %-50s %8d", btname, ++fixed_pages);
19446  if (fixed_pages % 100 == 0)
19447  {
19448  fflush (stdout);
19449  }
19450  }
19451  }
19452 
19453  fflush (stdout);
19454  return NO_ERROR;
19455  }
19456 
/*
 * btree_compare_oid () - comparator passed to qsort when sorting overflow page objects
 *   return: result of oid_compare on the two decoded OIDs
 *   oid_mem1(in): memory holding the first stored OID
 *   oid_mem2(in): memory holding the second stored OID
 */
19457  static int
19458  btree_compare_oid (const void *oid_mem1, const void *oid_mem2)
19459  {
19460  OID oid1, oid2;
19461 
/* decode both stored OIDs into local OID structures before comparing them */
19462  BTREE_GET_OID (oid_mem1, &oid1);
19464 
19465  BTREE_GET_OID (oid_mem2, &oid2);
19467 
19468  return oid_compare (&oid1, &oid2);
19469  }
19470 #endif /* MIGRATE_90BETA_TO_91 */
19471 
19472 #if !defined(NDEBUG)
/*
 * Debug-only (compiled when NDEBUG is not defined) sanity check of one b-tree node page.
 *   return: NO_ERROR, ER_FAILED when the node header cannot be read, or the result of the type specific check
 *
 * The body uses thread_p, btid_int and page_ptr. It asserts basic header invariants and then calls
 * btree_verify_nonleaf_node when header->node_level > 1, btree_verify_leaf_node otherwise.
 */
19473  static int
19475  {
19476  int ret = NO_ERROR;
19477  int key_cnt;
19478  BTREE_NODE_HEADER *header = NULL;
19479  BTREE_NODE_TYPE node_type;
19480  bool check_interrupt = false;
19481 
19482  assert_release (btid_int != NULL);
19483  assert_release (page_ptr != NULL);
19484 
19485  /* check header validation */
19486 
19487  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
19488 
19489  header = btree_get_node_header (thread_p, page_ptr);
19490  if (header == NULL)
19491  {
19492  assert (false);
19493  return ER_FAILED;
19494  }
19495 
/* a node that holds keys must have a positive max_key_len */
19496  if (key_cnt > 0)
19497  {
19498  assert (header->max_key_len > 0);
19499  }
19500 
/* split pivot must lie in [0, 1]; split index and node level must not be negative / zero respectively */
19501  assert (header->split_info.pivot >= 0 && header->split_info.pivot <= 1);
19502  assert (header->split_info.index >= 0);
19503  assert (header->node_level > 0);
19504 
/* sibling links are either null or non-negative identifiers */
19505  assert (header->prev_vpid.volid >= NULL_VOLID);
19506  assert (header->prev_vpid.pageid >= NULL_PAGEID);
19507  assert (header->next_vpid.volid >= NULL_VOLID);
19508  assert (header->next_vpid.pageid >= NULL_PAGEID);
19509 
19510 #if 0 /* DO NOT DELETE ME */
19511  /*
19512  * FOR TEST
19513  * usually should admit below assertions.
19514  * but assert is possible in normal case rarely.
19515  * so, turn on this block in develop stage if you want.
19516  */
19517 
19518  assert (header->node_level < 20);
19519 
19520  assert (header->prev_vpid.volid < 1000);
19521  assert (header->prev_vpid.pageid < 1000000);
19522  assert (header->next_vpid.volid < 1000);
19523  assert (header->next_vpid.pageid < 1000000);
19524 #endif
19525 
/* early exit: the header-only checks above are all that is done on this path */
19527  {
19528  return NO_ERROR;
19529  }
19530 
19531  /* don't let interrupts break our verification */
19532  check_interrupt = logtb_set_check_interrupt (thread_p, false);
19533 
19534  node_type = (header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
19535 
19536  if (node_type == BTREE_NON_LEAF_NODE)
19537  {
19538  ret = btree_verify_nonleaf_node (thread_p, btid_int, page_ptr);
19539  }
19540  else
19541  {
19542  ret = btree_verify_leaf_node (thread_p, btid_int, page_ptr);
19543  }
19544 
19545  assert_release (ret == NO_ERROR);
/* restore the interrupt checking state saved above */
19546  (void) logtb_set_check_interrupt (thread_p, check_interrupt);
19547 
19548  return ret;
19549  }
19550 
/*
 * Debug-only key order check for a non-leaf node page (uses thread_p, btid_int and page_ptr).
 *   return: NO_ERROR when every compared pair of neighbouring keys is in strictly ascending order,
 *           ER_FAILED or the record reading error otherwise
 *
 * Note: when the first pair (slots 1 and 2) is out of order and the node has no next page, slot 1 is taken to be
 *       the neg-inf dummy key and the function returns NO_ERROR at once, without comparing the remaining pairs.
 */
19551  static int
19553  {
19554  BTREE_NODE_HEADER *header = NULL;
19555  TP_DOMAIN *key_domain;
19556  int key_cnt;
19557  int i;
19558  int offset;
19559  int c;
19560  bool clear_prev_key, clear_curr_key;
19561  DB_VALUE prev_key, curr_key;
19562  RECDES rec;
19563  NON_LEAF_REC non_leaf_pnt;
19564  int error = NO_ERROR;
19565 
19566  assert_release (btid_int != NULL);
19567  assert_release (page_ptr != NULL);
19568 
19569  key_domain = btid_int->key_type;
19570 
19571  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
19572  assert_release (key_cnt >= 1);
19573 
19574  btree_init_temp_key_value (&clear_prev_key, &prev_key);
19575  btree_init_temp_key_value (&clear_curr_key, &curr_key);
19576 
19577  /* check key order; exclude neg-inf separator */
/* each iteration compares the key in slot i (prev_key) with the key in slot i + 1 (curr_key) */
19578  for (i = 1; i < key_cnt; i++)
19579  {
19580  if (spage_get_record (thread_p, page_ptr, i, &rec, PEEK) != S_SUCCESS)
19581  {
19582  assert (false);
19583  return ER_FAILED;
19584  }
19585 
19586  error =
19587  btree_read_record_without_decompression (thread_p, btid_int, &rec, &prev_key, &non_leaf_pnt,
19588  BTREE_NON_LEAF_NODE, &clear_prev_key, &offset, PEEK_KEY_VALUE);
19589  if (error != NO_ERROR)
19590  {
19591  assert (false);
19592  return error;
19593  }
19594 
19595  if (spage_get_record (thread_p, page_ptr, i + 1, &rec, PEEK) != S_SUCCESS)
19596  {
19597  assert (false);
19598  btree_clear_key_value (&clear_prev_key, &prev_key);
19599  return ER_FAILED;
19600  }
19601 
19602  error =
19603  btree_read_record_without_decompression (thread_p, btid_int, &rec, &curr_key, &non_leaf_pnt,
19604  BTREE_NON_LEAF_NODE, &clear_curr_key, &offset, PEEK_KEY_VALUE);
19605  if (error != NO_ERROR)
19606  {
19607  assert (false);
19608  btree_clear_key_value (&clear_prev_key, &prev_key);
19609  return error;
19610  }
19611 
19612  c = btree_compare_key (&prev_key, &curr_key, btid_int->key_type, 1, 1, NULL);
19613 
/* both key values are released before the comparison result is examined */
19614  btree_clear_key_value (&clear_curr_key, &curr_key);
19615  btree_clear_key_value (&clear_prev_key, &prev_key);
19616 
/* anything other than "previous key < current key" is a violation, except for the case handled below */
19617  if (c != DB_LT)
19618  {
19619  if (i == 1)
19620  {
19621  header = btree_get_node_header (thread_p, page_ptr);
19622  if (header == NULL)
19623  {
19624  return ER_FAILED;
19625  }
19626 
19627  if (VPID_ISNULL (&(header->next_vpid)))
19628  {
19629  /* This page is first non-leaf page. So, this key is neg-inf dummy key */
19630 
19631  assert (header->next_vpid.volid == NULL_VOLID);
19632 
19633  return NO_ERROR;
19634  }
19635  }
19636 
/* dump the offending page to stdout before failing */
19637  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19638  assert (false);
19639  return ER_FAILED;
19640  }
19641  }
19642 
19643  return NO_ERROR;
19644  }
19645 
/*
 * Debug-only consistency check for a leaf node page (uses thread_p, btid_int and page_ptr).
 *   return: NO_ERROR when no check fails; on failure the recorded error code, or ER_FAILED if none was recorded
 *
 * For every slot i in [1, key_cnt) the function checks the first object of the record, the remaining objects of
 * the record, and that the key in slot i compares as DB_LT against the key in slot i + 1. Failures dump the page
 * to stdout and assert.
 */
19646  static int
19648  {
19649  BTREE_NODE_HEADER *header = NULL;
19650  TP_DOMAIN *key_domain;
19651  VPID prev_vpid, next_vpid;
19652  int key_cnt, offset, oid_cnt;
19653  int i, k, c;
19654  bool clear_prev_key, clear_curr_key;
19655  DB_VALUE prev_key, curr_key;
19656  RECDES rec;
19657  LEAF_REC leaf_pnt;
19658  OID oid, class_oid;
19659  OR_BUF buf;
19660  int oid_size;
19661  short mvcc_flags;
19662  int error = NO_ERROR;
19664  int common_prefix = 0;
19665  DB_VALUE lower_fence_key;
19666  bool clear_lower_fence_key = false;
19667  DB_VALUE uncompressed_value;
19668 
19669  assert_release (btid_int != NULL);
19670  assert_release (page_ptr != NULL);
19671 
/* unique indexes use twice OR_OID_SIZE here, non-unique indexes a single OR_OID_SIZE */
19672  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
19673  {
19674  oid_size = (2 * OR_OID_SIZE);
19675  }
19676  else
19677  {
19678  oid_size = OR_OID_SIZE;
19679  }
19680 
19681  clear_prev_key = clear_curr_key = false;
19682 
19683  key_domain = btid_int->key_type;
19684 
19685  key_cnt = btree_node_number_of_keys (thread_p, page_ptr);
19686 
19687  /* read the header record */
19688  header = btree_get_node_header (thread_p, page_ptr);
19689  if (header == NULL)
19690  {
19691  assert (false);
19692  goto exit_on_error;
19693  }
19694 
19695  prev_vpid = header->prev_vpid;
19696  next_vpid = header->next_vpid;
19697 
19698  btree_init_temp_key_value (&clear_curr_key, &curr_key);
19699  btree_init_temp_key_value (&clear_prev_key, &prev_key);
19700  btree_init_temp_key_value (&clear_lower_fence_key, &lower_fence_key);
19701  db_make_null (&uncompressed_value);
19702 
/* when the page has a common prefix, read the key in slot 1 (asserted to be a fence key) as lower_fence_key */
19703  common_prefix = btree_node_common_prefix (thread_p, btid_int, page_ptr);
19704  if (common_prefix > 0)
19705  {
19706  assert (btree_is_fence_key (page_ptr, 1));
19707  if (spage_get_record (thread_p, page_ptr, 1, &rec, PEEK) != S_SUCCESS)
19708  {
19709  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19710  assert (false);
19711  goto exit_on_error;
19712  }
19713  error =
19714  btree_read_record_without_decompression (thread_p, btid_int, &rec, &lower_fence_key, &leaf_pnt, BTREE_LEAF_NODE,
19715  &clear_lower_fence_key, &offset, PEEK_KEY_VALUE);
19716  if (error != NO_ERROR)
19717  {
19718  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19719  assert (false);
19720  goto exit_on_error;
19721  }
19722  }
19723  /* There must be two fences to have common prefix. */
19724  assert (common_prefix == 0 || (btree_is_fence_key (page_ptr, 1) && btree_is_fence_key (page_ptr, key_cnt)));
19725 
19726  /* check key order */
/* the loop stops before key_cnt; the last slot is only read as the "i + 1" neighbour */
19727  for (i = 1; i < key_cnt; i++)
19728  {
19729  if (spage_get_record (thread_p, page_ptr, i, &rec, PEEK) != S_SUCCESS)
19730  {
19731  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19732  assert (false);
19733  goto exit_on_error;
19734  }
19735 
19736  error =
19737  btree_read_record_without_decompression (thread_p, btid_int, &rec, &prev_key, &leaf_pnt, BTREE_LEAF_NODE,
19738  &clear_prev_key, &offset, PEEK_KEY_VALUE);
19739  if (error != NO_ERROR)
19740  {
19741  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19742  assert (false);
19743  goto exit_on_error;
19744  }
19745 
19746  /*
19747  * record oid check
19748  */
19749  oid_cnt = btree_record_get_num_oids (thread_p, btid_int, &rec, offset, BTREE_LEAF_NODE);
19750 
19751  (void) btree_leaf_get_first_object (btid_int, &rec, &oid, &class_oid, &mvcc_info);
/* first branch: judging by the messages below it handles fence key records - confirm against the full source */
19753  {
19754  if (oid.pageid != NULL_PAGEID || oid.volid != 0 || oid.slotid != 0)
19755  {
19756  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19757  assert (false);
19758  }
19759  if (i > 1 && i < key_cnt)
19760  {
19761  /* Fence key cannot be in the middle of the node. */
19762  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19763  assert (false);
19764  }
19765  if (i > 1 && VPID_ISNULL (&header->next_vpid))
19766  {
19767  /* Fence key cannot be at the end of index (unless it is the only record in page). */
19768  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19769  assert (false);
19770  }
19771  if (i < key_cnt && VPID_ISNULL (&header->prev_vpid))
19772  {
19773  /* Fence key cannot be at the beginning of index (unless it is the only record in page). */
19774  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19775  assert (false);
19776  }
19777  }
19778  else
19779  {
/* the first object must carry a valid OID: any null-like component is reported */
19780  if (oid.pageid <= NULL_PAGEID || oid.volid <= NULL_VOLID || oid.slotid <= NULL_SLOTID)
19781  {
19782  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19783  assert (false);
19784  }
19785 
/* for unique indexes the class OID of the first object is validated as well */
19786  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
19787  {
19788  if (class_oid.pageid <= NULL_PAGEID || class_oid.volid <= NULL_VOLID || class_oid.slotid <= NULL_SLOTID)
19789  {
19790  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19791  assert (false);
19792  }
19793  }
19794 
19795  if (common_prefix > 0)
19796  {
19797  /* Check uncompress works. */
/* only the error code is of interest; the rebuilt value is cleared immediately */
19798  error = pr_midxkey_add_prefix (&uncompressed_value, &lower_fence_key, &prev_key, common_prefix);
19799  pr_clear_value (&uncompressed_value);
19800  if (error != NO_ERROR)
19801  {
19802  assert (false);
19803  goto exit_on_error;
19804  }
19805  }
19806  }
19807 
/* buf covers the part of the record that follows offset */
19808  or_init (&buf, rec.data + offset, rec.length - offset);
19809  {
/* a remainder of exactly 4 bytes is treated as corruption: diagnostics are printed and the page is dumped */
19810  if ((rec.length - offset) == 4)
19811  {
19812  int key_len = btree_get_disk_size_of_key (&prev_key);
19813  printf ("## key_len: %d, offset: %d, reclen: %d\n", key_len, offset, rec.length);
19814  db_value_print (&prev_key);
19815  printf ("\n");
19816  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19817  assert (false);
19818  }
19819 
/* walk objects 1..oid_cnt-1: read the OID, strip the MVCC flag bits from volid, skip the class OID of unique */
/* indexes and the MVCC info whose size is derived from the flags */
19820  for (k = 1; k < oid_cnt; k++)
19821  {
19822  mvcc_flags = btree_record_object_get_mvcc_flags (buf.ptr);
19823  or_get_oid (&buf, &oid);
19824  oid.volid = oid.volid & ~BTREE_OID_MVCC_FLAGS_MASK;
19825 
19826  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
19827  {
19828  or_get_oid (&buf, &class_oid);
19829  }
19830  buf.ptr += BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS (mvcc_flags);
19831 
/* NOTE(review): this test joins the three comparisons with && while the first-object test above uses || */
19832  if (oid.pageid <= NULL_PAGEID && oid.volid <= NULL_VOLID && oid.slotid <= NULL_SLOTID)
19833  {
19834  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19835  assert (false);
19836  }
19837  }
19838  }
19839 
19840  /*
19841  * key order check
19842  */
19844  {
19845  continue;
19846  }
19847 
19848  if (spage_get_record (thread_p, page_ptr, i + 1, &rec, PEEK) != S_SUCCESS)
19849  {
19850  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19851  assert (false);
19852  btree_clear_key_value (&clear_prev_key, &prev_key);
19853  goto exit_on_error;
19854  }
19855 
19857  {
19858  btree_clear_key_value (&clear_prev_key, &prev_key);
19859  continue;
19860  }
19861 
19862  error =
19863  btree_read_record_without_decompression (thread_p, btid_int, &rec, &curr_key, &leaf_pnt, BTREE_LEAF_NODE,
19864  &clear_curr_key, &offset, PEEK_KEY_VALUE);
19865  if (error != NO_ERROR)
19866  {
19867  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19868  assert (false);
19869  goto exit_on_error;
19870  }
19871 
19872  c = btree_compare_key (&prev_key, &curr_key, btid_int->key_type, 1, 1, NULL);
19873 
19874  btree_clear_key_value (&clear_curr_key, &curr_key);
19875  btree_clear_key_value (&clear_prev_key, &prev_key);
19876 
/* neighbouring keys must be strictly ascending */
19877  if (c != DB_LT)
19878  {
19879  btree_dump_page (thread_p, stdout, NULL, btid_int, NULL, page_ptr, NULL, 2, 2);
19880  assert (false);
19881  goto exit_on_error;
19882  }
19883  }
19884 
19885  return NO_ERROR;
19886 
/* common failure exit: release the temporary key values and make sure an error code is returned */
19887 exit_on_error:
19888  btree_clear_key_value (&clear_curr_key, &curr_key);
19889  btree_clear_key_value (&clear_prev_key, &prev_key);
19890  btree_clear_key_value (&clear_lower_fence_key, &lower_fence_key);
19891 
19892  return error == NO_ERROR ? ER_FAILED : error;
19893  }
19894 #endif
19895 
19896  /*
19897  * btree_ils_adjust_range () - Adjust scanning range for loose index scan.
19898  *
19899  * return : Error code (ER_FAILED on an unexpected key type or allocation failure, otherwise the result of
 *          btree_scan_update_range).
19900  * thread_p (in) : Thread entry.
19901  * bts (in/out) : B-tree scan.
 *
 * The current key (bts->cur_key) must be a multi-column key. A new key is built from its first ils_prefix_len
 * columns followed by NULL columns, the bound of the current key range that faces the scan direction is replaced
 * by that key, and the range type is changed so that this bound becomes exclusive.
19902  */
19903  static int
19905  {
19906  DB_VALUE new_key, *new_key_dbvals, *target_key;
19907  TP_DOMAIN *dom;
19908  DB_MIDXKEY midxkey;
19909  RANGE old_range;
19910  bool swap_ranges = false;
19911  int i;
19913  DB_VALUE *curr_key = NULL;
19914  int prefix_len = 0;
19915  bool use_desc_index, part_key_desc;
19916 
19917  /* Assert expected arguments. */
19918  assert (bts != NULL);
19919 
19920  key_range = &bts->index_scan_idp->key_vals[bts->index_scan_idp->curr_keyno];
19921  curr_key = &bts->cur_key;
19922  prefix_len = bts->index_scan_idp->indx_info->ils_prefix_len;
19923  use_desc_index = bts->use_desc_index;
19924  part_key_desc = BTREE_IS_PART_KEY_DESC (&bts->btid_int);
19925 
19926  /* check environment */
19927  if (DB_VALUE_DOMAIN_TYPE (curr_key) != DB_TYPE_MIDXKEY)
19928  {
19929  assert_release (false);
19931 
19932  return ER_FAILED;
19933  }
19934 
19935  /* fetch target key */
/* swap_ranges ends up true exactly when use_desc_index and part_key_desc differ */
19936  if (use_desc_index)
19937  {
19938  if (!part_key_desc)
19939  {
19940  swap_ranges = true;
19941  }
19942  }
19943  else
19944  {
19945  if (part_key_desc)
19946  {
19947  swap_ranges = true;
19948  }
19949  }
19950 
19951  if (swap_ranges)
19952  {
19953  /* descending index scan, we adjust upper bound */
19954  target_key = &key_range->key2;
19955  }
19956  else
19957  {
19958  /* ascending index scan, we adjust lower bound */
19959  target_key = &key_range->key1;
19960  }
19961 
19962  /* allocate key buffer */
/* one DB_VALUE per column of the current key */
19963  new_key_dbvals = (DB_VALUE *) db_private_alloc (thread_p, curr_key->data.midxkey.ncolumns * sizeof (DB_VALUE));
19964  if (new_key_dbvals == NULL)
19965  {
19967  curr_key->data.midxkey.ncolumns * sizeof (DB_VALUE));
19968  return ER_FAILED;
19969  }
19970 
19971  /* determine target key and adjust range */
/* old_range is kept for the debug-only "are we advancing" check further down */
19972  old_range = key_range->range;
19973  switch (key_range->range)
19974  {
19975  case INF_INF:
19976  if (swap_ranges)
19977  {
19978  key_range->range = INF_LT; /* (INF, INF) => (INF, ?) */
19979  }
19980  else
19981  {
19982  key_range->range = GT_INF; /* (INF, INF) => (?, INF) */
19983  }
19984  break;
19985 
19986  case INF_LE:
19987  if (swap_ranges)
19988  {
19989  key_range->range = INF_LT; /* (INF, ?] => (INF, ?) */
19990  }
19991  else
19992  {
19993  key_range->range = GT_LE; /* (INF, ?] => (?, ?] */
19994  }
19995  break;
19996 
19997  case INF_LT:
19998  if (swap_ranges)
19999  {
20000  /* range remains unchanged */
20001  }
20002  else
20003  {
20004  key_range->range = GT_LT; /* (INF, ?) => (?, ?) */
20005  }
20006  break;
20007 
20008  case GE_LE:
20009  if (swap_ranges)
20010  {
20011  key_range->range = GE_LT; /* [?, ?] => [?, ?) */
20012  }
20013  else
20014  {
20015  key_range->range = GT_LE; /* [?, ?] => (?, ?] */
20016  }
20017  break;
20018 
20019  case GE_LT:
20020  if (swap_ranges)
20021  {
20022  /* range remains unchanged */
20023  }
20024  else
20025  {
20026  key_range->range = GT_LT; /* [?, ?) => (?, ?) */
20027  }
20028  break;
20029 
20030  case GE_INF:
20031  if (swap_ranges)
20032  {
20033  key_range->range = GE_LT; /* [?, INF) => [?, ?) */
20034  }
20035  else
20036  {
20037  key_range->range = GT_INF; /* [?, INF) => (?, INF) */
20038  }
20039  break;
20040 
20041  case GT_LE:
20042  if (swap_ranges)
20043  {
20044  key_range->range = GT_LT; /* (?, ?] => (?, ?) */
20045  }
20046  else
20047  {
20048  /* range remains unchanged */
20049  }
20050  break;
20051 
20052  case GT_LT:
20053  /* range remains unchanged */
20054  break;
20055 
20056  case GT_INF:
20057  if (swap_ranges)
20058  {
20059  key_range->range = GT_LT; /* (?, INF) => (?, ?) */
20060  }
20061  else
20062  {
20063  /* range remains unchanged */
20064  }
20065  break;
20066 
20067  default:
20068  assert_release (false); /* should not happen */
20069  break;
20070  }
20071 
20072  /* copy prefix of current key into target key */
20073  for (i = 0; i < prefix_len; i++)
20074  {
20075  pr_midxkey_get_element_nocopy (&curr_key->data.midxkey, i, &new_key_dbvals[i], NULL, NULL);
20076  }
20077 
20078  /* build suffix */
20079 
20080  dom = curr_key->data.midxkey.domain->setdomain;
20081 
20082  /* get to domain */
/* after this loop dom is the domain of the column at position prefix_len */
20083  for (i = 0; i < prefix_len; i++)
20084  {
20085  dom = dom->next;
20086  }
20087 
20088  /* set maximum suffix (min_max_val), the minimum is NULL */
/* dom is only dereferenced when prefix_len < ncolumns, because of the short-circuit && */
20089  if ((prefix_len < curr_key->data.midxkey.ncolumns)
20090  && ((dom->is_desc && use_desc_index) || (!dom->is_desc && !use_desc_index)))
20091  {
20092  midxkey.min_max_val.position = prefix_len;
20093  midxkey.min_max_val.type = MAX_COLUMN;
20094  }
20095  else
20096  {
20097  midxkey.min_max_val.position = -1;
20098  }
20099 
/* every column after the prefix is NULL in the new key */
20100  for (i = prefix_len; i < curr_key->data.midxkey.ncolumns; i++)
20101  {
20102  db_make_null (&new_key_dbvals[i]);
20103  }
20104 
20105  /* build midxkey */
/* start from an empty midxkey that shares the current key's domain, then append all column values */
20106  midxkey.buf = NULL;
20107  midxkey.domain = curr_key->data.midxkey.domain;
20108  midxkey.ncolumns = 0;
20109  midxkey.size = 0;
20110  db_make_midxkey (&new_key, &midxkey);
20111  new_key.need_clear = true;
20112  pr_midxkey_add_elements (&new_key, new_key_dbvals, curr_key->data.midxkey.ncolumns,
20113  curr_key->data.midxkey.domain->setdomain);
20114 
20115 #if !defined(NDEBUG)
20116  if (DB_IS_NULL (target_key))
20117  {
20118  assert (!DB_IS_NULL (&new_key));
20119  }
20120  else if (old_range == key_range->range)
20121  {
20122  int cmp_res;
20123 
20124  /* range did not modify, check if we're advancing */
20125  cmp_res = btree_compare_key (target_key, &new_key, midxkey.domain, 1, 1, NULL);
20126  if (use_desc_index)
20127  {
20128  assert (cmp_res == DB_GT);
20129  }
20130  else
20131  {
20132  assert (cmp_res == DB_LT);
20133  }
20134  }
20135 #endif
20136 
20137  /* register key in range */
/* the range keeps its own clone; the local new_key is released right after */
20138  pr_clear_value (target_key);
20139  pr_clone_value (&new_key, target_key);
20140  pr_clear_value (&new_key);
20141 
20142  for (i = 0; i < prefix_len; i++)
20143  {
20144  pr_clear_value (&new_key_dbvals[i]); /* it might be alloced/copied */
20145  }
20146  db_private_free (thread_p, new_key_dbvals);
20147 
20148  /* all ok */
20149  return btree_scan_update_range (thread_p, bts, key_range);
20150  }
20151 
20152  /*
20153  * btree_get_next_node_info () - Scans b-tree node by node and obtains info.
20154  *
20155  * return : Scan code (S_SUCCESS with node_info filled, S_END when the queue is exhausted, S_ERROR on failure).
20156  * thread_p (in) : Thread entry.
20157  * btid (in) : B-tree identifier.
20158  * btns (in/out) : B-tree node scan data (page queue, current page, first_call flag).
20159  * node_info (out) : Array of value pointers to store b-tree node information.
 *
 * Each call pops one page from the queue kept in btns. On the first call the root page is queued first. For a
 * non-leaf page one queue item is appended per key slot, so pages are reported in queue order. The popped queue
 * item is freed and the current page is unfixed before returning.
20160  */
20161  SCAN_CODE
20162  btree_get_next_node_info (THREAD_ENTRY * thread_p, BTID * btid, BTREE_NODE_SCAN * btns, DB_VALUE ** node_info)
20163  {
20164  RECDES rec;
20165  SCAN_CODE result;
20166  BTREE_NODE_HEADER *node_header;
20167  BTREE_NODE_TYPE node_type;
20168  BTREE_NODE_SCAN_QUEUE_ITEM *new_item = NULL, *crt_item = NULL;
20169  int key_cnt, i;
20170  NON_LEAF_REC nleaf;
20171  LEAF_REC leaf_pnt;
20172  void *rec_header = NULL;
20173  DB_VALUE key_value;
20174  bool clear_key = false;
20175  int dummy;
20176 
20177  assert (btns->crt_page == NULL);
20178 
20179  if (BTREE_NODE_SCAN_IS_QUEUE_EMPTY (btns))
20180  {
20181  if (!btns->first_call)
20182  {
20183  /* Finished scanning for b-tree pages */
20184  result = S_END;
20185  goto end;
20186  }
20187 
20188  /* First call */
20189 
20190  /* Add root page to queue */
20191  new_item = (BTREE_NODE_SCAN_QUEUE_ITEM *) malloc (sizeof (BTREE_NODE_SCAN_QUEUE_ITEM));
20192  if (new_item == NULL)
20193  {
20195  goto error;
20196  }
20197  new_item->crt_vpid.pageid = btid->root_pageid;
20198  new_item->crt_vpid.volid = btid->vfid.volid;
20199  new_item->next = NULL;
20200  BTREE_NODE_SCAN_ADD_PAGE_TO_QUEUE (btns, new_item);
20201 
20202  btns->first_call = false;
20203  }
20204 
/* take the next page to report; crt_item is released at the end label */
20205  BTREE_NODE_SCAN_POP_PAGE_FROM_QUEUE (btns, crt_item);
20206  btns->crt_vpid = crt_item->crt_vpid;
20208  if (btns->crt_page == NULL)
20209  {
20210  goto error;
20211  }
20212 
/* NOTE(review): node_header is dereferenced on the next line without a NULL check */
20213  node_header = btree_get_node_header (thread_p, btns->crt_page);
20214  node_type = (node_header->node_level > 1) ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
20215  key_cnt = btree_node_number_of_keys (thread_p, btns->crt_page);
20216 
/* btree_read_record below receives the record header structure that matches the node type */
20217  rec_header = (node_type == BTREE_NON_LEAF_NODE) ? (void *) &nleaf : (void *) &leaf_pnt;
20218 
20219  if (node_type == BTREE_NON_LEAF_NODE)
20220  {
20221  /* Add children to queue */
20222  for (i = 1; i <= key_cnt; i++)
20223  {
20224  if (spage_get_record (thread_p, btns->crt_page, i, &rec, PEEK) != S_SUCCESS)
20225  {
20226  goto error;
20227  }
20229  new_item = (BTREE_NODE_SCAN_QUEUE_ITEM *) malloc (sizeof (BTREE_NODE_SCAN_QUEUE_ITEM));
20230  if (new_item == NULL)
20231  {
20233  sizeof (BTREE_NODE_SCAN_QUEUE_ITEM));
20234  goto error;
20235  }
/* the child page identifier is taken from nleaf.pnt */
20236  new_item->crt_vpid.pageid = nleaf.pnt.pageid;
20237  new_item->crt_vpid.volid = nleaf.pnt.volid;
20238  new_item->next = NULL;
20239  BTREE_NODE_SCAN_ADD_PAGE_TO_QUEUE (btns, new_item);
20240  }
20241  }
20242 
20243  /* Get b-tree page info */
20244 
20245  /* Get volume id and page id */
20246  db_make_int (node_info[BTREE_NODE_INFO_VOLUMEID], btns->crt_vpid.volid);
20247  db_make_int (node_info[BTREE_NODE_INFO_PAGEID], btns->crt_vpid.pageid);
20248 
20249  /* Get node type */
20251  db_make_string (node_info[BTREE_NODE_INFO_NODE_TYPE], (node_type == BTREE_NON_LEAF_NODE) ? "non-leaf" : "leaf");
20252 
20253  /* Get key count */
20254  db_make_int (node_info[BTREE_NODE_INFO_KEY_COUNT], key_cnt);
20255 
20256  if (key_cnt > 0)
20257  {
20258  btree_init_temp_key_value (&clear_key, &key_value);
20259 
20260  /* Get first key */
20261  if (spage_get_record (thread_p, btns->crt_page, 1, &rec, PEEK) != S_SUCCESS)
20262  {
20263  goto error;
20264  }
20265  if (btree_read_record (thread_p, &btns->btid_int, btns->crt_page, &rec, &key_value, rec_header, node_type,
20266  &clear_key, &dummy, PEEK_KEY_VALUE, NULL) != NO_ERROR)
20267  {
20268  goto error;
20269  }
20270 
/* node_info receives its own clone; the temporary key value is released right after */
20272  pr_clone_value (&key_value, node_info[BTREE_NODE_INFO_FIRST_KEY]);
20273  btree_clear_key_value (&clear_key, &key_value);
20274 
20275  /* Get last key */
20276  if (spage_get_record (thread_p, btns->crt_page, key_cnt, &rec, PEEK) != S_SUCCESS)
20277  {
20278  goto error;
20279  }
20280  if (btree_read_record (thread_p, &btns->btid_int, btns->crt_page, &rec, &key_value, rec_header, node_type,
20281  &clear_key, &dummy, PEEK_KEY_VALUE, NULL) != NO_ERROR)
20282  {
20283  goto error;
20284  }
20285 
20287  pr_clone_value (&key_value, node_info[BTREE_NODE_INFO_LAST_KEY]);
20288  btree_clear_key_value (&clear_key, &key_value);
20289  }
20290  else
20291  {
20292  /* Empty node */
20294  db_make_null (node_info[BTREE_NODE_INFO_FIRST_KEY]);
20295 
20297  db_make_null (node_info[BTREE_NODE_INFO_LAST_KEY]);
20298  }
20299 
20300  result = S_SUCCESS;
20301 
/* common exit: unfix the current page (if any) and free the popped queue item (if any) */
20302 end:
20303  if (btns->crt_page != NULL)
20304  {
20305  pgbuf_unfix_and_init (thread_p, btns->crt_page);
20306  }
20307 
20308  if (crt_item != NULL)
20309  {
20310  free_and_init (crt_item);
20311  }
20312  return result;
20313 
/* error exit shares the cleanup above */
20314 error:
20315  result = S_ERROR;
20316  goto end;
20317  }
20318 
20319 static const char *
20320 node_type_to_string (short node_type)
20321 {
20322  return (node_type == BTREE_LEAF_NODE) ? "LEAF" : "NON_LEAF";
20323 }
20324 
20325 /*
20326  * key_type_to_string () - convert key_type to string
20327  * return: the converted string
20328  *
20329  * buf(in/out):
20330  * buf_size(in):
20331  * key_type(in):
20332  */
20333 static char *
20334 key_type_to_string (char *buf, int buf_size, TP_DOMAIN * key_type)
20335 {
20336  int n, remain_size;
20337  char *buf_p = NULL;
20338  TP_DOMAIN *elem = NULL;
20339  const char *format = NULL;
20340  char temp_buf[256] = { 0 };
20341 
20342  assert (key_type != NULL);
20343 
20344  switch (TP_DOMAIN_TYPE (key_type))
20345  {
20346  case DB_TYPE_INTEGER:
20347  case DB_TYPE_FLOAT:
20348  case DB_TYPE_DOUBLE:
20349  case DB_TYPE_OBJECT:
20350  case DB_TYPE_TIME:
20351  case DB_TYPE_TIMESTAMP:
20352  case DB_TYPE_TIMESTAMPTZ:
20353  case DB_TYPE_TIMESTAMPLTZ:
20354  case DB_TYPE_DATETIME:
20355  case DB_TYPE_DATETIMETZ:
20356  case DB_TYPE_DATETIMELTZ:
20357  case DB_TYPE_DATE:
20358  case DB_TYPE_MONETARY:
20359  case DB_TYPE_SHORT:
20360  case DB_TYPE_BIGINT:
20361  case DB_TYPE_OID:
20362  case DB_TYPE_ENUMERATION:
20363  snprintf (buf, buf_size, "%s", pr_type_name (TP_DOMAIN_TYPE (key_type)));
20364  break;
20365 
20366  case DB_TYPE_BIT:
20367  case DB_TYPE_VARBIT:
20368  case DB_TYPE_CHAR:
20369  case DB_TYPE_NCHAR:
20370  case DB_TYPE_VARCHAR:
20371  case DB_TYPE_VARNCHAR:
20372  snprintf (buf, buf_size, "%s(%d)", pr_type_name (TP_DOMAIN_TYPE (key_type)), key_type->precision);
20373  break;
20374 
20375  case DB_TYPE_NUMERIC:
20376  snprintf (buf, buf_size, "%s(%d,%d)", pr_type_name (TP_DOMAIN_TYPE (key_type)), key_type->precision,
20377  key_type->scale);
20378  break;
20379 
20380  case DB_TYPE_MIDXKEY:
20381  n = snprintf (buf, buf_size, "%s(", pr_type_name (TP_DOMAIN_TYPE (key_type)));
20382  buf_p = buf + n;
20383  remain_size = buf_size - n - 1; /* reserve 1 byte for ')' */
20384 
20385  assert_release (remain_size > 0);
20386 
20387  for (elem = key_type->setdomain; elem != NULL; elem = elem->next)
20388  {
20389  format = (elem == key_type->setdomain) ? "%s" : ",%s";
20390  n = snprintf (buf_p, remain_size, format, key_type_to_string (temp_buf, sizeof (temp_buf), elem));
20391 
20392  if (n >= remain_size) /* The buffer has not enough space */
20393  {
20394  strcpy (buf_p + remain_size - sizeof ("..."), "...");
20395  buf_p += remain_size - 1;
20396  break;
20397  }
20398  else
20399  {
20400  buf_p += n;
20401  remain_size -= n;
20402  }
20403  }
20404 
20405  *buf_p = ')';
20406  break;
20407 
20408  default:
20409  /* It is invalid index type? */
20410  assert (!tp_valid_indextype (TP_DOMAIN_TYPE (key_type)));
20411 
20412  buf[0] = '\0';
20413  break;
20414  }
20415 
20416  buf[buf_size - 1] = 0;
20417 
20418  return buf;
20419 }
20420 
20421 /*
20422  * index_attrs_to_string () - convert the attributes info of index to string
20423  * return: NO_ERROR, or ER_code
20424  *
20425  * buf(in/out):
20426  * buf_size(in):
20427  * index_p(in):
20428  * recdes(in):
20429  */
20430 static int
20431 index_attrs_to_string (char *buf, int buf_size, OR_INDEX * index_p, RECDES * recdes)
20432 {
20433  int i, n, remain_size;
20434  char *buf_p = NULL;
20435  char *attr_name;
20436  char format[20];
20437  int error = NO_ERROR;
20438  int alloced_string = 0;
20439  char *string = NULL;
20440 
20441  buf_p = buf;
20442  remain_size = buf_size;
20443 
20444  for (i = 0; i < index_p->n_atts; i++)
20445  {
20446  bool set_break = false;
20447  alloced_string = 0;
20448  string = NULL;
20449 
20450  error = or_get_attrname (recdes, index_p->atts[i]->id, &string, &alloced_string);
20451  if (error != NO_ERROR)
20452  {
20453  set_break = true;
20454  goto clean_string;
20455  }
20456  attr_name = string;
20457 
20458  if (attr_name == NULL)
20459  {
20460  error = ER_FAILED;
20461  set_break = true;
20462  goto clean_string;
20463  }
20464 
20465  format[0] = '\0';
20466  if (strchr (attr_name, ',') != NULL || strchr (attr_name, ' ') != NULL)
20467  {
20468  strcpy (format, (i == 0) ? "[%s]" : ",[%s]");
20469  }
20470  else
20471  {
20472  strcpy (format, (i == 0) ? "%s" : ",%s");
20473  }
20474 
20475  /* Show nothing for default order(ascending), show DESC for descending */
20476  if (index_p->asc_desc[i] != 0)
20477  {
20478  strcat (format, " DESC");
20479  }
20480 
20481  n = snprintf (buf_p, remain_size, format, attr_name);
20482 
20483  clean_string:
20484  if (string != NULL && alloced_string == 1)
20485  {
20486  db_private_free_and_init (NULL, string);
20487  }
20488 
20489  if (set_break == true)
20490  {
20491  break;
20492  }
20493 
20494  if (n >= remain_size) /* The buffer has not enough space */
20495  {
20496  assert_release (buf_size >= (int) sizeof ("..."));
20497  strcpy (buf + buf_size - sizeof ("..."), "...");
20498  break;
20499  }
20500  else
20501  {
20502  buf_p += n;
20503  remain_size -= n;
20504  }
20505  }
20506 
20507  buf[buf_size - 1] = 0;
20508 
20509  return error;
20510 }
20511 
20512 /*
20513  * btree_index_start_scan () - start scan function for show index header/capacity
20514  * return: NO_ERROR, or ER_code
20515  *
20516  * thread_p(in):
20517  * show_type(in):
20518  * arg_values(in):
20519  * arg_cnt(in):
20520  * ptr(in/out): index header/capacity context
20521  */
20522 int
20523 btree_index_start_scan (THREAD_ENTRY * thread_p, int show_type, DB_VALUE ** arg_values, int arg_cnt, void **ptr)
20524 {
20525  int i, error = NO_ERROR;
20526  OID oid;
20527  OR_CLASSREP *classrep = NULL;
20528  int idx_in_cache = -1;
20529  SHOW_INDEX_SCAN_CTX *ctx = NULL;
20530  LC_FIND_CLASSNAME status;
20531  OR_PARTITION *parts = NULL;
20532  int parts_count = 0;
20533  DB_CLASS_PARTITION_TYPE partition_type;
20534  const char *class_name = NULL;
20535 
20536  *ptr = NULL;
20537  ctx = (SHOW_INDEX_SCAN_CTX *) db_private_alloc (thread_p, sizeof (SHOW_INDEX_SCAN_CTX));
20538  if (ctx == NULL)
20539  {
20540  ASSERT_ERROR_AND_SET (error);
20541  goto cleanup;
20542  }
20543  memset (ctx, 0, sizeof (SHOW_INDEX_SCAN_CTX));
20544 
20545  ctx->show_type = show_type;
20546  ctx->is_all = (show_type == SHOWSTMT_ALL_INDEXES_HEADER || show_type == SHOWSTMT_ALL_INDEXES_CAPACITY);
20547 
20548  class_name = db_get_string (arg_values[0]);
20549 
20550  // if you want consitent results, S_LOCK is required.
20551  status = xlocator_find_class_oid (thread_p, class_name, &oid, ctx->is_all ? S_LOCK : SCH_S_LOCK);
20552  if (status == LC_CLASSNAME_ERROR || status == LC_CLASSNAME_DELETED)
20553  {
20554  error = ER_LC_UNKNOWN_CLASSNAME;
20555  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error, 1, class_name);
20556  goto cleanup;
20557  }
20558 
20559  classrep = heap_classrepr_get (thread_p, &oid, NULL, NULL_REPRID, &idx_in_cache);
20560  if (classrep == NULL)
20561  {
20562  ASSERT_ERROR_AND_SET (error);
20563  goto cleanup;
20564  }
20565 
20566  if (ctx->is_all)
20567  {
20568  assert (arg_cnt == 2);
20569 
20570  partition_type = (DB_CLASS_PARTITION_TYPE) db_get_int (arg_values[1]);
20571  ctx->indexes_count = classrep->n_indexes;
20572  }
20573  else
20574  {
20575  assert (arg_cnt == 3);
20576 
20577  /* get index name which user specified */
20578  ctx->index_name = db_private_strdup (thread_p, db_get_string (arg_values[1]));
20579  if (ctx->index_name == NULL)
20580  {
20581  ASSERT_ERROR_AND_SET (error);
20582  goto cleanup;
20583  }
20584 
20585  partition_type = (DB_CLASS_PARTITION_TYPE) db_get_int (arg_values[2]);
20586  ctx->indexes_count = 1;
20587  }
20588 
20589  /* save oids to context so that we can get btree info when scan next */
20590  if (partition_type == DB_PARTITIONED_CLASS)
20591  {
20592  error = heap_get_class_partitions (thread_p, &oid, &parts, &parts_count);
20593  if (error != NO_ERROR)
20594  {
20595  goto cleanup;
20596  }
20597 
20598  ctx->class_oids = (OID *) db_private_alloc (thread_p, sizeof (OID) * parts_count);
20599  if (ctx->class_oids == NULL)
20600  {
20601  ASSERT_ERROR_AND_SET (error);
20602  goto cleanup;
20603  }
20604 
20605  for (i = 0; i < parts_count; i++)
20606  {
20607  COPY_OID (&ctx->class_oids[i], &parts[i].class_oid);
20608  }
20609 
20610  ctx->class_oid_count = parts_count;
20611  }
20612  else
20613  {
20614  ctx->class_oids = (OID *) db_private_alloc (thread_p, sizeof (OID));
20615  if (ctx->class_oids == NULL)
20616  {
20617  ASSERT_ERROR_AND_SET (error);
20618  goto cleanup;
20619  }
20620 
20621  COPY_OID (&ctx->class_oids[0], &oid);
20622  ctx->class_oid_count = 1;
20623  }
20624 
20625  *ptr = ctx;
20626  ctx = NULL;
20627 
20628 cleanup:
20629 
20630  if (classrep != NULL)
20631  {
20632  heap_classrepr_free_and_init (classrep, &idx_in_cache);
20633  }
20634 
20635  if (parts != NULL)
20636  {
20637  heap_clear_partition_info (thread_p, parts, parts_count);
20638  }
20639 
20640  if (ctx != NULL)
20641  {
20642  if (ctx->index_name != NULL)
20643  {
20644  db_private_free_and_init (thread_p, ctx->index_name);
20645  }
20646 
20647  if (ctx->class_oids != NULL)
20648  {
20649  db_private_free_and_init (thread_p, ctx->class_oids);
20650  }
20651 
20652  db_private_free_and_init (thread_p, ctx);
20653  }
20654 
20655  return error;
20656 }
20657 
20658 /*
20659  * btree_index_next_scan () - next scan function for show index header/capacity
20660  * return: S_END when cursor is past the last (class OID, index) pair; S_ERROR when the class name, the class
 *         representation or the requested index cannot be obtained; otherwise the result of the
 *         header/capacity scan helper
20661  *
20662  * thread_p(in): thread entry
20663  * cursor(in): zero-based row number, decomposed into a class OID position and an index position
20664  * out_values(out): row values, filled by the header/capacity scan helper
20665  * out_cnt(in): number of values in out_values
20666  * ptr(in): index header/capacity context
 *
 * NOTE(review): original source lines 20727, 20735 and 20741 are not present in this listing; see the notes at
 *               the corresponding places in the body.
20667  */
20668 SCAN_CODE
20669 btree_index_next_scan (THREAD_ENTRY * thread_p, int cursor, DB_VALUE ** out_values, int out_cnt, void *ptr)
20670 {
20671  SCAN_CODE ret;
20672  char *class_name = NULL;
20673  OR_CLASSREP *classrep = NULL;
20674  SHOW_INDEX_SCAN_CTX *ctx = NULL;
20675  OID *class_oid_p = NULL;
20676  int idx_in_cache = -1;
20677  int selected_index = 0;
20678  int i, index_idx, oid_idx;
20679  OR_INDEX *index_p = NULL;
20680 
20681  ctx = (SHOW_INDEX_SCAN_CTX *) ptr;
 /* every class OID is paired with indexes_count indexes, so the number of rows is their product */
20682  if (cursor >= ctx->indexes_count * ctx->class_oid_count)
20683  {
20684  return S_END;
20685  }
20686 
 /* rows are ordered class by class: all indexes of the first class OID, then those of the next one */
20687  assert (ctx->indexes_count >= 1);
20688  index_idx = cursor % ctx->indexes_count;
20689  oid_idx = cursor / ctx->indexes_count;
20690 
20691  class_oid_p = &ctx->class_oids[oid_idx];
20692 
 /* class_name is released with free_and_init () in the cleanup section */
20693  if (heap_get_class_name (thread_p, class_oid_p, &class_name) != NO_ERROR || class_name == NULL)
20694  {
20695  ret = S_ERROR;
20696  goto cleanup;
20697  }
20698 
20699  classrep = heap_classrepr_get (thread_p, class_oid_p, NULL, NULL_REPRID, &idx_in_cache);
20700  if (classrep == NULL)
20701  {
20702  ret = S_ERROR;
20703  goto cleanup;
20704  }
20705 
20706  if (ctx->is_all)
20707  {
 /* "all indexes" scan: the index position computed from the cursor is used directly */
20708  index_p = &classrep->indexes[index_idx];
20709  }
20710  else
20711  {
 /* single index scan: look the index up by name in the representation of this class */
20712  selected_index = -1;
20713  for (i = 0; i < classrep->n_indexes; i++)
20714  {
20715  if (intl_identifier_casecmp (classrep->indexes[i].btname, ctx->index_name) == 0)
20716  {
20717  selected_index = i;
20718  break;
20719  }
20720  }
20721 
20722  if (selected_index == -1)
20723  {
20724  /* it must be found since passed semantic check */
20725  assert (false);
20726 
 /* NOTE(review): original line 20727 is missing from this listing; presumably it sets an error - confirm */
20728  ret = S_ERROR;
20729  goto cleanup;
20730  }
20731 
20732  index_p = &classrep->indexes[selected_index];
20733  }
20734 
 /* NOTE(review): the condition opening this branch (original line 20735) is missing from this listing;
  * presumably it tests ctx->show_type for the "header" show types - confirm against the upstream source */
20736  {
20737  ret = btree_scan_for_show_index_header (thread_p, out_values, out_cnt, class_name, index_p, class_oid_p);
20738  }
20739  else
20740  {
 /* NOTE(review): original line 20741 is missing from this listing */
20742 
20743  ret = btree_scan_for_show_index_capacity (thread_p, out_values, out_cnt, class_name, index_p);
20744  }
20745 
20746 cleanup:
20747 
20748  if (classrep != NULL)
20749  {
20750  heap_classrepr_free_and_init (classrep, &idx_in_cache);
20751  }
20752 
20753  if (class_name != NULL)
20754  {
20755  free_and_init (class_name);
20756  }
20757 
20758  return ret;
20759 }
20760 
20761 /*
20762  * btree_index_end_scan () - end scan function for show index header/capacity
20763  * return: NO_ERROR, or ER_code
20764  *
20765  * thread_p(in):
20766  * ptr(in/out): index header/capacity context
20767  */
20768 int
20769 btree_index_end_scan (THREAD_ENTRY * thread_p, void **ptr)
20770 {
20771  SHOW_INDEX_SCAN_CTX *ctx = NULL;
20772 
20773  ctx = (SHOW_INDEX_SCAN_CTX *) (*ptr);
20774  if (ctx != NULL)
20775  {
20776  if (ctx->index_name != NULL)
20777  {
20778  db_private_free_and_init (thread_p, ctx->index_name);
20779  }
20780 
20781  if (ctx->class_oids != NULL)
20782  {
20783  db_private_free_and_init (thread_p, ctx->class_oids);
20784  }
20785 
20786  db_private_free_and_init (thread_p, ctx);
20787  }
20788 
20789  *ptr = NULL;
20790 
20791  return NO_ERROR;
20792 }
20793 
20794 /*
20795  * btree_scan_for_show_index_header () - scan index header information
20796  * return: S_SUCCESS when all values were produced, S_ERROR otherwise
20797  *
20798  * thread_p(in): thread entry
20799  * out_values(out): receives, in this order: class name, index name, BTID string, node level, max key length,
 *                  number of OIDs, number of nulls, number of keys, top class OID string (empty when the OID
 *                  is NULL), unique_pk, overflow VFID string, key type string and the index attribute list
20800  * out_cnt(in): expected number of values; only checked by an assert
20801  * class_name(in): name of the class owning the index
20802  * index_p(in): index to describe
20803  * class_oid_p(in): OID of the class; used to read the class record for the attribute names
 *
 * NOTE(review): original source line 20820 is not present in this listing; `recdes' is used below but its
 *               declaration is not visible here.
20804  */
20805 static SCAN_CODE
20806 btree_scan_for_show_index_header (THREAD_ENTRY * thread_p, DB_VALUE ** out_values, int out_cnt, const char *class_name,
20807  OR_INDEX * index_p, OID * class_oid_p)
20808 {
20809  int idx = 0;
20810  int error = NO_ERROR;
20811  VPID root_vpid;
20812  PAGE_PTR root_page_ptr = NULL;
20813  BTREE_ROOT_HEADER *root_header = NULL;
20814  char buf[256] = { 0 };
20815  OR_BUF or_buf;
20816  TP_DOMAIN *key_type;
20817  int num_oids = 0, num_nulls = 0, num_keys = 0;
20818  bool fetch_unique_stats = false;
20819  int unique_stats_idx = -1;
 /* NOTE(review): original line 20820 is missing here; presumably it declares `recdes' - confirm */
20821  BTID *btid_p = NULL;
20822  HEAP_SCANCACHE scan_cache;
20823  bool scan_cache_inited = false;
20824 
20825  assert_release (index_p != NULL);
20826 
20827  /* get root header point */
20828  btid_p = &index_p->btid;
20829  root_vpid.pageid = btid_p->root_pageid;
20830  root_vpid.volid = btid_p->vfid.volid;
20831 
20832  root_page_ptr = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
20833  if (root_page_ptr == NULL)
20834  {
20835  ASSERT_ERROR_AND_SET (error);
20836  goto error;
20837  }
20838 
20839  root_header = btree_get_root_header (thread_p, root_page_ptr);
20840  if (root_header == NULL)
20841  {
20842  ASSERT_ERROR_AND_SET (error);
20843  goto error;
20844  }
20845 
20846  /* scan index header into out_values */
 /* idx is advanced after every value, even when producing that value failed */
20847  error = db_make_string_copy (out_values[idx], class_name);
20848  idx++;
20849  if (error != NO_ERROR)
20850  {
20851  goto error;
20852  }
20853 
20854  error = db_make_string_copy (out_values[idx], index_p->btname);
20855  idx++;
20856  if (error != NO_ERROR)
20857  {
20858  goto error;
20859  }
20860 
20861  (void) btid_to_string (buf, sizeof (buf), btid_p);
20862  error = db_make_string_copy (out_values[idx], buf);
20863  idx++;
20864  if (error != NO_ERROR)
20865  {
20866  goto error;
20867  }
20868 
20869  db_make_int (out_values[idx], root_header->node.node_level);
20870  idx++;
20871 
20872  db_make_int (out_values[idx], root_header->node.max_key_len);
20873  idx++;
20874 
20875  if (root_header->unique_pk)
20876  {
20877  /* unique stats fetching must not be done under header page latch; reserve space in buffer and defer fetching
20878  * after page unfix */
20879  fetch_unique_stats = true;
20880  unique_stats_idx = idx;
20881  idx += 3;
20882  }
20883  else
20884  {
 /* not a unique index: the counters stored in the root header are reported */
20885  db_make_int (out_values[idx], root_header->num_oids);
20886  idx++;
20887 
20888  db_make_int (out_values[idx], root_header->num_nulls);
20889  idx++;
20890 
20891  db_make_int (out_values[idx], root_header->num_keys);
20892  idx++;
20893  }
20894 
 /* top class OID is reported as an empty string when it is a NULL OID */
20895  buf[0] = '\0';
20896  if (!OID_ISNULL (&root_header->topclass_oid))
20897  {
20898  oid_to_string (buf, sizeof (buf), &root_header->topclass_oid);
20899  }
20900  error = db_make_string_copy (out_values[idx], buf);
20901  idx++;
20902  if (error != NO_ERROR)
20903  {
20904  goto error;
20905  }
20906 
20907  db_make_int (out_values[idx], root_header->unique_pk);
20908  idx++;
20909 
20910  (void) vfid_to_string (buf, sizeof (buf), &root_header->ovfid);
20911  error = db_make_string_copy (out_values[idx], buf);
20912  idx++;
20913  if (error != NO_ERROR)
20914  {
20915  goto error;
20916  }
20917 
 /* the key domain is unpacked from the root header; NOTE(review): key_type is not checked before use */
20918  or_init (&or_buf, root_header->packed_key_domain, -1);
20919  key_type = or_get_domain (&or_buf, NULL, NULL);
20920  (void) key_type_to_string (buf, sizeof (buf), key_type);
20921  error = db_make_string_copy (out_values[idx], buf);
20922  idx++;
20923  if (error != NO_ERROR)
20924  {
20925  goto error;
20926  }
20927 
20928  /* unfix page buffer before heap_get_class_record() */
 /* root_header is not read after this point */
20929  if (root_page_ptr != NULL)
20930  {
20931  pgbuf_unfix_and_init (thread_p, root_page_ptr);
20932  }
20933 
20934  /* Init scan_cache for heap object retrieving */
20935  (void) heap_scancache_quick_start_root_hfid (thread_p, &scan_cache);
20936  scan_cache_inited = true;
20937 
20938  /* Get the name list with asc/desc info of attributes */
 /* on failure `error' is not updated here; the function still returns S_ERROR through the error label */
20939  if (heap_get_class_record (thread_p, class_oid_p, &recdes, &scan_cache, COPY) != S_SUCCESS)
20940  {
20941  goto error;
20942  }
20943 
20944  error = index_attrs_to_string (buf, sizeof (buf), index_p, &recdes);
20945  if (error != NO_ERROR)
20946  {
20947  goto error;
20948  }
20949 
20950  error = db_make_string_copy (out_values[idx], buf);
20951  idx++;
20952  if (error != NO_ERROR)
20953  {
20954  goto error;
20955  }
20956 
20957  assert (idx == out_cnt);
20958 
 /* fill the three slots reserved above for a unique index, now that the root page is no longer latched */
20959  if (fetch_unique_stats)
20960  {
20961  error = logtb_get_global_unique_stats (thread_p, btid_p, &num_oids, &num_nulls, &num_keys);
20962  if (error != NO_ERROR)
20963  {
20964  goto error;
20965  }
20966 
20967  db_make_int (out_values[unique_stats_idx], num_oids);
20968  db_make_int (out_values[unique_stats_idx + 1], num_nulls);
20969  db_make_int (out_values[unique_stats_idx + 2], num_keys);
20970  }
20971 
20972  (void) heap_scancache_end (thread_p, &scan_cache);
20973 
20974  return S_SUCCESS;
20975 
20976 error:
20977 
 /* release whatever is still held: the root page if still fixed, the scan cache if it was started */
20978  if (root_page_ptr != NULL)
20979  {
20980  pgbuf_unfix_and_init (thread_p, root_page_ptr);
20981  }
20982 
20983  if (scan_cache_inited)
20984  {
20985  (void) heap_scancache_end (thread_p, &scan_cache);
20986  }
20987 
20988  return S_ERROR;
20989 }
20990 
20991 /*
20992  * btree_key_find_first_visible_row () - MVCC find first visible row
20993  * return: BTREE_KEY_FOUND or BTREE_ACTIVE_KEY_FOUND when an object satisfying the dirty snapshot is found,
 *         BTREE_KEY_NOTFOUND when no object qualifies (or max_oids objects were examined),
 *         BTREE_ERROR_OCCURRED when the record cannot be read
20994  * btid(in): B+tree index identifier
20995  * rec(in): Record descriptor
20996  * offset(in): Offset of the second OID in key buffer
20997  * node_type(in): node type
20998  * oid(out): Object identifier of the visible row or NULL_OID
20999  * class_oid(out): Object class identifier
21000  * max_oids(in): max OIDs to search for; a value <= 0 means no limit
 *
 * NOTE: oid and class_oid are reset to NULL on entry and on error. They are not reset before
 *       BTREE_KEY_NOTFOUND is returned, so they may then hold the object that was read last.
 * NOTE(review): original source lines 21003, 21006-21007, 21021 and 21038 are not present in this listing;
 *               the first part of the parameter list (the body uses thread_p and btid_int) and the
 *               declarations of mvcc_info and mvcc_rec_header are therefore not visible here.
21001  */
21002 static BTREE_SEARCH
21004  BTREE_NODE_TYPE node_type, OID * oid, OID * class_oid, int max_oids)
21005 {
21008  OR_BUF buf;
21009  int mvcc_flags = 0, length = 0;
21010  bool is_first = true;
21011  MVCC_SNAPSHOT mvcc_snapshot_dirty;
21012  int oids_count = 0;
21013 
21014  assert (btid_int != NULL && rec != NULL && rec->data != NULL && oid != NULL && class_oid != NULL);
21015 
21016  OID_SET_NULL (oid);
21017  OID_SET_NULL (class_oid);
 /* only the snapshot function is set here; NOTE(review): the MVCCID fields tested after the snapshot call
  * below are presumably filled in by mvcc_satisfies_dirty - confirm */
21018  mvcc_snapshot_dirty.snapshot_fnc = mvcc_satisfies_dirty;
21019 
21020  length = rec->length;
 /* NOTE(review): the condition guarding this block (original line 21021) is missing from this listing;
  * when it holds, the aligned size of a disk VPID is excluded from the area that is scanned for objects */
21022  {
21023  length -= DB_ALIGN (DISK_VPID_SIZE, INT_ALIGNMENT);
21024  }
21025 
 /* walk the objects of the record one by one until one satisfies the snapshot */
21026  or_init (&buf, rec->data, length);
21027  while (buf.ptr < buf.endptr)
21028  {
21029  /* Get MVCC flags */
21030  mvcc_flags = btree_record_object_get_mvcc_flags (buf.ptr);
21031 
21032  /* Read object OID */
21033  if (or_get_oid (&buf, oid) != NO_ERROR)
21034  {
21035  goto error;
21036  }
21037  /* Clear flags */
 /* NOTE(review): the statement that clears the flags (original line 21038) is missing from this listing */
21039 
21040  if (btree_is_class_oid_packed (btid_int, rec, node_type, is_first))
21041  {
21042  /* Read class OID */
21043  if (or_get_oid (&buf, class_oid) != NO_ERROR)
21044  {
21045  goto error;
21046  }
21047  }
21048  else if (BTREE_IS_UNIQUE (btid_int->unique_pk))
21049  {
21050  /* Class OID is top class OID */
21051  COPY_OID (class_oid, &btid_int->topclass_oid);
21052  }
21053 
21054  /* Get MVCC information */
21055  if (btree_or_get_mvccinfo (&buf, &mvcc_info, mvcc_flags) != NO_ERROR)
21056  {
21057  goto error;
21058  }
21059 
 /* the snapshot function works on a heap MVCC header, so the b-tree MVCC info is converted first */
21060  btree_mvcc_info_to_heap_mvcc_header (&mvcc_info, &mvcc_rec_header);
21061  if (mvcc_snapshot_dirty.snapshot_fnc (thread_p, &mvcc_rec_header, &mvcc_snapshot_dirty) == SNAPSHOT_SATISFIED)
21062  {
21063  /* visible row found it */
21064  if (MVCCID_IS_VALID (mvcc_snapshot_dirty.lowest_active_mvccid)
21065  || MVCCID_IS_VALID (mvcc_snapshot_dirty.highest_completed_mvccid))
21066  {
21067  /* oid is modified by other active transaction */
21068  return BTREE_ACTIVE_KEY_FOUND;
21069  }
21070  else
21071  {
21072  /* inserted by committed transaction */
21073  return BTREE_KEY_FOUND;
21074  }
21075  }
21076 
 /* the limit is applied only when max_oids is positive */
21077  if (max_oids > 0)
21078  {
21079  oids_count++;
21080  if (oids_count >= max_oids)
21081  {
21082  /* the maximum number of OIDs has been reached => key not found */
21083  break;
21084  }
21085  }
21086 
21087  if (node_type == BTREE_LEAF_NODE && is_first)
21088  {
21089  /* Must skip over the key value to the next object */
21090  or_seek (&buf, offset);
21091  }
21092 
21093  is_first = false;
21094  }
21095 
21096  return BTREE_KEY_NOTFOUND;
21097 
21098 error:
 /* do not hand back a partially read object */
21099  OID_SET_NULL (oid);
21100  OID_SET_NULL (class_oid);
21101  return BTREE_ERROR_OCCURRED;
21102 }
21103 
21104 /*
21105  * btree_insert_mvcc_delid_into_page () - Insert delete MVCCID info.
21106  *
21107  * return : NO_ERROR, the error of btree_rv_save_keyval_for_undo (), or ER_FAILED when the slot update fails.
21108  * thread_p (in) : Thread entry.
21109  * btid (in) : B-tree info.
21110  * page_ptr (in) : Leaf or overflow page.
21111  * node_type (in) : Leaf or overflow node type.
21112  * key (in) : Key value.
21113  * insert_helper (in) : B-tree insert helper; its purpose must be BTREE_OP_INSERT_MVCC_DELID or
 *                      BTREE_OP_INSERT_MARK_DELETED.
21114  * slot_id (in) : Slot ID for b-tree record.
21115  * rec (in) : B-tree record.
21116  * oid_offset (in) : Offset to object being deleted.
 *
 * NOTE: The delete MVCCID is added to the record, the slot is updated, an undoredo log record is appended
 *       (plus a postpone record for BTREE_OP_INSERT_MARK_DELETED) and the page is set dirty.
 * NOTE(review): original source lines 21119, 21137, 21177, 21179, 21220, 21225 and 21264 are not present in
 *               this listing; among others the first part of the parameter list and the declaration of
 *               rv_redo_data_ptr are therefore not visible here.
21117  */
21118 static int
21120  BTREE_NODE_TYPE node_type, DB_VALUE * key, BTREE_INSERT_HELPER * insert_helper,
21121  PGSLOTID slot_id, RECDES * rec, int oid_offset)
21122 {
21123  int ret = NO_ERROR;
21124  LOG_LSA prev_lsa;
21125 
21126  /* Recovery data. */
21127  LOG_DATA_ADDR addr;
21128 
 /* undo data starts in an aligned stack buffer; rv_undo_data is freed at the end only when it no longer
  * points to that buffer */
21129  char *rv_undo_data = NULL;
21130  int rv_undo_data_length;
21131  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
21132  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
21133  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
21134 
21135  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
21136  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
 /* NOTE(review): original line 21137 is missing here; presumably it declares rv_redo_data_ptr - confirm */
21138  int rv_redo_data_length = 0;
21139 
21140  /* Assert expected arguments. */
21141  assert (btid != NULL);
21142  assert (page_ptr != NULL);
21143  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
21144  assert (key != NULL);
21145  assert (insert_helper != NULL);
21146  assert (slot_id > 0);
21147  assert (rec != NULL);
21148  assert (oid_offset >= 0);
21149  assert (insert_helper != NULL);
21150  assert (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID
21151  || insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED);
21152 
21153  /* Prepare logging. */
21154  /* Initialize log address data */
21155  addr.pgptr = page_ptr;
21156  addr.offset = slot_id;
21157  addr.vfid = &btid->sys_btid->vfid;
21158 
21159  /* Undo logging. */
21160  rv_undo_data = rv_undo_data_bufalign;
21161  ret =
21162  btree_rv_save_keyval_for_undo (btid, key, BTREE_INSERT_CLASS_OID (insert_helper), BTREE_INSERT_OID (insert_helper),
21163  BTREE_INSERT_MVCC_INFO (insert_helper), insert_helper->purpose,
21164  rv_undo_data_bufalign, &rv_undo_data, &rv_undo_data_capacity, &rv_undo_data_length);
21165  if (ret != NO_ERROR)
21166  {
 /* NOTE(review): this return bypasses exit_on_error, so rv_undo_data is not freed here; confirm the
  * callee cannot leave an allocated buffer behind when it fails */
21167  return ret;
21168  }
21169 
21170  /* Redo logging. */
21171 #if !defined (NDEBUG)
21172  /* For debugging recovery. */
21173  BTREE_RV_REDO_SET_DEBUG_INFO (&addr, rv_redo_data_ptr, btid, BTREE_RV_DEBUG_ID_INSERT_DELID);
21174 #endif /* !NDEBUG */
21175  if (node_type == BTREE_OVERFLOW_NODE)
21176  {
 /* NOTE(review): the body of this branch (original line 21177) is missing from this listing */
21178  }
 /* NOTE(review): original line 21179 is missing from this listing */
21180 
21181  /* We need to check if insert MVCCID is the same as delete MVCCID for recovery purposes. Take next scenario:
21182  *
21183  * context:
21184  * auto-commit off
21185  * table t (a int), index on t(a)
21186  * table t has row with a value 1.
21187  *
21188  * scenario:
21189  * 1. update t set a = 2 where a = 1;
21190  * 2. update t set a = 1 where a = 2;
21191  * 3=1. update t set a = 2 where a = 1;
21192  * 4. rollback;
21193  *
21194  * Let's follow what happens in key 1:
21195  * Before scenario: OID1-MVCCID1-MVCCID_NULL
21196  * After update#1: OID1-MVCCID1-MVCCID2
21197  * After update#2: OID1-MVCCID1-MVCCID2, OID1-MVCCID2-MVCCID_NULL.
21198  * After update#3: OID1-MVCCID1-MVCCID2, OID1-MVCCID2-MVCCID2.
21199  *
21200  * At rollback, we should execute undo MVCC delete key1,OID1,delid=MVCCID2, undo insert key1,OID1,insid=MVCCID2,
21201  * and again undo MVCC delete key1,OID1,delid=MVCCID2.
21202  * To undo MVCC delete, we usually match by key, OID and delete MVCCID. But in above case this is ambiguous, because
21203  * we have two entries that can match the criteria. And this happens:
21204  * Undo#1: OID1-MVCCID1-MVCCID_NULL, OID1-MVCCID2-MVCCID2.
21205  * This is obviously an invalid state, since the key never looked this way before rollback. Undo insert would not
21206  * find a valid object and would fail.
21207  *
21208  * Somehow, we need to remove the ambiguity. Since this is a rather unlikely case, we prefer to keep the key, OID
21209  * and delete MVCCID matching as a general criteria and do something special just for this case.
21210  * The first MVCC delete undo should match an object that has same insert MVCCID. So, we will hack logging and
21211  * rollback/undo recovery to do the right matching.
21212  *
21213  * Here we need to check insert MVCCID == delete MVCCID. If true, we will mark LOG_DATA_ADDR offset with a special
21214  * flag.
21215  *
21216  * NOTE: If update#1 and update#2 are repeated several times, we end up with several OID1-MVCCID2-MVCCID2 entries.
21217  * At rollback, it does not matter which entry we pick to undo first, as long as we don't undo the original
21218  * entry.
21219  */
 /* NOTE(review): original line 21220 is missing from this listing */
21221  if (BTREE_MVCC_INFO_INSID (BTREE_INSERT_MVCC_INFO (insert_helper))
21222  == BTREE_INSERT_MVCC_INFO (insert_helper)->delete_mvccid)
21223  {
21224  /* Mark addr that we need to undo MVCC delete of my object - insert MVCCID must also match. */
 /* NOTE(review): the statement that marks addr (original line 21225) is missing from this listing */
21226 
21227  /* Should only be possible for BTREE_OP_INSERT_MVCC_DELID. */
21228  assert (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID);
21229  }
21230 
 /* the in-memory record is changed first, then written back to its slot */
21231  btree_record_add_delid (thread_p, btid, rec, node_type, oid_offset,
21232  BTREE_INSERT_MVCC_INFO (insert_helper)->delete_mvccid, NULL, &rv_redo_data_ptr);
21233 
21234  if (spage_update (thread_p, page_ptr, slot_id, rec) != SP_SUCCESS)
21235  {
21236  assert_release (false);
21237  ret = ER_FAILED;
21238  goto exit_on_error;
21239  }
21240 
21241  /* We need to log previous lsa. */
 /* copied before the log records are appended; used only by the btree_insert_log () message below */
21242  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page_ptr));
21243 
21244  /* Logging. */
21245  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
21246  if (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID)
21247  {
21248  log_append_undoredo_data (thread_p, RVBT_MVCC_DELETE_OBJECT, &addr, rv_undo_data_length, rv_redo_data_length,
21249  rv_undo_data, rv_redo_data);
21250  }
21251  else /* BTREE_OP_INSERT_MARK_DELETED */
21252  {
21253  assert (insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED);
21254  log_append_undoredo_data (thread_p, RVBT_MARK_DELETED, &addr, rv_undo_data_length, rv_redo_data_length,
21255  rv_undo_data, rv_redo_data);
 /* the same undo data is also appended as a postpone record for the object removal */
21256  log_append_postpone (thread_p, RVBT_DELETE_OBJECT_POSTPONE, &addr, rv_undo_data_length, rv_undo_data);
21257  }
21258 
21259  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("add delete MVCCID %llu"),
21260  insert_helper->obj_info.mvcc_info.delete_mvccid,
21261  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, page_ptr, &prev_lsa,
21262  node_type == BTREE_LEAF_NODE, slot_id, rec->length, btid->sys_btid));
21263 
 /* NOTE(review): original line 21264 is missing from this listing */
21265 
21266  pgbuf_set_dirty (thread_p, page_ptr, DONT_FREE);
21267 
 /* free the undo data only when it does not live in the stack buffer */
21268  if (rv_undo_data != NULL && rv_undo_data != rv_undo_data_bufalign)
21269  {
21270  db_private_free_and_init (thread_p, rv_undo_data);
21271  }
21272 
21273  return NO_ERROR;
21274 
21275 exit_on_error:
21276 
21277  if (rv_undo_data != NULL && rv_undo_data != rv_undo_data_bufalign)
21278  {
21279  db_private_free_and_init (thread_p, rv_undo_data);
21280  }
21281 
21282  return ret;
21283 }
21284 
21285 /*
21286  * btree_set_mvcc_header_ids_for_update () - set ids of mvcc header for update
21287  * return: nothing
21288  * thread_p(in): thread entry
21289  * do_delete_only(in): true, if need to set del_id only
21290  * do_insert_only(in): true, if need to set ins_id only
21291  * mvcc_id(in): mvcc id to set
21292  * mvcc_rec_header(in): mvcc record header
21293  *
21294  * Note: do_delete_only and do_insert_only can't be both true
21295  */
21296 void
21297 btree_set_mvcc_header_ids_for_update (THREAD_ENTRY * thread_p, bool do_delete_only, bool do_insert_only,
21298  MVCCID * mvcc_id, MVCC_REC_HEADER * mvcc_rec_header)
21299 {
21300  assert (mvcc_rec_header != NULL);
21301  assert (do_delete_only == false || do_insert_only == false);
21302 
21303  BTREE_INIT_MVCC_HEADER (&mvcc_rec_header[0]);
21304  if (do_delete_only == false && do_insert_only == false)
21305  {
21306  MVCC_SET_FLAG_BITS (&mvcc_rec_header[0], OR_MVCC_FLAG_VALID_DELID);
21307  MVCC_SET_DELID (&mvcc_rec_header[0], *mvcc_id);
21308 
21309  BTREE_INIT_MVCC_HEADER (&mvcc_rec_header[1]);
21310  MVCC_SET_FLAG_BITS (&mvcc_rec_header[1], OR_MVCC_FLAG_VALID_INSID);
21311  MVCC_SET_INSID (&mvcc_rec_header[1], *mvcc_id);
21312 
21313  return;
21314  }
21315 
21316  if (do_delete_only == true)
21317  {
21318  MVCC_SET_FLAG_BITS (&mvcc_rec_header[0], OR_MVCC_FLAG_VALID_DELID);
21319  MVCC_SET_DELID (&mvcc_rec_header[0], *mvcc_id);
21320 
21321  return;
21322  }
21323 
21324  /* insert only case */
21325  MVCC_SET_FLAG_BITS (&mvcc_rec_header[0], OR_MVCC_FLAG_VALID_INSID);
21326  MVCC_SET_INSID (&mvcc_rec_header[0], *mvcc_id);
21327 }
21328 
21329 /*
21330  * btree_unpack_mvccinfo () - Check b-tree MVCC flags and unpack any MVCC info into MVCC header.
21331  *
21332  * return : Pointer after the packed MVCC info.
21333  * ptr (in) : Pointer to packed MVCC info.
21334  * mvcc_info (out) : Outputs MVCC info.
21335  * btree_mvcc_flags (in) : Flags that describe the packed MVCC info.
21336  */
21337 char *
21338 btree_unpack_mvccinfo (char *ptr, BTREE_MVCC_INFO * mvcc_info, short btree_mvcc_flags)
21339 {
21340  assert (mvcc_info != NULL && ptr != NULL);
21341 
21342  mvcc_info->flags = btree_mvcc_flags;
21343  mvcc_info->insert_mvccid = MVCCID_ALL_VISIBLE;
21344  mvcc_info->delete_mvccid = MVCCID_NULL;
21345 
21346  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
21347  {
21348  /* Get insert MVCCID */
21349  ptr = or_unpack_mvccid (ptr, &mvcc_info->insert_mvccid);
21350  }
21351 
21352  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
21353  {
21354  /* Get delete MVCCID */
21355  ptr = or_unpack_mvccid (ptr, &mvcc_info->delete_mvccid);
21356  }
21357 
21358  return ptr;
21359 }
21360 
21361 /*
21362  * btree_pack_mvccinfo () - Pack MVCC information into b-tree record.
21363  *
21364  * return : Pointer after the packed MVCC information (ptr itself when nothing is packed).
21365  * ptr (in) : Pointer where MVCC information will be packed.
21366  * mvcc_info (in) : MVCC information (saved as a record header). May be NULL, in which case nothing is packed.
 *
 * NOTE: Insert MVCCID is packed first, then delete MVCCID; each one only when mvcc_info reports it as present.
 * NOTE(review): the parameter list (original source line 21369) is not present in this listing.
21367  */
21368 char *
21370 {
21371  if (mvcc_info == NULL)
21372  {
21373  /* No MVCC info to pack */
21374  return ptr;
21375  }
21376  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
21377  {
21378  ptr = or_pack_mvccid (ptr, mvcc_info->insert_mvccid);
21379  }
21380  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
21381  {
21382  ptr = or_pack_mvccid (ptr, mvcc_info->delete_mvccid);
21383  }
21384  return ptr;
21385 }
21386 
21387 /*
21388  * btree_packed_mvccinfo_size () - Packed MVCC info size.
21389  *
21390  * return : Packed MVCC info size: OR_MVCCID_SIZE for each MVCCID (insert, delete) that mvcc_info reports as
 *          present, 0 when mvcc_info is NULL.
21391  * mvcc_info (in) : MVCC info. May be NULL.
 *
 * NOTE(review): the parameter list (original source line 21394) is not present in this listing.
21392  */
21393 int
21395 {
21396  int size = 0;
21397 
21398  if (mvcc_info == NULL)
21399  {
21400  /* Nothing to pack */
21401  return size;
21402  }
21403 
21404  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
21405  {
21406  size += OR_MVCCID_SIZE;
21407  }
21408 
21409  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
21410  {
21411  size += OR_MVCCID_SIZE;
21412  }
21413 
21414  return size;
21415 }
21416 
21417 /*
21418  * btree_or_get_mvccinfo () - Check b-tree MVCC flags and unpack any MVCC info into MVCC header.
21419  *
21420  * return : Error code.
21421  * buf (in/out) : OR Buffer.
21422  * mvcc_info (out) : MVCC Record header.
21423  * btree_mvcc_flags (in) : Flags that describe the packed MVCC info.
21424  */
21425 static int
21426 btree_or_get_mvccinfo (OR_BUF * buf, BTREE_MVCC_INFO * mvcc_info, short btree_mvcc_flags)
21427 {
21428  int size = BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS (btree_mvcc_flags);
21429 
21430  if (buf->ptr + size > buf->endptr)
21431  {
21432  /* Overflow error */
21433  return or_overflow (buf);
21434  }
21435 
21436  /* Unpack and update pointer */
21437  buf->ptr = btree_unpack_mvccinfo (buf->ptr, mvcc_info, btree_mvcc_flags);
21438 
21439  return NO_ERROR;
21440 }
21441 
21442 /*
21443  * btree_or_put_mvccinfo () - Set MVCC information into buffer (should be used for b-tree records).
21444  * Only insert/delete MVCCID's will be set depending on MVCC flags.
21445  *
21446  * return : NO_ERROR, or the error of the first or_put_mvccid () call that fails.
21447  * buf (in/out) : OR Buffer.
21448  * mvcc_info (in) : MVCC info (saved as record header).
 *
 * NOTE: Insert MVCCID is written first, then delete MVCCID.
 * NOTE(review): mvcc_info is not checked for NULL here; presumably callers always pass a valid pointer.
 * NOTE(review): the parameter list (original source line 21451) is not present in this listing.
21449  */
21450 static int
21452 {
21453  int error_code = NO_ERROR;
21454 
21455  if (BTREE_MVCC_INFO_HAS_INSID (mvcc_info))
21456  {
21457  error_code = or_put_mvccid (buf, mvcc_info->insert_mvccid);
21458  if (error_code != NO_ERROR)
21459  {
21460  return error_code;
21461  }
21462  }
21463 
21464  if (BTREE_MVCC_INFO_HAS_DELID (mvcc_info))
21465  {
21466  error_code = or_put_mvccid (buf, mvcc_info->delete_mvccid);
21467  if (error_code != NO_ERROR)
21468  {
21469  return error_code;
21470  }
21471  }
21472 
21473  return error_code;
21474 }
21475 
21476 /*
21477  * btree_unpack_object () - Unpack a b-tree object from the given pointer. Pointer should belong to a b-tree record.
21478  *
21479  * return : Error code.
21480  * ptr (in) : Pointer in b-tree record to unpack object.
21481  * btid_int (in) : B-tree info.
21482  * node_type (in) : Leaf or overflow node type.
21483  * record (in) : B-tree record.
21484  * after_key_offset (in) : Offset in record after packed key.
21485  * oid (out) : Unpacked OID.
21486  * class_oid (out) : Unpacked class OID.
21487  * mvcc_info (out) : Unpacked MVCC info.
21488  */
21489 static char *
21490 btree_unpack_object (char *ptr, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, RECDES * record, int after_key_offset,
21491  OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info)
21492 {
21493  OR_BUF buffer;
21494 
21495  BTREE_RECORD_OR_BUF_INIT (buffer, record);
21496  buffer.ptr = ptr;
21497 
21498  if (btree_or_get_object (&buffer, btid_int, node_type, after_key_offset, oid, class_oid, mvcc_info) != NO_ERROR)
21499  {
21500  assert (false);
21501  return NULL;
21502  }
21503 
21504  return buffer.ptr;
21505 }
21506 
21507 /*
21508  * btree_pack_object () - Pack a b-tree object into the given pointer. Pointer should belong to a b-tree record.
21509  *
21510  * return : Pointer after the packed object, or NULL if the object could not be written.
21511  * ptr (in) : Pointer in b-tree record to pack object.
21512  * btid_int (in) : B-tree info.
21513  * node_type (in) : Leaf or overflow node type.
21514  * record (in) : B-tree record.
21515  * object_info (in) : B-tree object info.
 *
 * NOTE(review): the last line of the parameter list (original source line 21519) is not present in this listing.
21516  */
21517 static char *
21518 btree_pack_object (char *ptr, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, RECDES * record,
21520 {
21521  OR_BUF buffer;
21522 
 /* the buffer covers the whole record area (area_size), since packing may write past the current data */
21523  OR_BUF_INIT (buffer, record->data, record->area_size);
21524  buffer.ptr = ptr;
21525 
21526  if (btree_or_put_object (&buffer, btid_int, node_type, object_info) != NO_ERROR)
21527  {
21528  assert (false);
21529  return NULL;
21530  }
21531 
 /* the buffer pointer now points after the packed object */
21532  return buffer.ptr;
21533 }
21534 
21535 /*
21536  * btree_or_get_object () - Get object, class OID and its MVCC info from buffer pointing in a b-tree record.
21537  *
21538  * return : Error code.
21539  * buf (in/out) : Buffer pointing to object in b-tree record.
21540  * btid_int (in) : B-tree info.
21541  * node_type (in) : Leaf or overflow node type.
21542  * after_key_offset (in) : Offset to end of packed key for leaf records.
21543  * oid (out) : Outputs OID of object.
21544  * class_oid (out) : Outputs OID of object's class.
21545  * mvcc_info (out) : Outputs MVCC info for object.
21546  *
21547  * NOTE: Buffer.buffer should point to start of b-tree record.
21548  * NOTE: Buffer pointer will be moved after read object.
21549  * If object is first in leaf record, buffer pointer will be moved after the packed key
21550  * (where the second object starts).
 * NOTE: The class OID is read from the record only for unique indexes, and not for a first leaf object that lacks
 * the BTREE_LEAF_RECORD_CLASS_OID flag; in every other case btid_int->topclass_oid is returned.
21551  */
21552 static int
21553 btree_or_get_object (OR_BUF * buf, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, int after_key_offset, OID * oid,
21554  OID * class_oid, BTREE_MVCC_INFO * mvcc_info)
21555 {
21556  short mvcc_flags = 0; /* MVCC flags read from object OID. */
21557  int error_code = NO_ERROR; /* Error code. */
21558  bool is_first_of_leaf; /* True if the object is first in leaf record. */
21559 
21560  /* Assert arguments meet expectations. */
21561  assert (buf != NULL);
21562  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
21563  /* Should any of these be optional? */
21564  assert (oid != NULL);
21565  assert (class_oid != NULL);
21566  assert (mvcc_info != NULL);
21567 
21568  /* Assert buffer has expected alignment. */
21569  ASSERT_ALIGN (buf->ptr, INT_ALIGNMENT);
21570 
21571  /* Is this the first object of leaf record? (cursor still at the start of the record and node is a leaf) */
21572  is_first_of_leaf = buf->ptr == buf->buffer && node_type == BTREE_LEAF_NODE;
21573 
21574  /* Read MVCC flags from the object at the current cursor, before the OID is consumed below. */
21575  mvcc_flags = btree_record_object_get_mvcc_flags (buf->ptr);
21576 
21577  /* Get OID. */
21578  error_code = or_get_oid (buf, oid);
21579  if (error_code != NO_ERROR)
21580  {
21581  return error_code;
21582  }
21583 
21584  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
21585  {
21586  /* Get/set class OID. If this is the first object in leaf record and if record is not marked with
21587  * BTREE_LEAF_RECORD_CLASS_OID, class OID must be top class OID. Otherwise, read it from record. */
21588  if (is_first_of_leaf && !BTREE_OID_IS_RECORD_FLAG_SET (oid, BTREE_LEAF_RECORD_CLASS_OID))
21589  {
21590  COPY_OID (class_oid, &btid_int->topclass_oid);
21591  }
21592  else
21593  {
21594  error_code = or_get_oid (buf, class_oid);
21595  if (error_code != NO_ERROR)
21596  {
21597  assert (false);
21598  return error_code;
21599  }
21600  }
21601  }
21602  else
21603  {
21604  /* Non-unique indexes can be part only of top class. */
21605  COPY_OID (class_oid, &btid_int->topclass_oid);
21606  }
21607 
21608  /* Clear all flags from object. */
       /* NOTE(review): the statement that performs this clearing is absent from this listing - confirm against the
        * upstream source. As shown, oid still carries the flag bits read from the record. */
21610 
21611  /* Read MVCC info */
21612  error_code = btree_or_get_mvccinfo (buf, mvcc_info, mvcc_flags);
21613  if (error_code != NO_ERROR)
21614  {
21615  assert (false);
21616  return error_code;
21617  }
21618 
21619  if (is_first_of_leaf)
21620  {
21621  /* Advance after the first key. */
21622  error_code = or_seek (buf, after_key_offset);
21623  if (error_code != NO_ERROR)
21624  {
21625  assert (false);
21626  return error_code;
21627  }
21628  }
21629 
21630  /* Successful read. */
21631  return NO_ERROR;
21632 }
21633 
21634 /*
21635  * btree_or_put_object () - Put object data in buffer (of b-tree record).
21636  *
21637  * return : Error code.
21638  * buf (in/out) : Buffer pointing to destination of object data.
21639  * btid_int (in) : B-tree info.
21640  * node_type (in) : Leaf or overflow node type.
21641  * object_info (in) : B-tree object info.
21642  *
21643  * NOTE: Buffer will point at the end of packed object after execution.
21644  */
21645 static int
21647 {
21648  bool is_first_of_leaf; /* True if object is first in a leaf record. */
21649  OID flagged_oid; /* OID of object including flags. */
21650  int error_code = NO_ERROR; /* Error code. */
21651 
21652  /* Assert expected arguments. */
21653  assert (object_info != NULL);
21654  /* All overflow objects must be fixed size. */
21655  assert (node_type == BTREE_LEAF_NODE
21656  || (BTREE_MVCC_INFO_HAS_INSID (&object_info->mvcc_info)
21657  && BTREE_MVCC_INFO_HAS_DELID (&object_info->mvcc_info)));
21658 
21659  /* Is this the first object of leaf record? */
21660  is_first_of_leaf = (buf->ptr == buf->buffer) && node_type == BTREE_LEAF_NODE;
21661 
21662  /* All objects in unique key index except first must be fixed size. */
21663  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || !is_first_of_leaf
21664  || (BTREE_MVCC_INFO_HAS_INSID (&object_info->mvcc_info)
21665  && BTREE_MVCC_INFO_HAS_DELID (&object_info->mvcc_info)));
21666 
21667  /* Set MVCC flags into OID. */
21668  COPY_OID (&flagged_oid, &object_info->oid);
21669  BTREE_OID_SET_MVCC_FLAG (&flagged_oid, object_info->mvcc_info.flags);
21670 
21671  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
21672  {
21673  /* Class OID may have to be packed. */
21674  if (is_first_of_leaf)
21675  {
21676  if (OID_EQ (&btid_int->topclass_oid, &object_info->class_oid))
21677  {
21678  /* Don't add class oid. Top class OID will be considered as default. */
21679  error_code = or_put_oid (buf, &flagged_oid);
21680  if (error_code != NO_ERROR)
21681  {
21682  return error_code;
21683  }
21684  }
21685  else
21686  {
21687  /* Flag object with BTREE_LEAF_RECORD_CLASS_OID and also add class OID. */
21688  flagged_oid.slotid |= BTREE_LEAF_RECORD_CLASS_OID;
21689  error_code = or_put_oid (buf, &flagged_oid);
21690  if (error_code != NO_ERROR)
21691  {
21692  return error_code;
21693  }
21694  error_code = or_put_oid (buf, &object_info->class_oid);
21695  if (error_code != NO_ERROR)
21696  {
21697  return error_code;
21698  }
21699  }
21700  }
21701  else
21702  {
21703  /* Add oid and class OID. */
21704  error_code = or_put_oid (buf, &flagged_oid);
21705  if (error_code != NO_ERROR)
21706  {
21707  return error_code;
21708  }
21709  error_code = or_put_oid (buf, &object_info->class_oid);
21710  if (error_code != NO_ERROR)
21711  {
21712  return error_code;
21713  }
21714  }
21715  }
21716  else
21717  {
21718  /* Add OID only */
21719  error_code = or_put_oid (buf, &flagged_oid);
21720  if (error_code != NO_ERROR)
21721  {
21722  return error_code;
21723  }
21724  }
21725 
21726  /* Add MVCC info */
21727  error_code = btree_or_put_mvccinfo (buf, &object_info->mvcc_info);
21728  return error_code;
21729 }
21730 
21731 /*
21732  * btree_set_mvcc_flags_into_oid () - Set MVCC info flags in the volid field of OID.
21733  *
21734  * return : Void.
21735  * p_mvcc_header (in) : MVCC info. May be NULL, in which case nothing is done.
21736  * oid (in/out) : Object identifier.
 *
 * NOTE(review): the line holding the function name and parameter list, and the single statement inside each of
 * the two flag branches, are absent from this listing - confirm against the upstream source.
21737  */
21738 void
21740 {
21741  if (p_mvcc_header == NULL)
21742  {
21743  /* No flag to set */
21744  return;
21745  }
       /* Header carries a valid insert MVCCID (branch body not shown in this listing). */
21746  if (MVCC_IS_FLAG_SET (p_mvcc_header, OR_MVCC_FLAG_VALID_INSID))
21747  {
21749  }
       /* Header carries a valid delete MVCCID (branch body not shown in this listing). */
21750  if (MVCC_IS_FLAG_SET (p_mvcc_header, OR_MVCC_FLAG_VALID_DELID))
21751  {
21753  }
21754 }
21755 
21756 /*
21757  * btree_clear_mvcc_flags_from_oid () - Presumably clears the MVCC flag bits stored in an OID (judging by the name
 * only; the body is not visible here).
21758  *
21759  * return : Void.
21760  * oid (in/out) : Object identifier.
 *
 * NOTE(review): the line holding the function name and parameter list and the body statement are absent from this
 * listing - confirm against the upstream source.
21761  */
21762 void
21764 {
21766 }
21767 
21768 /*
21769  * btree_compare_btids () - B-tree identifier comparator.
21770  *
21771  * return : Positive value is the first identifier is bigger,
21772  * negative if the second identifier is bigger and 0 if the identifiers are equal.
21773  * mem_btid1 (in) : Pointer to first btid value.
21774  * mem_btid2 (in) : Pointer to second btid value.
21775  */
21776 int
21777 btree_compare_btids (void *mem_btid1, void *mem_btid2)
21778 {
21779  const BTID *btid1 = (const BTID *) mem_btid1;
21780  const BTID *btid2 = (const BTID *) mem_btid2;
21781  if (btid1 == btid2)
21782  {
21783  return 0;
21784  }
21785 
21786  if (btid1->root_pageid > btid2->root_pageid)
21787  {
21788  return 1;
21789  }
21790  else if (btid1->root_pageid < btid2->root_pageid)
21791  {
21792  return -1;
21793  }
21794 
21795  if (btid1->vfid.fileid > btid2->vfid.fileid)
21796  {
21797  return 1;
21798  }
21799  else if (btid1->vfid.fileid < btid2->vfid.fileid)
21800  {
21801  return -1;
21802  }
21803 
21804  if (btid1->vfid.volid > btid2->vfid.volid)
21805  {
21806  return 1;
21807  }
21808  else if (btid1->vfid.volid < btid2->vfid.volid)
21809  {
21810  return -1;
21811  }
21812 
21813  return 0;
21814 }
21815 
21816 /*
21817  * btree_check_valid_record () - Check that record data is valid.
21818  *
21819  * return : NO_ERROR if the record passes all checks (or if b-tree info is not available), ER_FAILED otherwise.
21820  * thread_p (in) : Thread entry.
21821  * btid (in) : B-tree data.
21822  * recp (in) : Record descriptor.
21823  * node_type (in) : Node type (overflow or leaf).
21824  * key (in) : Expected key value (compared with the key stored in record if not null,
21825  * and if node type is leaf and if key doesn't have overflow pages; a mismatch is currently ignored).
 *
 * NOTE: The record is walked object by object: OID, optional class OID, optional insert MVCCID, optional delete
 * MVCCID and, after the first object of a leaf record, the key (or its overflow VPID). The walk must end exactly
 * at the end of the object area.
 * NOTE(review): several lines are absent from this listing (first line of the signature and the statements or
 * conditions marked below) - confirm against the upstream source.
21826  */
21827 int
21829  DB_VALUE * key)
21830 {
21831 #define BTREE_CHECK_VALID_PRINT_REC_MAX_LENGTH 1024
21832  OID oid, class_oid;
21833  MVCCID mvccid;
21834  int vpid_size = 0;
21835  OR_BUF buffer;
21836  short mvcc_flags;
21837  bool is_first_oid = true;
21838  bool has_fixed_size = false;
21839  bool has_overflow_pages = false;
21840  VPID first_overflow_vpid = VPID_INITIALIZER;
21841 
21842  assert (btid != NULL);
21843  assert (recp != NULL && recp->data != NULL && recp->length > 0);
21844  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
21845 
21846  if (btid == NULL || btid->key_type == NULL)
21847  {
21848  /* We don't have access to b-tree information to check the record. */
21849  return NO_ERROR;
21850  }
21851 
       /* NOTE(review): the condition guarding this block is absent from this listing; the block handles a record
        * that ends with the VPID of its first overflow page. */
21853  {
21854  char *vpid_ptr = NULL;
21855 
21856  has_overflow_pages = true;
21857  vpid_size = DISK_VPID_ALIGNED_SIZE;
21858 
       /* The overflow VPID occupies the last vpid_size bytes of the record. */
21859  vpid_ptr = recp->data + recp->length - vpid_size;
21860  OR_GET_VPID (vpid_ptr, &first_overflow_vpid);
21861  if (!log_is_in_crash_recovery ()
21862  && pgbuf_is_valid_page (thread_p, &first_overflow_vpid, true, NULL, NULL) == DISK_INVALID)
21863  {
21864  assert (false);
21865  return ER_FAILED;
21866  }
21867  }
21868 
       /* Iterate over the object area only; the trailing overflow VPID (if any) is excluded. */
21869  or_init (&buffer, recp->data, recp->length - vpid_size);
21870  while (buffer.ptr < buffer.endptr)
21871  {
21872  /* Get mvcc flags */
21873  mvcc_flags = btree_record_object_get_mvcc_flags (buffer.ptr);
21874  /* If MVCC is enabled, there are several cases when the object entry must have fixed size, which means that
21875  * insert/delete MVCCID must be present: 1. Overflow objects. 2. First object if leaf record if there are
21876  * overflow OID's. 3. Any non-first object if index is unique. */
21877  has_fixed_size = ((node_type == BTREE_OVERFLOW_NODE) || (has_overflow_pages && is_first_oid)
21878  || (BTREE_IS_UNIQUE (btid->unique_pk) && !is_first_oid));
21879  if (has_fixed_size)
21880  {
21881  assert ((mvcc_flags & BTREE_OID_HAS_MVCC_INSID) && (mvcc_flags & BTREE_OID_HAS_MVCC_DELID));
21882  }
21883  /* Get and check OID */
21884  if (or_get_oid (&buffer, &oid) != NO_ERROR)
21885  {
21886  assert (false);
21887  return ER_FAILED;
21888  }
       /* NOTE(review): one statement is absent from this listing at this point - confirm against the upstream
        * source. */
       /* Sanity bounds on the OID fields. */
21890  if (oid.pageid <= 0 || oid.slotid <= 0 || oid.slotid > ((short) (DB_PAGESIZE / sizeof (PGSLOTID)))
21891  || oid.volid < 0)
21892  {
21893  assert (false);
21894  return ER_FAILED;
21895  }
21896  if (btree_is_class_oid_packed (btid, recp, node_type, is_first_oid))
21897  {
21898  /* Get and check class OID */
21899  if (or_get_oid (&buffer, &class_oid) != NO_ERROR)
21900  {
21901  assert (false);
21902  return ER_FAILED;
21903  }
21904  if (class_oid.pageid <= 0 || class_oid.slotid <= 0
21905  || class_oid.slotid > ((short) (DB_PAGESIZE / sizeof (PGSLOTID))) || class_oid.volid < 0)
21906  {
21907  assert (false);
21908  return ER_FAILED;
21909  }
21910  }
21911  if (mvcc_flags & BTREE_OID_HAS_MVCC_INSID)
21912  {
21913  /* Get and check insert MVCCID */
21914  if (or_get_mvccid (&buffer, &mvccid) != NO_ERROR)
21915  {
21916  assert (false);
21917  return ER_FAILED;
21918  }
21919 
21920  /* Remove any possible online_index flags. */
       /* NOTE(review): the statement doing this removal is absent from this listing. */
21922  if (!MVCCID_IS_VALID (mvccid))
21923  {
21924  assert (false);
21925  return ER_FAILED;
21926  }
       /* NOTE(review): the condition guarding the next failure block is absent from this listing. */
21928  {
21929  assert (false);
21930  return ER_FAILED;
21931  }
21932  }
21933  if (mvcc_flags & BTREE_OID_HAS_MVCC_DELID)
21934  {
21935  /* Get and check delete MVCCID */
21936  if (or_get_mvccid (&buffer, &mvccid) != NO_ERROR)
21937  {
21938  assert (false);
21939  return ER_FAILED;
21940  }
       /* Outside crash recovery, a non-null delete MVCCID must precede the next MVCCID to be assigned. */
21941  if (mvccid != MVCCID_NULL && !MVCC_ID_PRECEDES (mvccid, log_Gl.hdr.mvcc_next_id)
21942  && !log_is_in_crash_recovery ())
21943  {
21944  assert (false);
21945  return ER_FAILED;
21946  }
21947  }
21948  if (is_first_oid && (node_type == BTREE_LEAF_NODE))
21949  {
21950  /* Key value is also saved */
       /* NOTE(review): the condition selecting between an in-record key and an overflow key is absent from this
        * listing. */
21952  {
21953  /* Get key value */
21954  DB_VALUE rec_key_value;
21955  TP_DOMAIN *key_domain = NULL;
21956  PR_TYPE *pr_type = NULL;
21957 
21958  db_make_null (&rec_key_value);
21959  key_domain = btid->key_type;
21960  pr_type = key_domain->type;
21961  if (pr_type->index_readval (&buffer, &rec_key_value, key_domain, -1, true, NULL, 0) != NO_ERROR)
21962  {
21963  assert (false);
21964  return ER_FAILED;
21965  }
21966  if (key != NULL && btree_compare_key (key, &rec_key_value, key_domain, 1, 1, NULL) != 0)
21967  {
21968  /* Expected key is not the same with the key found in record data. */
21969  /* This is possible when key fence is used. Should disable this verification or should include the
21970  * fence for compare */
21971  /* For now, do nothing */
21972  }
21973  pr_clear_value (&rec_key_value);
21974  }
21975  else
21976  {
21977  /* Skip overflow key vpid */
21978  buffer.ptr += DISK_VPID_SIZE;
21979  }
       /* Re-align the cursor after the key. */
21980  buffer.ptr = PTR_ALIGN (buffer.ptr, OR_INT_SIZE);
21981  }
21982  is_first_oid = false;
21983  }
       /* The walk must consume the object area exactly; overshooting means the record is malformed. */
21984  if (buffer.ptr != buffer.endptr)
21985  {
21986  assert (false);
21987  return ER_FAILED;
21988  }
21989  return NO_ERROR;
21990 }
21991 
21992 /*
21993  * btree_check_foreign_key () -
21994  * return: NO_ERROR
21995  * cls_oid(in):
21996  * hfid(in):
21997  * oid(in):
21998  * keyval(in):
21999  * n_attrs(in):
22000  * pk_cls_oid(in):
22001  * pk_btid(in):
22002  * fk_name(in):
22003  */
22004 int
22005 btree_check_foreign_key (THREAD_ENTRY * thread_p, OID * cls_oid, HFID * hfid, OID * oid, DB_VALUE * keyval, int n_attrs,
22006  OID * pk_cls_oid, BTID * pk_btid, const char *fk_name)
22007 {
22008  OID unique_oid;
22009  bool has_null;
22010  DB_VALUE val;
22011  int ret = NO_ERROR;
22012  OID part_oid;
22013  HFID class_hfid;
22014  BTID local_btid;
22015  PRUNING_CONTEXT pcontext;
22016  bool clear_pcontext = false;
22017  OR_CLASSREP *classrepr = NULL;
22018  int classrepr_cacheindex = -1;
22019  BTREE_SEARCH ret_search;
22020 
22021  db_make_null (&val);
22022  OID_SET_NULL (&unique_oid);
22023 
22024  /* SQL standard defines as follows:
22025  * If no <match type> was specified then, for each row R1 of the referencing table,
22026  * either at least one of the values of the referencing columns in R1 shall be a null value,
22027  * or the value of each referencing column in R1 shall be equal to the value of
22028  * the corresponding referenced column in some row of the referenced table.
22029  * Please notice that we don't currently support <match type>.
22030  */
22031  if (n_attrs > 1)
22032  {
22033  has_null = btree_multicol_key_has_null (keyval);
22034  }
22035  else
22036  {
22037  has_null = DB_IS_NULL (keyval);
22038  }
22039 
22040  if (has_null == true)
22041  {
22042  return NO_ERROR;
22043  }
22044 
22045  /* get class representation to find partition information */
22046  classrepr = heap_classrepr_get (thread_p, pk_cls_oid, NULL, NULL_REPRID, &classrepr_cacheindex);
22047  if (classrepr == NULL)
22048  {
22049  goto exit_on_error;
22050  }
22051 
22052  if (classrepr->has_partition_info > 0)
22053  {
22054  (void) partition_init_pruning_context (&pcontext);
22055  clear_pcontext = true;
22056 
22057  ret = partition_load_pruning_context (thread_p, pk_cls_oid, DB_PARTITIONED_CLASS, &pcontext);
22058  if (ret != NO_ERROR)
22059  {
22060  goto exit_on_error;
22061  }
22062  }
22063 
22064  BTID_COPY (&local_btid, pk_btid);
22065  COPY_OID (&part_oid, pk_cls_oid);
22066 
22067  if (classrepr->has_partition_info > 0 && pcontext.partitions != NULL)
22068  {
22069  ret = partition_prune_unique_btid (&pcontext, keyval, &part_oid, &class_hfid, &local_btid);
22070  if (ret != NO_ERROR)
22071  {
22072  goto exit_on_error;
22073  }
22074  }
22075 
22076  ret_search = xbtree_find_unique (thread_p, &local_btid, S_SELECT_WITH_LOCK, keyval, &part_oid, &unique_oid, true);
22077  if (ret_search == BTREE_KEY_NOTFOUND)
22078  {
22079  char *val_print = NULL;
22080 
22081  val_print = pr_valstring (keyval);
22082  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_FK_INVALID, 2, fk_name, (val_print ? val_print : "unknown value"));
22083  if (val_print)
22084  {
22085  db_private_free (thread_p, val_print);
22086  }
22087  ret = ER_FK_INVALID;
22088  goto exit_on_error;
22089  }
22090  else if (ret_search == BTREE_ERROR_OCCURRED)
22091  {
22092  ASSERT_ERROR_AND_SET (ret);
22093  goto exit_on_error;
22094  }
22095 
22096  assert (ret_search == BTREE_KEY_FOUND);
22097  /* TODO: For read committed... Do we need to keep the lock? */
22098 
22099  if (clear_pcontext == true)
22100  {
22101  partition_clear_pruning_context (&pcontext);
22102  }
22103  if (classrepr != NULL)
22104  {
22105  heap_classrepr_free_and_init (classrepr, &classrepr_cacheindex);
22106  }
22107 
22108  return ret;
22109 
22110 exit_on_error:
22111 
22112  if (clear_pcontext == true)
22113  {
22114  partition_clear_pruning_context (&pcontext);
22115  }
22116  if (classrepr != NULL)
22117  {
22118  heap_classrepr_free_and_init (classrepr, &classrepr_cacheindex);
22119  }
22120 
22121  return (ret == NO_ERROR && (ret = er_errid ()) == NO_ERROR) ? ER_FAILED : ret;
22122 }
22123 
22124 /*
22125  * btree_scan_for_show_index_capacity () - scan index capacity information
22126  * return: S_ERROR, S_SUCCESS, or S_END
22127  *
22128  * thread_p(in):
22129  * out_values(out):
22130  * out_cnt(in):
22131  * class_name(in);
22132  * index_p(in);
22133  */
22134 static SCAN_CODE
22135 btree_scan_for_show_index_capacity (THREAD_ENTRY * thread_p, DB_VALUE ** out_values, int out_cnt,
22136  const char *class_name, OR_INDEX * index_p)
22137 {
22138  int idx = 0;
22139  int error = NO_ERROR;
22140  BTREE_CAPACITY cpc;
22141  PAGE_PTR root_page_ptr = NULL;
22142  VPID root_vpid;
22143  char buf[256] = { 0 };
22144  BTID *btid_p = NULL;
22145 
22146  assert_release (index_p != NULL);
22147 
22148  /* get btree capacity */
22149  btid_p = &index_p->btid;
22150  root_vpid.pageid = btid_p->root_pageid;
22151  root_vpid.volid = btid_p->vfid.volid;
22152  root_page_ptr = pgbuf_fix (thread_p, &root_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
22153  if (root_page_ptr == NULL)
22154  {
22155  ASSERT_ERROR_AND_SET (error);
22156  goto cleanup;
22157  }
22158 
22159  error = btree_index_capacity (thread_p, btid_p, &cpc);
22160  if (error != NO_ERROR)
22161  {
22162  goto cleanup;
22163  }
22164 
22165  /* scan index capacity into out_values */
22166  error = db_make_string_copy (out_values[idx], class_name);
22167  idx++;
22168  if (error != NO_ERROR)
22169  {
22170  goto cleanup;
22171  }
22172 
22173  error = db_make_string_copy (out_values[idx], index_p->btname);
22174  idx++;
22175  if (error != NO_ERROR)
22176  {
22177  goto cleanup;
22178  }
22179 
22180  (void) btid_to_string (buf, sizeof (buf), btid_p);
22181  error = db_make_string_copy (out_values[idx], buf);
22182  idx++;
22183  if (error != NO_ERROR)
22184  {
22185  goto cleanup;
22186  }
22187 
22188  db_make_int (out_values[idx], cpc.dis_key_cnt);
22189  idx++;
22190 
22191  db_make_int (out_values[idx], cpc.tot_val_cnt);
22192  idx++;
22193 
22194  db_make_int (out_values[idx], cpc.avg_val_per_key);
22195  idx++;
22196 
22197  db_make_int (out_values[idx], cpc.leaf_pg_cnt);
22198  idx++;
22199 
22200  db_make_int (out_values[idx], cpc.nleaf_pg_cnt);
22201  idx++;
22202 
22203  db_make_int (out_values[idx], cpc.tot_pg_cnt);
22204  idx++;
22205 
22206  db_make_int (out_values[idx], cpc.height);
22207  idx++;
22208 
22209  db_make_int (out_values[idx], cpc.avg_key_len);
22210  idx++;
22211 
22212  db_make_int (out_values[idx], cpc.avg_rec_len);
22213  idx++;
22214 
22215  (void) util_byte_to_size_string (buf, 64, (UINT64) (cpc.tot_space));
22216  error = db_make_string_copy (out_values[idx], buf);
22217  idx++;
22218  if (error != NO_ERROR)
22219  {
22220  goto cleanup;
22221  }
22222 
22223  (void) util_byte_to_size_string (buf, 64, (UINT64) (cpc.tot_used_space));
22224  error = db_make_string_copy (out_values[idx], buf);
22225  idx++;
22226  if (error != NO_ERROR)
22227  {
22228  goto cleanup;
22229  }
22230 
22231  (void) util_byte_to_size_string (buf, 64, (UINT64) (cpc.tot_free_space));
22232  error = db_make_string_copy (out_values[idx], buf);
22233  idx++;
22234  if (error != NO_ERROR)
22235  {
22236  goto cleanup;
22237  }
22238 
22239  db_make_int (out_values[idx], cpc.avg_pg_key_cnt);
22240  idx++;
22241 
22242  (void) util_byte_to_size_string (buf, 64, (UINT64) (cpc.avg_pg_free_sp));
22243  error = db_make_string_copy (out_values[idx], buf);
22244  idx++;
22245  if (error != NO_ERROR)
22246  {
22247  goto cleanup;
22248  }
22249 
22250  assert (idx == out_cnt);
22251 
22252 cleanup:
22253 
22254  if (root_page_ptr != NULL)
22255  {
22256  pgbuf_unfix_and_init (thread_p, root_page_ptr);
22257  }
22258 
22259  return (error == NO_ERROR) ? S_SUCCESS : S_ERROR;
22260 }
22261 
/*
 * Compare two log sequence addresses for equality.
 *
 * return : true if LSA_EQ (a, b) holds, false otherwise.
 * a (in) : First LSA; must not be NULL.
 * b (in) : Second LSA; must not be NULL.
 *
 * NOTE: When SERVER_MODE is not defined, a mismatch additionally triggers assert_release.
 * NOTE(review): the line holding the function name and parameter list is absent from this listing - confirm
 * against the upstream source.
 */
22262 static bool
22264 {
22265  assert (a != NULL);
22266  assert (b != NULL);
22267 
22268 #if !defined(SERVER_MODE)
22269  assert_release (LSA_EQ (a, b));
22270 #endif
22271 
22272  return LSA_EQ (a, b) ? true : false;
22273 }
22274 
22275 /*
22276  * btree_key_find_first_visible_row_from_all_ovf () - MVCC find first visible row in OID overflow pages
22277  * return: result of the first overflow page search that is not BTREE_KEY_NOTFOUND; BTREE_KEY_NOTFOUND if the
 * whole chain was searched without a result; BTREE_ERROR_OCCURRED on error
22278  * btid_int(in): B+tree index identifier
22279  * first_ovfl_vpid(in): First overflow vpid
22280  * oid(out): Object identifier of the visible row or NULL_OID
22281  * class_oid(out): Object class identifier
 *
 * NOTE: Overflow pages are visited one at a time, following the next-overflow link; no page remains fixed when
 * the function returns.
 * NOTE(review): the line holding the function name and first parameters, and the declaration of the local
 * `result`, are absent from this listing - confirm against the upstream source.
22282  */
22283 static BTREE_SEARCH
22285  OID * oid, OID * class_oid)
22286 {
22287  RECDES ovfl_copy_rec;
22288  char ovfl_copy_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
22289  VPID next_ovfl_vpid;
22290  PAGE_PTR ovfl_page = NULL;
22292 
22293  assert (btid_int != NULL);
22294  assert (first_ovfl_vpid != NULL);
22295  assert (oid != NULL && class_oid != NULL);
22296 
       /* Records are copied into an aligned local buffer. */
22297  ovfl_copy_rec.area_size = DB_PAGESIZE;
22298  ovfl_copy_rec.data = PTR_ALIGN (ovfl_copy_rec_buf, BTREE_MAX_ALIGN);
22299  next_ovfl_vpid = *first_ovfl_vpid;
22300 
22301  /* find first visible OID into overflow page */
22302  while (!VPID_ISNULL (&next_ovfl_vpid))
22303  {
22304  ovfl_page = pgbuf_fix (thread_p, &next_ovfl_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
22305  if (ovfl_page == NULL)
22306  {
22307  ASSERT_ERROR ();
22308  goto error;
22309  }
22310 
22311  (void) pgbuf_check_page_ptype (thread_p, ovfl_page, PAGE_BTREE);
22312 
       /* The objects of an overflow page are read from the record in slot 1. */
22313  if (spage_get_record (thread_p, ovfl_page, 1, &ovfl_copy_rec, COPY) != S_SUCCESS)
22314  {
22315  goto error;
22316  }
22317  assert (ovfl_copy_rec.length % 4 == 0);
22318 
22319  result =
22320  btree_key_find_first_visible_row (thread_p, btid_int, &ovfl_copy_rec, 0, BTREE_OVERFLOW_NODE, oid, class_oid,
22321  -1);
22322  if (result == BTREE_ERROR_OCCURRED)
22323  {
22324  goto error;
22325  }
22326  else if (result != BTREE_KEY_NOTFOUND)
22327  {
       /* Search produced a result in this page; release it and stop. */
22328  pgbuf_unfix_and_init (thread_p, ovfl_page);
22329  return result;
22330  }
22331 
       /* Nothing in this page: fetch the link to the next overflow page before unfixing the current one. */
22332  btree_get_next_overflow_vpid (thread_p, ovfl_page, &next_ovfl_vpid);
22333  pgbuf_unfix_and_init (thread_p, ovfl_page);
22334  }
22335 
22336  return BTREE_KEY_NOTFOUND;
22337 
22338 error:
22339 
22340  if (ovfl_page != NULL)
22341  {
22342  pgbuf_unfix_and_init (thread_p, ovfl_page);
22343  }
22344 
22345  return BTREE_ERROR_OCCURRED;
22346 }
22347 
22348 /*
22349  * btree_rv_undo_global_unique_stats_commit () - Undo a commit of global unique statistics.
22350  * return: NO_ERROR on success or when the b-tree no longer exists, ER_GENERIC_ERROR if the update fails
22351  * recv(in): Recovery structure
22352  *
22353  * Note: Decrement the in-memory global unique statistics.
 * Note: Recovery data is read as a BTID (OR_BTID_ALIGNED_SIZE bytes) followed by three integers: number of
 * nulls, number of OIDs, number of keys. The statistics are updated with the negated values.
 * NOTE(review): several lines are absent from this listing (the line holding the function name and parameters,
 * the condition guarding the "b-tree deleted" check, the statement of its else branch, the logging condition
 * and call, and the statement after the error label) - confirm against the upstream source.
22354  */
22355 int
22357 {
22358  char *datap;
22359  int num_nulls, num_oids, num_keys;
22360  BTID btid;
22361 
22362  assert (recv->length >= (3 * OR_INT_SIZE) + OR_BTID_ALIGNED_SIZE);
22363 
22364  /* unpack the root statistics */
22365  datap = (char *) recv->data;
22366 
22367  OR_GET_BTID (datap, &btid);
22368  datap += OR_BTID_ALIGNED_SIZE;
22369 
22370  num_nulls = OR_GET_INT (datap);
22371  datap += OR_INT_SIZE;
22372 
22373  num_oids = OR_GET_INT (datap);
22374  datap += OR_INT_SIZE;
22375 
22376  num_keys = OR_GET_INT (datap);
22377  datap += OR_INT_SIZE;
22378 
22379  /* Because this log record is logical, it will be processed even if the B-tree was deleted. If the B-tree was deleted
22380  * then skip update of unique statistics in global hash. */
22382  {
22383  /* Only in recovery this is possible */
22384  if (disk_is_page_sector_reserved (thread_p, btid.vfid.volid, btid.root_pageid) != DISK_VALID)
22385  {
22386  /* The B-tree was already deleted */
22387  return NO_ERROR;
22388  }
22389  }
22390  else
22391  {
22392  /* This should not happen */
22394  }
       /* Undo: apply the logged counters as negative deltas. */
22395  if (logtb_update_global_unique_stats_by_delta (thread_p, &btid, -num_oids, -num_nulls, -num_keys, false) != NO_ERROR)
22396  {
22397  goto error;
22398  }
22399 
22401  {
22403  "Recover undo unique statistics for index (%d, %d|%d): "
22404  "nulls=%d, oids=%d, keys=%d. LSA=%lld|%d.\n", btid.root_pageid, btid.vfid.volid, btid.vfid.fileid,
22405  num_nulls, num_oids, num_keys, (long long int) log_Gl.unique_stats_table.curr_rcv_rec_lsa.pageid,
22407  }
22408 
22409  return NO_ERROR;
22410 
22411 error:
22413 
22414  return ER_GENERIC_ERROR;
22415 }
22416 
22417 /*
22418  * btree_rv_redo_global_unique_stats_commit () - Redo a commit of global unique statistics.
22419  * return: NO_ERROR on success or when the b-tree no longer exists, ER_GENERIC_ERROR if the update fails
22420  * recv(in): Recovery structure
22421  *
22422  * Note: Recover the in-memory global unique statistics.
 * Note: Recovery data is read as a BTID (OR_BTID_ALIGNED_SIZE bytes) followed by three integers: number of
 * nulls, number of OIDs, number of keys. The values are passed unchanged to
 * logtb_rv_update_global_unique_stats_by_abs.
 * NOTE(review): several lines are absent from this listing (the line holding the function name and parameters,
 * the logging condition and call, and the statement after the error label) - confirm against the upstream source.
22423  */
22424 int
22426 {
22427  char *datap;
22428  int num_nulls, num_oids, num_keys;
22429  BTID btid;
22430 
22431  assert (recv->length >= (3 * OR_INT_SIZE) + OR_BTID_ALIGNED_SIZE);
22432 
22433  /* unpack the root statistics */
22434  datap = (char *) recv->data;
22435 
22436  OR_GET_BTID (datap, &btid);
22437  datap += OR_BTID_ALIGNED_SIZE;
22438 
22439  num_nulls = OR_GET_INT (datap);
22440  datap += OR_INT_SIZE;
22441 
22442  num_oids = OR_GET_INT (datap);
22443  datap += OR_INT_SIZE;
22444 
22445  num_keys = OR_GET_INT (datap);
22446  datap += OR_INT_SIZE;
22447 
22448  /* Because this log record is logical, it will be processed even if the B-tree was deleted. If the B-tree was deleted
22449  * then skip update of unique statistics in global hash. */
22450  if (disk_is_page_sector_reserved (thread_p, btid.vfid.volid, btid.root_pageid) != DISK_VALID)
22451  {
22452  /* The B-tree was already deleted */
22453  return NO_ERROR;
22454  }
       /* Redo: pass the logged counters as they are. */
22455  if (logtb_rv_update_global_unique_stats_by_abs (thread_p, &btid, num_oids, num_nulls, num_keys) != NO_ERROR)
22456  {
22457  goto error;
22458  }
22459 
22461  {
22463  "Recover redo unique statistics for index (%d, %d|%d): "
22464  "nulls=%d, oids=%d, keys=%d. LSA=%lld|%d.\n", btid.root_pageid, btid.vfid.volid, btid.vfid.fileid,
22465  num_nulls, num_oids, num_keys, (long long int) log_Gl.unique_stats_table.curr_rcv_rec_lsa.pageid,
22467  }
22468 
22469  return NO_ERROR;
22470 
22471 error:
22473 
22474  return ER_GENERIC_ERROR;
22475 }
22476 
22477 /*
22478  * btree_search_key_and_apply_functions () - B-tree internal function to traverse the tree in the direction given by
22479  * a key and calling three types of function: one to fix/handle root page,
22480  * one on the traversed nodes and one on the leaf node pointed by key.
22481  *
22482  * return : Error code.
22483  * thread_p (in) : Thread entry.
22484  * btid (in) : B-tree identifier.
22485  * btid_int (out) : Output b-tree info if not NULL.
22486  * key (in) : Search key value.
22487  * root_function (in) : Function called to fix/process root node.
22488  * root_args (in/out) : Arguments for root function.
22489  * advance_function (in) : Function called to advance through and process the nodes discovered during traversal.
22490  * advance_args (in/out) : Arguments for advance function.
22491  * key_function (in) : Function to process key record (and its leaf and overflow nodes).
22492  * process_key_args (in/out) : Arguments for key function.
22493  * search_key (out) : Search key result.
22494  * leaf_page_ptr (out) : If not NULL, it will output the leaf node page where key lead the search.
22495  */
22496 static int
22498  BTREE_ROOT_WITH_KEY_FUNCTION * root_function, void *root_args,
22499  BTREE_ADVANCE_WITH_KEY_FUNCTION * advance_function, void *advance_args,
22500  BTREE_PROCESS_KEY_FUNCTION * key_function, void *process_key_args,
22501  BTREE_SEARCH_KEY_HELPER * search_key, PAGE_PTR * leaf_page_ptr)
22502 {
22503  PAGE_PTR crt_page = NULL; /* Currently fixed page. */
22504  PAGE_PTR advance_page = NULL; /* Next level page. */
22505  int error_code = NO_ERROR; /* Error code. */
22506  BTID_INT local_btid_int; /* Store b-tree info if b-tree info pointer argument is NULL. */
22507  bool is_leaf = false; /* Set to true if crt_page is a leaf node. */
22508  bool stop = false; /* Set to true to stop advancing in b-tree. */
22509  bool restart = false; /* Set to true to restart b-tree traversal from root. */
22510  BTREE_SEARCH_KEY_HELPER local_search_key; /* Store search key result if search key pointer argument is NULL. */
22511 
22512  /* Assert expected arguments. */
22513  assert (btid != NULL);
22514  assert (key != NULL);
22515  assert (advance_function != NULL);
22516 
22517  if (leaf_page_ptr != NULL)
22518  {
22519  /* Initialize leaf_page_ptr as NULL. */
22520  *leaf_page_ptr = NULL;
22521  }
22522 
22523  if (btid_int == NULL)
22524  {
22525  /* Use local variable to store b-tree info. */
22526  btid_int = &local_btid_int;
22527  }
22528  if (search_key == NULL)
22529  {
22530  /* Use local variable to store search key result. */
22531  search_key = &local_search_key;
22532  }
22533 
22534 start_btree_traversal:
22535  /* Traversal starting point. The function will try to locate key while calling 3 types of manipulation functions: 1.
22536  * Root function: It may be used to fix and modify root page. If no such function is provided,
22537  * btree_get_root_with_key is used by default. 2. Advance function: It is used to determine the path to follow in
22538  * order to locate the key in leaf node. It can manipulate the nodes it passes (merge, split). 3. Process key
22539  * function: It must process the leaf and overflow key/OIDs pages where key is/should be found. It can be a read-only
22540  * function or it can insert/delete/modify the key. */
22541 
22542  /* Reset restart flag. */
22543  restart = false;
22544  is_leaf = false;
22545  search_key->result = BTREE_KEY_NOTFOUND;
22546  search_key->slotid = NULL_SLOTID;
22547 
22548  /* Make sure current page has been unfixed before restarting traversal. */
22549  if (crt_page != NULL)
22550  {
22551  pgbuf_unfix_and_init (thread_p, crt_page);
22552  }
22553 
22554  /* Fix b-tree root page. */
22555  if (root_function == NULL)
22556  {
22557  /* No root function is provided. Use default function that gets root page and b-tree data
22558  * (btree_get_root_with_key). */
22559  root_function = btree_get_root_with_key;
22560  }
22561  /* Call root function. */
22562  error_code =
22563  root_function (thread_p, btid, btid_int, key, &crt_page, &is_leaf, search_key, &stop, &restart, root_args);
22564  if (error_code != NO_ERROR)
22565  {
22566  ASSERT_ERROR ();
22567  goto error;
22568  }
22569  if (stop)
22570  {
22571  /* Stop condition was met. Do not advance. */
22572  goto end;
22573  }
22574  if (restart)
22575  {
22576  /* Restart from top. */
22577  goto start_btree_traversal;
22578  }
22579  /* Root page must be fixed. */
22580  assert (crt_page != NULL);
22581 
22582  /* Advance until leaf page is found. */
22583  while (!is_leaf)
22584  {
22585  /* Call advance function. */
22586  error_code =
22587  advance_function (thread_p, btid_int, key, &crt_page, &advance_page, &is_leaf, search_key, &stop, &restart,
22588  advance_args);
22589  if (error_code != NO_ERROR)
22590  {
22591  /* Error! */
22592  ASSERT_ERROR ();
22593  goto error;
22594  }
22595  if (stop)
22596  {
22597  /* Stop search here */
22598  goto end;
22599  }
22600  if (restart)
22601  {
22602  /* Search must be restarted from top. */
22603  if (advance_page != NULL)
22604  {
22605  pgbuf_unfix_and_init (thread_p, advance_page);
22606  }
22607  goto start_btree_traversal;
22608  }
22609 
22610  /* Advance if not leaf. */
22611  if (!is_leaf)
22612  {
22613  /* Free current node page and set advance_page as current. */
22614  assert (advance_page != NULL);
22615  if (crt_page != NULL)
22616  {
22617  pgbuf_unfix (thread_p, crt_page);
22618  }
22619  crt_page = advance_page;
22620  advance_page = NULL;
22621  }
22622  }
22623 
22624  /* Leaf page is reached. */
22625 
22626  assert (is_leaf && !stop && !restart);
22627  assert (crt_page != NULL);
22628  assert (btree_get_node_header (thread_p, crt_page) != NULL
22629  && btree_get_node_header (thread_p, crt_page)->node_level == 1);
22630 
22631  if (key_function != NULL)
22632  {
22633  /* Call key_function. */
22634  /* Key args must be also provided. */
22635  assert (process_key_args != NULL);
22636  error_code = key_function (thread_p, btid_int, key, &crt_page, search_key, &restart, process_key_args);
22637  if (error_code != NO_ERROR)
22638  {
22639  /* Error! */
22640  ASSERT_ERROR ();
22641  goto error;
22642  }
22643  if (restart)
22644  {
22645  /* Search must be restarted. */
22646  goto start_btree_traversal;
22647  }
22648  }
22649 
22650  /* Finished */
22651 
22652 end:
22653  /* Safe guard: don't leak fixed pages. */
22654  assert (advance_page == NULL);
22655 
22656  if (is_leaf && leaf_page_ptr != NULL)
22657  {
22658  /* Output leaf page. */
22659  *leaf_page_ptr = crt_page;
22660  }
22661  else if (crt_page != NULL)
22662  {
22663  /* Unfix leaf page. */
22664  pgbuf_unfix (thread_p, crt_page);
22665  }
22666  return NO_ERROR;
22667 
22668 error:
22669  /* Error! */
22670  /* Unfix all used pages. */
22671  if (crt_page != NULL)
22672  {
22673  pgbuf_unfix (thread_p, crt_page);
22674  }
22675  if (advance_page != NULL)
22676  {
22677  pgbuf_unfix (thread_p, advance_page);
22678  }
22679  assert (error_code != NO_ERROR);
22680  ASSERT_ERROR ();
22681  return error_code;
22682 }
22683 
22684 /*
22685  * btree_get_root_with_key () - BTREE_ROOT_WITH_KEY_FUNCTION used by default to read root page header and get b-tree
22686  * data from header. If root is also a leaf, the key is searched in it.
22687  *
22688  * return : Error code.
22689  * thread_p (in) : Thread entry.
22690  * btid (in) : B-tree identifier.
22691  * btid_int (in/out) : BTID_INT (B-tree data). Passed to btree_fix_root_with_info, unless other_args asks to reuse it.
22692  * key (in/out) : Key value. If it is a midxkey with NULL domain, its domain is set to btid_int->key_type.
22693  * root_page (out) : Output b-tree root page (must point to NULL on entry).
22694  * is_leaf (out) : Output true if root is leaf page (root header node_level is 1).
22695  * search_key (out) : Output key search result (if root is also leaf).
22696  * stop (out) : Output true if advancing in b-tree should stop (never set by this default function).
22697  * restart (out) : Output true if advancing in b-tree should be restarted (never set by this default function).
22698  * other_args (in) : If not NULL, it is read as a bool *; when true, NULL is passed to btree_fix_root_with_info instead of btid_int.
22699  */
22700 static int
22702  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key, bool * stop,
22703  bool * restart, void *other_args)
22704 {
22705  BTREE_ROOT_HEADER *root_header = NULL;
22706  int error_code = NO_ERROR;
22707 
22708  /* Assert expected arguments. */
22709  assert (btid != NULL);
22710  assert (key != NULL);
22711  assert (root_page != NULL && *root_page == NULL);
22712  assert (is_leaf != NULL);
22713  assert (search_key != NULL);
22714 
22715  bool reuse_btid_int = other_args ? *((bool *) other_args) : false;
22716 
22717  /* Get root page (read latch) and BTID_INT. NULL is passed instead of btid_int when reuse_btid_int is true. */
22718  *root_page =
22719  btree_fix_root_with_info (thread_p, btid, PGBUF_LATCH_READ, NULL, &root_header, (reuse_btid_int ? NULL : btid_int));
22720  if (*root_page == NULL)
22721  {
22722  /* Error! */
22723  ASSERT_ERROR_AND_SET (error_code);
22724  return error_code;
22725  }
22726  assert (btid_int != NULL);
22727 
22728  if (DB_VALUE_TYPE (key) == DB_TYPE_MIDXKEY && key->data.midxkey.domain == NULL)
22729  {
22730  /* Use domain from b-tree info. */
22731  key->data.midxkey.domain = btid_int->key_type;
22732  }
22733 
22734  *is_leaf = (root_header->node.node_level == 1);
22735  if (*is_leaf)
22736  {
22737  /* Check if key is found in page. */
22738  error_code = btree_search_leaf_page (thread_p, btid_int, *root_page, key, search_key);
22739  if (error_code != NO_ERROR)
22740  {
22741  ASSERT_ERROR ();
22742  return error_code; /* NOTE: *root_page is still fixed; this function does not unfix it. */
22743  }
22744  }
22745  /* Success. */
22746  return NO_ERROR;
22747 }
22748 
22749 /*
22750  * btree_advance_and_find_key () - Fix next node in b-tree following given key.
22751  * If current node is a leaf, search the key in it instead and output the search result (including key slot).
22752  *
22753  * return : Error code.
22754  * thread_p (in) : Thread entry.
22755  * btid_int (in) : B-tree data.
22756  * key (in) : Search key value.
22757  * crt_page (in) : Page of current node (must be fixed; it is not unfixed by this function).
22758  * advance_to_page (out) : Fixed page of child node found by following key (must point to NULL on entry; not set for leaf node).
22759  * is_leaf (out) : Output true if current page is leaf node.
22760  * search_key (out) : Leaf node: result of btree_search_leaf_page. Non-leaf node: only slotid is output by btree_search_nonleaf_page.
22761  * stop (out) : Output true if advancing in b-tree should be stopped (never set by this function).
22762  * restart (out) : Output true if advancing in b-tree should be restarted from top (never set by this function).
22763  * other_args (in/out) : Not used.
22764  */
22765 static int
22767  PAGE_PTR * advance_to_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
22768  bool * stop, bool * restart, void *other_args)
22769 {
22770  BTREE_NODE_HEADER *node_header;
22771  BTREE_NODE_TYPE node_type;
22772  VPID child_vpid;
22773  int error_code;
22774 
22775  assert (btid_int != NULL);
22776  assert (key != NULL);
22777  assert (crt_page != NULL && *crt_page != NULL);
22778  assert (advance_to_page != NULL && *advance_to_page == NULL);
22779  assert (search_key != NULL);
22780 
22781  /* Get node header. */
22782  node_header = btree_get_node_header (thread_p, *crt_page);
22783  if (node_header == NULL)
22784  {
22785  assert_release (false);
22786  return ER_FAILED;
22787  }
22788  node_type = node_header->node_level > 1 ? BTREE_NON_LEAF_NODE : BTREE_LEAF_NODE;
22789 
22790  if (node_type == BTREE_LEAF_NODE)
22791  {
22792  /* Leaf level was reached, stop advancing. */
22793  *is_leaf = true;
22794 
22795  /* Is key in page? */
22796  error_code = btree_search_leaf_page (thread_p, btid_int, *crt_page, key, search_key);
22797  if (error_code != NO_ERROR)
22798  {
22799  ASSERT_ERROR ();
22800  return error_code;
22801  }
22802 
22803  /* Make sure slot ID is set if key was found. */
22804  assert (search_key->result != BTREE_KEY_FOUND || search_key->slotid != NULL_SLOTID);
22805  }
22806  else
22807  {
22808  /* Non-leaf page. Find the child to follow for this key. */
22809  *is_leaf = false;
22810 
22811  error_code = btree_search_nonleaf_page (thread_p, btid_int, *crt_page, key, &search_key->slotid, &child_vpid,
22812  NULL);
22813  if (error_code != NO_ERROR)
22814  {
22815  ASSERT_ERROR ();
22816  return error_code;
22817  }
22818 
22819  /* Advance to child: fix it with read latch. Current page stays fixed. */
22820  assert (!VPID_ISNULL (&child_vpid));
22821  *advance_to_page = pgbuf_fix (thread_p, &child_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
22822  if (*advance_to_page == NULL)
22823  {
22824  /* Error fixing child. */
22825  ASSERT_ERROR_AND_SET (error_code);
22826  return error_code;
22827  }
22828  }
22829 
22830  /* Success. */
22831  return NO_ERROR;
22832 }
22833 
22834 /*
22835  * btree_key_find_unique_version_oid () - Find the visible object version from key. Since the index is unique,
22836  * there must be at most one visible version.
22837  *
22838  * return : Error code.
22839  * thread_p (in) : Thread entry.
22840  * btid_int (in) : B-tree info.
22841  * key (in) : Key value.
22842  * leaf_page (in/out) : Leaf page pointer (must be fixed).
22843  * search_key (in) : Search key result. Nothing is read from the page unless result is BTREE_KEY_FOUND.
22844  * restart (out) : Set to true if index must be traversed again from root node (never assigned in this function).
22845  * other_args (in/out) : BTREE_FIND_UNIQUE_HELPER *. Reads snapshot and match_class_oid; outputs found_object and oid.
22846  */
22847 static int
22849  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
22850 {
22851  RECDES record; /* Key record (leaf or overflow). */
22852  LEAF_REC leaf_info; /* Leaf record info (key_len & ovfl). */
22853  int error_code = NO_ERROR; /* Error code. */
22854  int offset; /* Offset in record data where key is ended and OID list is started. */
22855  /* Helper used to process record and find visible object. */
22857  /* Helper used to describe find unique process and to output results. */
22858  BTREE_FIND_UNIQUE_HELPER *find_unique_helper = (BTREE_FIND_UNIQUE_HELPER *) other_args;
22859  OID unique_oid = OID_INITIALIZER; /* OID of unique object. */
22860  bool clear_key = false; /* Passed to btree_read_record; not checked afterwards. */
22861 
22862  /* Assert expected arguments. */
22863  assert (btid_int != NULL);
22864  assert (key != NULL);
22865  assert (leaf_page != NULL && *leaf_page != NULL);
22866  assert (find_unique_helper != NULL);
22867 
22868  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_TRAVERSE);
22869  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE_TRAVERSE);
22870 
22871  /* Initialize find unique helper. */
22872  find_unique_helper->found_object = false;
22873 
22874  /* Normally, this function should be called only on unique indexes. However there are some exceptions in catalog
22875  * classes (e.g. db_user, db_class) that have alternative mechanisms to ensure unicity. They still call find unique
22876  * on index to quickly get OID with name. This function does work for these cases. So, asserting index is unique is
22877  * not necessary here. */
22878 
22879  if (search_key->result != BTREE_KEY_FOUND)
22880  {
22881  /* Key was not found. found_object stays false. */
22882  return NO_ERROR;
22883  }
22884 
22885  /* Find unique visible object version. Since the index is unique, there can only be one visible version in the key.
22886  * Parse all key objects until the one visible to current transaction is found. NOTE: The newest object version in
22887  * the key is always kept first. This is also usually the object being manipulated by running transactions. However,
22888  * this isn't always the visible object for current transaction. The visible version for current transaction may be
22889  * deleted by another, but still visible due to snapshot. */
22890 
22891  /* Get key leaf record. */
22892  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, PEEK) != S_SUCCESS)
22893  {
22894  /* Unexpected error. */
22895  assert_release (false);
22896  return ER_FAILED;
22897  }
22898  /* Read key leaf record. */
22899  error_code =
22900  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE, &clear_key, &offset,
22901  PEEK_KEY_VALUE, NULL);
22902  if (error_code != NO_ERROR)
22903  {
22904  ASSERT_ERROR ();
22905  return error_code;
22906  }
22907 
22908  /* Initialize the helper for btree_record_satisfies_snapshot. */
22909  /* OID buffer. Only one OID will be copied. */
22910  rec_process_helper.oid_ptr = &unique_oid;
22911  rec_process_helper.oid_capacity = 1;
22912  /* MVCC snapshot. */
22913  rec_process_helper.snapshot = find_unique_helper->snapshot;
22914 
22915  /* Match class OID. */
22916  COPY_OID (&rec_process_helper.match_class_oid, &find_unique_helper->match_class_oid);
22917 
22918  /* Call btree_record_satisfies_snapshot on each object found in key. */
22919  error_code =
22920  btree_key_process_objects (thread_p, btid_int, &record, offset, &leaf_info, btree_record_satisfies_snapshot,
22921  &rec_process_helper);
22922  if (error_code != NO_ERROR)
22923  {
22924  /* Error! */
22925  ASSERT_ERROR ();
22926  return error_code;
22927  }
22928  if (rec_process_helper.oid_cnt > 0)
22929  {
22930  /* Found visible object. Output it through find unique helper. */
22931  assert (rec_process_helper.oid_cnt == 1);
22932  assert (!OID_ISNULL (&unique_oid));
22933  find_unique_helper->found_object = true;
22934  COPY_OID (&find_unique_helper->oid, &unique_oid);
22935  }
22936 
22937  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_LEAF);
22938  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE);
22939  return NO_ERROR;
22940 }
22941 
22942 /*
22943  * btree_key_find_and_lock_unique () - Find key and lock its unique non-dirty version.
22944  * Dispatcher: calls btree_key_find_and_lock_unique_of_unique if BTREE_IS_UNIQUE (btid_int->unique_pk), otherwise btree_key_find_and_lock_unique_of_non_unique. All arguments are forwarded unchanged.
22945  * return : Error code.
22946  * thread_p (in) : Thread entry.
22947  * btid_int (in) : B-tree info.
22948  * key (in) : Key value.
22949  * leaf_page (in/out) : Leaf node page (where key would normally belong).
22950  * search_key (in) : Search key result.
22951  * restart (out) : Set to true if b-tree traversal must be restarted from root.
22952  * other_args (in/out) : BTREE_FIND_UNIQUE_HELPER *.
22953  */
22954 static int
22956  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
22957 {
22958  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
22959  {
22960  return btree_key_find_and_lock_unique_of_unique (thread_p, btid_int, key, leaf_page, search_key, restart,
22961  other_args);
22962  }
22963  else
22964  {
22965  return btree_key_find_and_lock_unique_of_non_unique (thread_p, btid_int, key, leaf_page, search_key, restart,
22966  other_args);
22967  }
22968 }
22969 
22970 /*
22971  * btree_key_find_and_lock_unique_of_unique () - Find key and lock its first object (if not deleted or dirty).
22972  *
22973  * return : Error code. NO_ERROR is also returned when key/object is not found (check found_object in helper).
22974  * thread_p (in) : Thread entry.
22975  * btid_int (in) : B-tree info.
22976  * key (in) : Key value.
22977  * leaf_page (in/out) : Leaf node page (where key would normally belong).
22978  * search_key (in) : Search key result.
22979  * restart (out) : Set to true if b-tree traversal must be restarted from root.
22980  * other_args (in/out) : BTREE_FIND_UNIQUE_HELPER *. Reads lock_mode and match_class_oid; updates locked_oid and locked_class_oid (SERVER_MODE); outputs found_object and oid.
22981  */
22982 static int
22984  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
22985  void *other_args)
22986 {
22987  OID unique_oid, unique_class_oid; /* Unique object OID and class OID. */
22988  /* Unique object MVCC info. */
22990  /* Converted from b-tree MVCC info to check if object satisfies delete. */
22992  /* Helper used to describe find unique process and to output results. */
22993  BTREE_FIND_UNIQUE_HELPER *find_unique_helper = NULL;
22994  RECDES record; /* Key leaf record. */
22995  int error_code = NO_ERROR; /* Error code. */
22996  MVCC_SATISFIES_DELETE_RESULT satisfies_delete; /* Satisfies delete result. */
22997 #if defined (SERVER_MODE)
22998  /* Next variables are not required for stand-alone mode. */
22999  bool try_cond_lock = false; /* Try conditional lock. */
23000  bool was_page_refixed = false; /* Set to true if conditional lock failed and page had to be re-fixed. */
23001 #endif /* SERVER_MODE */
23002 
23003  /* Assert expected arguments. */
23004  assert (btid_int != NULL);
23005  assert (BTREE_IS_UNIQUE (btid_int->unique_pk));
23006  assert (key != NULL);
23007  assert (leaf_page != NULL && *leaf_page != NULL);
23008  assert (restart != NULL);
23009  assert (other_args != NULL);
23010 
23011  /* other_args is find unique helper. */
23012  find_unique_helper = (BTREE_FIND_UNIQUE_HELPER *) other_args;
23013 
23014  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_TRAVERSE);
23015  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE_TRAVERSE);
23016 
23017  /* Locking is required. */
23018  assert (find_unique_helper->lock_mode >= S_LOCK);
23019 
23020  /* Assume object is not found. found_object will be set to true if key is found and its first object is
23021  * successfully locked. */
23022  find_unique_helper->found_object = false;
23023 
23024  if (search_key->result != BTREE_KEY_FOUND)
23025  {
23026  /* Key doesn't exist. Exit. */
23027  goto error_or_not_found;
23028  }
23029 
23030  /* Lock key non-dirty version to protect it. Non-dirty or newest key version is always kept first. Locking object is
23031  * possible if object is not deleted and it is not dirty (its inserter/deleter is not active). If inserter or
23032  * deleter is active, or if conditional lock on object failed, current transaction must suspend until the object lock
23033  * holder is completed. This also means unfixing leaf page first. Current algorithm tries to avoid traversing the
23034  * b-tree back from root after resume. If conditional lock on object fails, leaf node must be unfixed and then fixed
23035  * again after object is locked. If page no longer exists or if the page is no longer usable (key is not in page),
23036  * the process is restarted (while holding the lock however). NOTE: Stand-alone mode doesn't require locking. It
23037  * should only check whether the first key object is deleted or not. */
23038 
23039  /* Initialize unique_oid */
23040  OID_SET_NULL (&unique_oid);
23041 
23042  /* Loop until first object is successfully locked or until it is found as deleted. */
23043  while (true)
23044  {
23045  /* Safe guard: leaf node page must be fixed. */
23046  assert (*leaf_page != NULL);
23047 
23048  /* Get key record. */
23049  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, PEEK) != S_SUCCESS)
23050  {
23051  /* Unexpected error. */
23052  assert (false);
23054  error_code = ER_FAILED;
23055  goto error_or_not_found;
23056  }
23057  /* Get first object */
23058  error_code = btree_leaf_get_first_object (btid_int, &record, &unique_oid, &unique_class_oid, &mvcc_info);
23059  if (error_code != NO_ERROR)
23060  {
23061  /* Error! */
23062  ASSERT_ERROR ();
23063  goto error_or_not_found;
23064  }
23065 
23066  if (!OID_ISNULL (&find_unique_helper->match_class_oid)
23067  && !OID_EQ (&find_unique_helper->match_class_oid, &unique_class_oid))
23068  {
23069  /* Class OID didn't match. */
23070  /* Consider key not found. */
23071  goto error_or_not_found;
23072  }
23073 
23074 #if defined (SERVER_MODE)
23075  /* Did we already lock an object and was the object changed? If so, unlock it. */
23076  if (!OID_ISNULL (&find_unique_helper->locked_oid) && !OID_EQ (&find_unique_helper->locked_oid, &unique_oid))
23077  {
23078  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_unique_helper->locked_oid,
23079  &find_unique_helper->locked_class_oid,
23080  find_unique_helper->lock_mode);
23081  OID_SET_NULL (&find_unique_helper->locked_oid);
23082  }
23083 #endif /* SERVER_MODE */
23084 
23085  /* Check whether object can be locked. */
23086  btree_mvcc_info_to_heap_mvcc_header (&mvcc_info, &mvcc_header);
23087  satisfies_delete = mvcc_satisfies_delete (thread_p, &mvcc_header);
23088  switch (satisfies_delete)
23089  {
23092 #if defined (SA_MODE)
23093  /* Impossible. */
23094  assert_release (false);
23095  error_code = ER_FAILED;
23096  goto error_or_not_found;
23097 #else /* !SA_MODE */ /* SERVER_MODE */
23098  /* Object is being inserted/deleted. We need to lock and suspend until its fate is decided. */
23099  assert (!lock_has_lock_on_object (&unique_oid, &unique_class_oid, find_unique_helper->lock_mode));
23100 #endif /* SERVER_MODE */
23101  /* Fall through. */
23103 #if defined (SERVER_MODE)
23104  /* Must lock object. */
23105  if (!OID_ISNULL (&find_unique_helper->locked_oid))
23106  {
23107  /* Object already locked. */
23108  /* Safe guard. */
23109  assert (OID_EQ (&find_unique_helper->locked_oid, &unique_oid));
23110  assert (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23111 
23112  /* Return result. */
23113  COPY_OID (&find_unique_helper->oid, &unique_oid);
23114  find_unique_helper->found_object = true;
23115  return NO_ERROR;
23116  }
23117  /* Don't try conditional lock if DELETE_RECORD_INSERT_IN_PROGRESS or DELETE_RECORD_DELETE_IN_PROGRESS. Most likely it will
23118  * fail. */
23119  try_cond_lock = (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23120  /* Lock object. */
23121  error_code =
23122  btree_key_lock_object (thread_p, btid_int, key, leaf_page, NULL, &unique_oid, &unique_class_oid,
23123  find_unique_helper->lock_mode, search_key, try_cond_lock, restart,
23124  &was_page_refixed);
23125  if (error_code != NO_ERROR)
23126  {
23127  ASSERT_ERROR ();
23128  goto error_or_not_found;
23129  }
23130  /* Object locked. Remember it in helper, so it can be unlocked if it turns out not to be the result. */
23131  assert (lock_has_lock_on_object (&unique_oid, &unique_class_oid, find_unique_helper->lock_mode) > 0);
23132  COPY_OID (&find_unique_helper->locked_oid, &unique_oid);
23133  COPY_OID (&find_unique_helper->locked_class_oid, &unique_class_oid);
23134  if (*restart)
23135  {
23136  /* Need to restart from top. The lock is kept (locked_oid remains set in helper). */
23137  return NO_ERROR;
23138  }
23139  if (search_key->result == BTREE_KEY_BETWEEN)
23140  {
23141  /* Key no longer exists. */
23142  goto error_or_not_found;
23143  }
23144  assert (search_key->result == BTREE_KEY_FOUND);
23145  if (was_page_refixed)
23146  {
23147  /* Key was found but we still need to re-check first object. Since page was re-fixed, it may have
23148  * changed. */
23149  break; /* switch (satisfies_delete) */
23150  }
23151  else
23152  {
23153  /* Safe guard */
23154  assert (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23155  }
23156 #endif /* SERVER_MODE */
23157 
23158  /* Object was found. Return result. */
23159  COPY_OID (&find_unique_helper->oid, &unique_oid);
23160  find_unique_helper->found_object = true;
23161  return NO_ERROR;
23162 
23163  case DELETE_RECORD_DELETED:
23165  /* Key object is deleted. */
23166  goto error_or_not_found;
23167 
23168  default:
23169  /* Unhandled/unexpected case. */
23170  assert_release (false);
23171  error_code = ER_FAILED;
23172  goto error_or_not_found;
23173  } /* switch (satisfies_delete) */
23174  }
23175  /* Impossible to reach. Loop can only be broken by returns or jumps to error_or_not_found label. */
23176  assert_release (false);
23177  error_code = ER_FAILED;
23178  /* Fall through. */
23179 
23180 error_or_not_found:
23181  assert (find_unique_helper->found_object == false); /* error_code is still NO_ERROR if object was just not found. */
23182 
23183 #if defined (SERVER_MODE)
23184  if (!OID_ISNULL (&find_unique_helper->locked_oid))
23185  {
23186  /* Unlock object. */
23187  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_unique_helper->locked_oid,
23188  &find_unique_helper->locked_class_oid, find_unique_helper->lock_mode);
23189  OID_SET_NULL (&find_unique_helper->locked_oid);
23190  }
23191 #endif
23192 
23193  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_LEAF);
23194  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE);
23195  return error_code;
23196 }
23197 
23198 /*
23199  * btree_key_find_and_lock_unique_of_non_unique () - Find key non-dirty version and lock it.
23200  * This is usually called in indexes of system classes
23201  * that should be unique but are not.
23202  *
23203  * return : Error code.
23204  * thread_p (in) : Thread entry.
23205  * btid_int (in) : B-tree info.
23206  * key (in) : Key value.
23207  * leaf_page (in/out) : Leaf node page (where key would normally belong).
23208  * search_key (in) : Search key result.
23209  * restart (out) : Set to true if b-tree traversal must be restarted from root.
23210  * other_args (in/out) : BTREE_FIND_UNIQUE_HELPER *.
23211  */
23212 static int
23214  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
23215  bool * restart, void *other_args)
23216 {
23217  OID unique_oid, unique_class_oid; /* Unique object OID and class OID. */
23218  /* Unique object MVCC info. */
23220  /* Converted from b-tree MVCC info to check if object satisfies delete. */
23222  /* Helper used to describe find unique process and to output results. */
23223  BTREE_FIND_UNIQUE_HELPER *find_unique_helper = NULL;
23224  RECDES record; /* Key leaf record. */
23225  int error_code = NO_ERROR; /* Error code. */
23226  MVCC_SATISFIES_DELETE_RESULT satisfies_delete; /* Satisfies delete result. */
23227  PAGE_PTR overflow_page = NULL; /* Overflow page. */
23228  VPID next_overflow_vpid = VPID_INITIALIZER; /* VPID of next overflow page. */
23229  OR_BUF buf; /* Buffer used to read b-tree records. */
23230  bool start_reading_leaf_record = true; /* Set to true when needs to start reading from first leaf object. */
23231  PAGE_PTR prev_overflow_page = NULL; /* Saved pointer to previous overflow page when next is fixed. */
23232  int offset_after_key = 0; /* For leaf record, offset where packed key is ended. */
23233  BTREE_NODE_TYPE node_type; /* Current node type. */
23234  LEAF_REC leaf_rec_info; /* Leaf record info. */
23235  bool dummy_clear_key; /* Dummy. */
23236 #if defined (SERVER_MODE)
23237  /* Next variables are not required for stand-alone mode. */
23238  bool try_cond_lock = false; /* Try conditional lock. */
23239  bool was_page_refixed = false; /* Set to true if conditional lock failed and page had to be re-fixed. */
23240 #endif /* SERVER_MODE */
23241  PERF_UTIME_TRACKER ovf_fix_time_track;
23242 
23243  /* Assert expected arguments. */
23244  assert (btid_int != NULL);
23245  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk));
23246  assert (key != NULL);
23247  assert (leaf_page != NULL && *leaf_page != NULL);
23248  assert (restart != NULL);
23249  assert (other_args != NULL);
23250 
23251  /* other_args is find unique helper. */
23252  find_unique_helper = (BTREE_FIND_UNIQUE_HELPER *) other_args;
23253 
23254  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_TRAVERSE);
23255  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE_TRAVERSE);
23256 
23257  /* Locking is required. */
23258  assert (find_unique_helper->lock_mode >= S_LOCK);
23259 
23260  /* Assume result is BTREE_KEY_NOTFOUND. It will be set to BTREE_KEY_FOUND if key is found and its first object is
23261  * successfully locked. */
23262  find_unique_helper->found_object = false;
23263 
23264  if (search_key->result != BTREE_KEY_FOUND)
23265  {
23266  /* Key doesn't exist. Exit. */
23267  goto error_or_not_found;
23268  }
23269 
23270  /* Lock key non-dirty version to protect it. Since this is not an unique index, but can have only one non-dirty
23271  * version, this must be searched through the leaf/overflow records. Locking object is possible if object is not
23272  * deleted and it is not dirty (its inserter/deleter is not active). If inserter or deleter is active, or if
23273  * conditional lock on object failed, current transaction must suspend until the object lock holder is completed.
23274  * This also means unfixing leaf/overflow pages first. Current algorithm tries to avoid traversing the b-tree back
23275  * from root after resume. If conditional lock on object fails, leaf/overflow nodes must be unfixed and then fixed
23276  * again after object is locked. If leaf page no longer exists or if the page is no longer usable (key is not in
23277  * page), the process is restarted from root. If leaf page is still valid, leaf/overflow records are processed again.
23278  * NOTE: Stand-alone mode doesn't require locking. It should only check whether the first key object is deleted or not. */
23279 
23280  /* Initialize unique_oid */
23281  OID_SET_NULL (&unique_oid);
23282  OID_SET_NULL (&unique_class_oid);
23283 
23284  /* Loop until a visible object is successfully locked or if the entire key is processed. */
23285  while (true)
23286  {
23287  /* Safe guard: leaf node page must be fixed. */
23288  assert (*leaf_page != NULL);
23289 
23290  if (start_reading_leaf_record)
23291  {
23292  /* Read leaf record. */
23293  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, PEEK) != S_SUCCESS)
23294  {
23295  assert_release (false);
23296  error_code = ER_FAILED;
23297  goto error_or_not_found;
23298  }
23299  node_type = BTREE_LEAF_NODE;
23300  error_code =
23301  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_rec_info, node_type,
23302  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
23303  if (error_code != NO_ERROR)
23304  {
23305  ASSERT_ERROR ();
23306  goto error_or_not_found;
23307  }
23308  /* Get first overflow vpid. */
23309  VPID_COPY (&next_overflow_vpid, &leaf_rec_info.ovfl);
23310  /* Initialize buffer to read from record. */
23311  BTREE_RECORD_OR_BUF_INIT (buf, &record);
23312  /* Get first object. */
23313  error_code =
23314  btree_or_get_object (&buf, btid_int, node_type, offset_after_key, &unique_oid, &unique_class_oid,
23315  &mvcc_info);
23316  if (error_code != NO_ERROR)
23317  {
23318  ASSERT_ERROR ();
23319  goto error_or_not_found;
23320  }
23321  start_reading_leaf_record = false;
23322  }
23323  else
23324  {
23325  /* Get next object. */
23326 
23327  if (buf.ptr == buf.endptr)
23328  {
23329  /* Processed all objects in this record. */
23330  if (VPID_ISNULL (&next_overflow_vpid))
23331  {
23332  /* Not other overflow pages. Not found. */
23333  goto error_or_not_found;
23334  }
23335  /* Fix next overflow page. */
23336  if (overflow_page != NULL)
23337  {
23338  /* Save this overflow page. */
23339  prev_overflow_page = overflow_page;
23340  }
23341  PERF_UTIME_TRACKER_START (thread_p, &ovf_fix_time_track);
23342  /* Fix next overflow page. */
23343  overflow_page =
23344  pgbuf_fix (thread_p, &next_overflow_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
23345  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
23346  if (overflow_page == NULL)
23347  {
23348  ASSERT_ERROR_AND_SET (error_code);
23349  goto error_or_not_found;
23350  }
23351  if (prev_overflow_page != NULL)
23352  {
23353  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
23354  }
23355  /* Now read leaf record. */
23356  if (spage_get_record (thread_p, overflow_page, 1, &record, PEEK) != S_SUCCESS)
23357  {
23358  assert_release (false);
23359  error_code = ER_FAILED;
23360  goto error_or_not_found;
23361  }
23362  /* Initialize buffer to read record. */
23363  BTREE_RECORD_OR_BUF_INIT (buf, &record);
23364  /* Key is not kept in overflow pages. */
23365  offset_after_key = 0;
23366  node_type = BTREE_OVERFLOW_NODE;
23367  /* Get VPID of next overflow page. */
23368  error_code = btree_get_next_overflow_vpid (thread_p, overflow_page, &next_overflow_vpid);
23369  if (error_code != NO_ERROR)
23370  {
23371  ASSERT_ERROR ();
23372  goto error_or_not_found;
23373  }
23374  }
23375  /* Assert there are objects in current record. */
23376  assert (buf.ptr < buf.endptr);
23377  error_code =
23378  btree_or_get_object (&buf, btid_int, node_type, offset_after_key, &unique_oid, &unique_class_oid,
23379  &mvcc_info);
23380  if (error_code != NO_ERROR)
23381  {
23382  ASSERT_ERROR ();
23383  goto error_or_not_found;
23384  }
23385  }
23386  /* Safe guard: object was read. */
23387  assert (!OID_ISNULL (&unique_oid));
23388  assert (!OID_ISNULL (&unique_class_oid));
23389 
23390  if (!OID_ISNULL (&find_unique_helper->match_class_oid)
23391  && !OID_EQ (&find_unique_helper->match_class_oid, &unique_class_oid))
23392  {
23393  /* Class does not match. Try another object. */
23394  continue;
23395  }
23396 
23397  /* Check whether object can be locked. */
23398  btree_mvcc_info_to_heap_mvcc_header (&mvcc_info, &mvcc_header);
23399  satisfies_delete = mvcc_satisfies_delete (thread_p, &mvcc_header);
23400  switch (satisfies_delete)
23401  {
23404 #if defined (SA_MODE)
23405  /* Impossible. */
23406  assert_release (false);
23407  error_code = ER_FAILED;
23408  goto error_or_not_found;
23409 #else /* !SA_MODE */ /* SERVER_MODE */
23410  /* Object is being inserted/deleted. We need to lock and suspend until it's fate is decided. */
23411  assert (!lock_has_lock_on_object (&unique_oid, &unique_class_oid, find_unique_helper->lock_mode));
23412 #endif /* SERVER_MODE */
23413  /* Fall through. */
23415 #if defined (SERVER_MODE)
23416  /* Must lock object. */
23417  if (!OID_ISNULL (&find_unique_helper->locked_oid))
23418  {
23419  if (OID_EQ (&find_unique_helper->locked_oid, &unique_oid))
23420  {
23421  /* Object already locked. */
23422  assert (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23423 
23424  /* Return result. */
23425  COPY_OID (&find_unique_helper->oid, &unique_oid);
23426  find_unique_helper->found_object = true;
23427  if (overflow_page != NULL)
23428  {
23429  pgbuf_unfix_and_init (thread_p, overflow_page);
23430  }
23431  /* Leaf page will be unfixed by caller. */
23432  return NO_ERROR;
23433  }
23434  else
23435  {
23436  /* Unlock object. */
23437  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_unique_helper->locked_oid,
23438  &find_unique_helper->locked_class_oid,
23439  find_unique_helper->lock_mode);
23440  OID_SET_NULL (&find_unique_helper->locked_oid);
23441  }
23442  }
23443  /* Don't try conditional lock if DELETE_RECORD_INSERT_IN_PROGRESS or DELETE_RECORD_DELETE_IN_PROGRESS.
23444  * Most likely it will fail. */
23445  try_cond_lock = (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23446  /* Lock object. */
23447  error_code =
23448  btree_key_lock_object (thread_p, btid_int, key, leaf_page, &overflow_page, &unique_oid, &unique_class_oid,
23449  find_unique_helper->lock_mode, search_key, try_cond_lock, restart,
23450  &was_page_refixed);
23451  if (error_code != NO_ERROR)
23452  {
23453  ASSERT_ERROR ();
23454  goto error_or_not_found;
23455  }
23456  /* Object locked. */
23457  assert (lock_has_lock_on_object (&unique_oid, &unique_class_oid, find_unique_helper->lock_mode) > 0);
23458  COPY_OID (&find_unique_helper->locked_oid, &unique_oid);
23459  COPY_OID (&find_unique_helper->locked_class_oid, &unique_class_oid);
23460  if (*restart)
23461  {
23462  /* Need to restart from top. */
23463  assert (overflow_page == NULL);
23464  return NO_ERROR;
23465  }
23466  if (search_key->result == BTREE_KEY_BETWEEN)
23467  {
23468  /* Key no longer exist. */
23469  goto error_or_not_found;
23470  }
23471  assert (search_key->result == BTREE_KEY_FOUND);
23472  if (was_page_refixed)
23473  {
23474  /* Key was found but we still need to re-check objects. Since page was re-fixed, record may have changed
23475  * (and also positions of objects). */
23476  start_reading_leaf_record = true;
23477  was_page_refixed = false;
23478  /* Safe guard: overflow page must be unfixed. */
23479  assert (overflow_page == NULL);
23480  break; /* switch (satisfies_delete) */
23481  }
23482  else
23483  {
23484  /* Safe guard */
23485  assert (satisfies_delete == DELETE_RECORD_CAN_DELETE);
23486  }
23487 #endif /* SERVER_MODE */
23488 
23489  /* Object was found. Return result. */
23490  COPY_OID (&find_unique_helper->oid, &unique_oid);
23491  find_unique_helper->found_object = true;
23492  if (overflow_page != NULL)
23493  {
23494  pgbuf_unfix_and_init (thread_p, overflow_page);
23495  }
23496  /* Leaf page will be unfixed by caller. */
23497  return NO_ERROR;
23498 
23499  case DELETE_RECORD_DELETED:
23501  /* This object is deleted. */
23502  /* Continue to next object. */
23503  break;
23504 
23505  default:
23506  /* Unhandled/unexpected case. */
23507  assert_release (false);
23508  error_code = ER_FAILED;
23509  goto error_or_not_found;
23510  } /* switch (satisfies_delete) */
23511  }
23512  /* Impossible to reach. Loop can only be broken by returns or jumps to error_or_not_found label. */
23513  assert_release (false);
23514  error_code = ER_FAILED;
23515  /* Fall through. */
23516 
23517 error_or_not_found:
23518  assert (find_unique_helper->found_object == false);
23519 
23520  if (overflow_page != NULL)
23521  {
23522  pgbuf_unfix_and_init (thread_p, overflow_page);
23523  }
23524  if (prev_overflow_page != NULL)
23525  {
23526  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
23527  }
23528 
23529 #if defined (SERVER_MODE)
23530  if (!OID_ISNULL (&find_unique_helper->locked_oid))
23531  {
23532  /* Unlock object. */
23533  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_unique_helper->locked_oid,
23534  &find_unique_helper->locked_class_oid, find_unique_helper->lock_mode);
23535  OID_SET_NULL (&find_unique_helper->locked_oid);
23536  }
23537 #endif
23538 
23539  PERF_UTIME_TRACKER_TIME (thread_p, &find_unique_helper->time_track, PSTAT_BT_LEAF);
23540  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &find_unique_helper->time_track, PSTAT_BT_FIND_UNIQUE);
23541 
23542  return error_code;
23543 }
23544 
23545 #if defined (SERVER_MODE)
23546 /*
23547  * btree_key_lock_object () - Lock object when its leaf page is held.
23548  *
23549  * return : Error code.
23550  * thread_p (in) : Thread entry.
23551  * btid_int (in) : B-tree identifier.
23552  * key (in) : Key.
23553  * leaf_page (in/out) : Pointer to leaf node page.
23554  * overflow_page (in/out) : Pointer to fixed overflow page. If leaf page must be unfixed, this will be unfixed too
23555  * (without fixing it again).
23556  * oid (in) : OID of object to lock.
23557  * class_oid (in) : Class OID of object to lock.
23558  * lock_mode (in) : Lock mode.
23559  * search_key (in.out) : Search key result. Can change if page is unfixed.
23560  * try_cond_lock (in) : True to try conditional lock first. If false, page is unfixed directly.
23561  * restart (out) : Outputs true when page had to be unfixed and was not considered valid to be reused.
23562  * was_page_refixed (out) : Outputs true if page had to be unfixed.
23563  *
23564  * TODO: Extend this function to handle overflow OID's page too.
23565  */
23566 static int
23567 btree_key_lock_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
23568  PAGE_PTR * overflow_page, OID * oid, OID * class_oid, LOCK lock_mode,
23569  BTREE_SEARCH_KEY_HELPER * search_key, bool try_cond_lock, bool * restart,
23570  bool * was_page_refixed)
23571 {
23572  VPID leaf_vpid; /* VPID of leaf page. */
23573  int lock_result; /* Result of tried locks. */
23574  int error_code = NO_ERROR; /* Error code. */
23575  PGBUF_LATCH_MODE latch_mode; /* Leaf page latch mode. */
23576  LOG_LSA page_lsa; /* Leaf page LSA before is unfixed. */
23577  PERF_UTIME_TRACKER lock_time_track;
23578 
23579  /* Assert expected arguments. */
23580  assert (btid_int != NULL);
23581  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
23582  assert (leaf_page != NULL && *leaf_page != NULL);
23583  assert (oid != NULL && !OID_ISNULL (oid));
23584  assert (class_oid != NULL && !OID_ISNULL (class_oid));
23585  assert (lock_mode >= S_LOCK);
23586  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND);
23587 
23588  if (try_cond_lock)
23589  {
23590  /* Try conditional lock. */
23591  lock_result = lock_object (thread_p, oid, class_oid, lock_mode, LK_COND_LOCK);
23592  if (lock_result == LK_GRANTED)
23593  {
23594  /* Successful locking. */
23595  return NO_ERROR;
23596  }
23597  }
23598 
23599  /* In order to avoid keeping latched pages while being suspended on locks, leaf page must be unfixed. After lock is
23600  * obtained, page can be re-fixed and re-used. */
23601  /* If an overflow page is also fixed, when leaf page is unfixed, overflow page is also unfixed. It is up to the
23602  * caller to handle re-fix and resume. */
23603  if (overflow_page != NULL && *overflow_page != NULL)
23604  {
23605  pgbuf_unfix_and_init (thread_p, *overflow_page);
23606  }
23607 
23608  /* Save page VPID. */
23609  pgbuf_get_vpid (*leaf_page, &leaf_vpid);
23610  assert (!VPID_ISNULL (&leaf_vpid));
23611  /* Save page LSA. */
23612  LSA_COPY (&page_lsa, pgbuf_get_lsa (*leaf_page));
23613  /* Save page latch mode for re-fix. */
23614  latch_mode = pgbuf_get_latch_mode (*leaf_page);
23615  /* Unfix page. */
23616  pgbuf_unfix_and_init (thread_p, *leaf_page);
23617  if (was_page_refixed != NULL)
23618  {
23619  /* Output page was unfixed. */
23620  *was_page_refixed = true;
23621  }
23622 
23623  /* Lock object. */
23624  PERF_UTIME_TRACKER_START (thread_p, &lock_time_track);
23625  lock_result = lock_object (thread_p, oid, class_oid, lock_mode, LK_UNCOND_LOCK);
23626  btree_perf_unique_lock_time (thread_p, &lock_time_track, lock_mode);
23627  if (lock_result != LK_GRANTED)
23628  {
23629  ASSERT_ERROR_AND_SET (error_code);
23630  return error_code;
23631  }
23632  /* Lock granted. */
23633 
23634  /* Try to re-fix page. */
23635  error_code = pgbuf_fix_if_not_deallocated (thread_p, &leaf_vpid, latch_mode, PGBUF_UNCONDITIONAL_LATCH, leaf_page);
23636  if (error_code != NO_ERROR)
23637  {
23638  ASSERT_ERROR ();
23639  goto error;
23640  }
23641  if (*leaf_page == NULL)
23642  {
23643  /* deallocated */
23644  *restart = true;
23645  return NO_ERROR;
23646  }
23647  /* Page successfully re-fixed. */
23648 
23649  /* Check if page is changed. */
23650  if (LSA_EQ (&page_lsa, pgbuf_get_lsa (*leaf_page)))
23651  {
23652  /* Page not changed. */
23653  return NO_ERROR;
23654  }
23655  /* Page has changed. */
23656 
23657  /* Check if page is still valid for our key. */
23658  if (!BTREE_IS_PAGE_VALID_LEAF (thread_p, *leaf_page))
23659  {
23660  /* Page was deallocated/reused for other purposes. Very unlikely, but had to check. */
23661  *restart = true;
23662  return NO_ERROR;
23663  }
23664 
23665  /* Page is a b-tree leaf. */
23666  /* Make sure key still belongs to the page. */
23667  error_code = btree_leaf_is_key_between_min_max (thread_p, btid_int, *leaf_page, key, search_key);
23668  if (error_code != NO_ERROR)
23669  {
23670  ASSERT_ERROR ();
23671  goto error;
23672  }
23673  if (search_key->result == BTREE_KEY_BETWEEN)
23674  {
23675  /* Search key to find slot. */
23676  error_code = btree_search_leaf_page (thread_p, btid_int, *leaf_page, key, search_key);
23677  if (error_code != NO_ERROR)
23678  {
23679  ASSERT_ERROR ();
23680  goto error;
23681  }
23682  }
23683  switch (search_key->result)
23684  {
23685  case BTREE_KEY_FOUND:
23686  case BTREE_KEY_BETWEEN:
23687  /* Key belongs to this page. */
23688  return NO_ERROR;
23689  case BTREE_KEY_SMALLER:
23690  case BTREE_KEY_NOTFOUND:
23691  case BTREE_KEY_BIGGER:
23692  /* Key is no longer in this page. Need to restart. */
23693  *restart = true;
23694  return NO_ERROR;
23695  default:
23696  /* Unexpected. */
23697  assert_release (false);
23698  return ER_FAILED;
23699  }
23700 
23701  /* Shouldn't be here: Unhandled case. */
23702  assert_release (false);
23703  error_code = ER_FAILED;
23704  /* Fall through. */
23705 
23706 error:
23707  assert (error_code != NO_ERROR);
23708  ASSERT_ERROR ();
23709 
23710  lock_unlock_object_donot_move_to_non2pl (thread_p, oid, class_oid, lock_mode);
23711  return error_code;
23712 }
23713 #endif /* SERVER_MODE */
23714 
23715 /*
23716  * btree_record_process_objects () - Generic routine to process the objects of a record (leaf or overflow).
23717  *
23718  * return : Error code.
23719  * thread_p (in) : Thread entry.
23720  * btid_int (in) : B-tree info.
23721  * node_type (in) : Node type - LEAF or OVERFLOW.
23722  * record (in) : Record descriptor.
23723  * after_key_offset (in) : Offset in record where key value is ended (for leaf record).
23724  * func (in) : BTREE_PROCESS_OBJECT_FUNCTION *.
23725  * args (in/out) : Arguments for internal function.
23726  */
23727 static int
23728 btree_record_process_objects (THREAD_ENTRY * thread_p, BTID_INT * btid_int, BTREE_NODE_TYPE node_type, RECDES * record,
23729  int after_key_offset, bool * stop, BTREE_PROCESS_OBJECT_FUNCTION * func, void *args)
23730 {
23731  OR_BUF buffer; /* Buffer used to process record data. */
23732  int error_code = NO_ERROR; /* Error code. */
23733  char *object_ptr = NULL; /* Pointer in record data where current object starts. */
23734  OID oid; /* OID of current object. */
23735  OID class_oid; /* Class OID of current object. */
23736  BTREE_MVCC_INFO mvcc_info; /* MVCC info of current object. */
23737 
23738  /* Assert expected arguments. */
23739  assert (btid_int != NULL);
23740  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
23741  assert (record != NULL);
23742  assert (func != NULL);
23743 
23744  /* Initialize buffer. */
23745  BTREE_RECORD_OR_BUF_INIT (buffer, record);
23746 
23747  /* Loop for all objects in buffer. */
23748  while (buffer.ptr < buffer.endptr)
23749  {
23750  /* Save current object pointer in record data. */
23751  object_ptr = buffer.ptr;
23752 
23753  /* Get object data: OID, class OID and MVCC info. */
23754  error_code = btree_or_get_object (&buffer, btid_int, node_type, after_key_offset, &oid, &class_oid, &mvcc_info);
23755  if (error_code != NO_ERROR)
23756  {
23757  /* Unexpected error. */
23758  assert (false);
23759  return error_code;
23760  }
23761 
23762  /* Call internal function. */
23763  error_code = func (thread_p, btid_int, record, object_ptr, &oid, &class_oid, &mvcc_info, stop, args);
23764  if (error_code != NO_ERROR)
23765  {
23766  /* Error! */
23767  return error_code;
23768  }
23769 
23770  if (*stop)
23771  {
23772  /* Stop processing the record objects */
23773  return NO_ERROR;
23774  }
23775  }
23776  /* Finished processing buffer. */
23777 
23778  /* Safe guard: current pointer should point to expected end of buffer. */
23779  assert (buffer.ptr == buffer.endptr);
23780 
23781  /* Success. */
23782  return NO_ERROR;
23783 }
23784 
23785 /*
23786  * btree_key_process_objects () - Generic key processing function that calls given BTREE_PROCESS_OBJECT_FUNCTION function
23787  * on all key objects (unless an error or stop argument forces an early out).
23788  *
23789  * return : Error code.
23790  * thread_p (in) : Thread entry.
23791  * btid_int (in) : B-tree identifier.
23792  * leaf_record (in) : Leaf record.
23793  * after_key_offset (in) : Offset to OID list, where packed key is ended.
23794  * leaf_info (in) : Leaf info.
23795  * func (in) : Internal function to process each key object.
23796  * args (in/out) : Arguments for internal function.
23797  *
23798  * NOTE: Leaf record of objects must be obtained before calling this function.
23799  * TODO: Consider using write latch for overflow pages.
23800  */
23801 static int
23802 btree_key_process_objects (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * leaf_record, int after_key_offset,
23803  LEAF_REC * leaf_info, BTREE_PROCESS_OBJECT_FUNCTION * func, void *args)
23804 {
23805  int error_code = NO_ERROR;
23806  bool stop = false;
23807  RECDES peeked_ovf_recdes;
23808  VPID ovf_vpid;
23809  PAGE_PTR ovf_page = NULL, prev_ovf_page = NULL;
23810  PERF_UTIME_TRACKER ovf_fix_time_track;
23811 
23812  /* Assert expected arguments. */
23813  assert (btid_int != NULL);
23814  assert (leaf_record != NULL);
23815  assert (leaf_info != NULL);
23816  assert (func != NULL);
23817 
23818  /* Start by processing leaf record. */
23819  error_code =
23820  btree_record_process_objects (thread_p, btid_int, BTREE_LEAF_NODE, leaf_record, after_key_offset, &stop, func,
23821  args);
23822  if (error_code != NO_ERROR || stop)
23823  {
23824  /* Error or just stop with NO_ERROR. */
23825  assert (error_code == NO_ERROR || er_errid () != NO_ERROR);
23826  return error_code;
23827  }
23828 
23829  /* Process overflow OID's. */
23830  VPID_COPY (&ovf_vpid, &leaf_info->ovfl);
23831  while (!VPID_ISNULL (&ovf_vpid))
23832  {
23833  /* Fix overflow page. */
23834  PERF_UTIME_TRACKER_START (thread_p, &ovf_fix_time_track);
23835  ovf_page = pgbuf_fix (thread_p, &ovf_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
23836  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
23837  if (ovf_page == NULL)
23838  {
23839  ASSERT_ERROR_AND_SET (error_code);
23840  if (prev_ovf_page != NULL)
23841  {
23842  pgbuf_unfix_and_init (thread_p, prev_ovf_page);
23843  }
23844  return error_code;
23845  }
23846  if (prev_ovf_page != NULL)
23847  {
23848  /* Now unfix previous overflow page. */
23849  pgbuf_unfix_and_init (thread_p, prev_ovf_page);
23850  }
23851  /* Get overflow OID's record. */
23852  if (spage_get_record (thread_p, ovf_page, 1, &peeked_ovf_recdes, PEEK) != S_SUCCESS)
23853  {
23854  assert_release (false);
23855  pgbuf_unfix_and_init (thread_p, ovf_page);
23856  return ER_FAILED;
23857  }
23858  /* Call internal function on overflow record. */
23859  error_code =
23860  btree_record_process_objects (thread_p, btid_int, BTREE_OVERFLOW_NODE, &peeked_ovf_recdes, 0, &stop, func,
23861  args);
23862  if (error_code != NO_ERROR)
23863  {
23864  /* Error . */
23865  ASSERT_ERROR ();
23866  pgbuf_unfix_and_init (thread_p, ovf_page);
23867  return error_code;
23868  }
23869  else if (stop)
23870  {
23871  /* Stop. */
23872  pgbuf_unfix_and_init (thread_p, ovf_page);
23873  return NO_ERROR;
23874  }
23875  /* Get VPID of next overflow page */
23876  error_code = btree_get_next_overflow_vpid (thread_p, ovf_page, &ovf_vpid);
23877  if (error_code != NO_ERROR)
23878  {
23879  assert_release (false);
23880  pgbuf_unfix_and_init (thread_p, ovf_page);
23881  return error_code;
23882  }
23883  /* Save overflow page until next one is fixed to protect the link between them. */
23884  prev_ovf_page = ovf_page;
23885  }
23886  /* All objects have been processed. If overflow page is fixed, unfix it. */
23887  if (ovf_page != NULL)
23888  {
23889  pgbuf_unfix_and_init (thread_p, ovf_page);
23890  }
23891  /* Successfully processed all key objects. */
23892  return NO_ERROR;
23893 }
23894 
23895  /*
23896  * btree_record_satisfies_snapshot () - BTREE_PROCESS_OBJECT_FUNCTION.
23897  * Output visible objects according to snapshot. If snapshot is NULL, all
23898  * objects are saved.
23899  *
23900  * return : NO_ERROR (also when the object is skipped), or ER_FAILED if the helper's OID buffer is already full.
23901  * thread_p (in) : Thread entry.
23902  * btid_int (in) : B-tree info.
23903  * record (in) : B-tree leaf/overflow record.
23904  * object_ptr (in) : Pointer to object in record data.
23905  * oid (in) : Object OID.
23906  * class_oid (in) : Object class OID.
23907  * mvcc_info (in) : Object MVCC info.
23908  * stop (out) : Set to true if index is unique and visible object is found and if this is not a debug build.
23909  * args (in/out) : BTREE_REC_SATISFIES_SNAPSHOT_HELPER *. An accepted OID is copied to oid_ptr, then oid_ptr and
       *                 oid_cnt are advanced.
23910  */
23911  static int
23912  btree_record_satisfies_snapshot (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, char *object_ptr,
23913  OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info, bool * stop, void *args)
23914  {
23915  /* Helper used to filter objects and OID's of visible ones. */
       /* NOTE(review): the declaration of `helper` is not visible in this listing (embedded numbering skips 23916);
        * it is assigned from args below - confirm against the original file. */
23917  MVCC_REC_HEADER mvcc_header_for_snapshot;
23918 
23919  /* Assert expected arguments. */
23920  assert (btid_int != NULL);
23921  assert (record != NULL);
23922  assert (object_ptr != NULL);
23923  assert (oid != NULL);
23924  assert (class_oid != NULL);
23925  assert (mvcc_info != NULL);
23926  assert (stop != NULL);
23927  assert (args != NULL);
23928 
23929  helper = (BTREE_REC_SATISFIES_SNAPSHOT_HELPER *) args;
23930 
       /* The snapshot function takes a heap MVCC header, so convert the b-tree MVCC info first. */
23931  btree_mvcc_info_to_heap_mvcc_header (mvcc_info, &mvcc_header_for_snapshot);
23932  if (helper->snapshot == NULL
23933  || helper->snapshot->snapshot_fnc (thread_p, &mvcc_header_for_snapshot, helper->snapshot) == SNAPSHOT_SATISFIED)
23934  {
23935  /* Snapshot satisfied or not required. */
23936 
23937  /* If unique index, we may need to match class OID. */
23938  if (BTREE_IS_UNIQUE (btid_int->unique_pk) && !OID_ISNULL (&helper->match_class_oid)
23939  && !OID_EQ (&helper->match_class_oid, class_oid))
23940  {
23941  /* Class OID did not match. Ignore this object. */
23942  return NO_ERROR;
23943  }
23944 
23945  /* Make sure that if this is unique index, only one visible object is found. */
23946  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || helper->oid_cnt == 0);
23947 
23948  if (helper->oid_cnt >= helper->oid_capacity)
23949  {
23950  /* OID buffer is not big enough. There was a mistake estimating the number of objects. */
23951  assert (false);
       /* NOTE(review): one line appears to be missing from this listing here (embedded numbering skips 23952) -
        * confirm against the original file. */
23953  return ER_FAILED;
23954  }
23955 
23956  /* Save OID in buffer. */
23957  memcpy (helper->oid_ptr, oid, sizeof (*oid));
23958  /* Increment buffer pointer and OID counter. */
23959  helper->oid_ptr++;
23960  helper->oid_cnt++;
23961 
       /* Only builds that define NDEBUG stop early; other builds keep going so the assert above can catch a second
        * visible object in a unique index. */
23962 #if defined (NDEBUG)
23963  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
23964  {
23965  /* Stop after first visible object. Since this is a unique index, there shouldn't be more than one visible
23966  * anyway. */
23967  *stop = true;
23968  /* Debug doesn't stop. It will continue to check there are no other visible objects. */
23969  }
23970 #endif
23971  }
23972 
23973  /* Success */
23974  return NO_ERROR;
23975  }
23976 
23977  /*
23978  * xbtree_find_unique () - Find (and sometimes lock) object in key of unique index.
23979  *
23980  * return : BTREE_SEARCH result: BTREE_KEY_FOUND, BTREE_KEY_NOTFOUND or BTREE_ERROR_OCCURRED.
23981  * thread_p (in) : Thread entry.
23982  * btid (in) : B-tree identifier.
23983  * scan_op_type (in) : Operation type (purpose) of finding unique key object.
23984  * key (in) : Key value. A NULL key (or NULL multi-column key) is reported as BTREE_KEY_NOTFOUND.
23985  * class_oid (in) : Class OID.
23986  * oid (out) : Found (and sometimes locked) object OID. Set to NULL OID if nothing is found.
23987  * is_all_class_srch (in) : True if search is based on all classes contained in the class hierarchy.
23988  */
       /* NOTE(review): the line holding the return type is not visible in this listing (embedded numbering skips
        * 23989); the header above documents the result as BTREE_SEARCH - confirm against the original file. */
23990  xbtree_find_unique (THREAD_ENTRY * thread_p, BTID * btid, SCAN_OPERATION_TYPE scan_op_type, DB_VALUE * key,
23991  OID * class_oid, OID * oid, bool is_all_class_srch)
23992  {
23993  /* Helper used to describe find unique process and to output results. */
       /* NOTE(review): the declarations of `find_unique_helper` and `advance_function` are not visible in this
        * listing (embedded numbering skips 23994 and 23996) - confirm against the original file. */
23995  int error_code = NO_ERROR;
23997  BTREE_PROCESS_KEY_FUNCTION *key_function = NULL;
23998  MVCC_SNAPSHOT dirty_snapshot;
23999 #if defined (SERVER_MODE)
24000  int lock_result;
24001  LOCK class_lock;
24002 #endif
24003 
24004  /* Assert expected arguments. */
24005  assert (btid != NULL);
24006  assert (scan_op_type == S_SELECT || scan_op_type == S_SELECT_WITH_LOCK || scan_op_type == S_DELETE
24007  || scan_op_type == S_UPDATE);
24008  assert (class_oid != NULL && !OID_ISNULL (class_oid));
24009  assert (oid != NULL);
24010 
24011  PERF_UTIME_TRACKER_START (thread_p, &find_unique_helper.time_track);
24012 
24013  /* Initialize oid as NULL. */
24014  OID_SET_NULL (oid);
24015 
24016  if (key == NULL || db_value_is_null (key) || btree_multicol_key_is_null (key))
24017  {
24018  /* Early out: Consider key is not found. */
24019  return BTREE_KEY_NOTFOUND;
24020  }
24021 
24022  if (!is_all_class_srch)
24023  {
24024  /* Object class must match class_oid argument. */
24025  COPY_OID (&find_unique_helper.match_class_oid, class_oid);
24026  }
24027 
24028 #if defined (SERVER_MODE)
24029  /* Make sure transaction has intention lock on table. */
       /* Read-only operations take IS_LOCK on the class; S_DELETE and S_UPDATE take IX_LOCK. */
24030  class_lock = (scan_op_type == S_SELECT || scan_op_type == S_SELECT_WITH_LOCK) ? IS_LOCK : IX_LOCK;
24031  lock_result = lock_object (thread_p, class_oid, oid_Root_class_oid, class_lock, LK_UNCOND_LOCK);
24032  if (lock_result != LK_GRANTED)
24033  {
24034  return BTREE_ERROR_OCCURRED;
24035  }
24036 #endif /* SERVER_MODE */
24037 
24038  if (scan_op_type == S_SELECT)
24039  {
24040  /*
24041  * If MVCC disabled, do not use snapshot and lock. If MVCC enabled and
24042  * find unique in catalog classes, use dirty version without lock.
24043  * Otherwise, use dirty version with lock since need to check whether the
24044  * object exists.
24045  */
       /* NOTE(review): the comment above says no snapshot is used for MVCC-disabled classes, yet the first branch
        * below assigns the snapshot returned by logtb_get_mvcc_snapshot () - confirm which one is intended. */
24046  if (mvcc_is_mvcc_disabled_class (class_oid))
24047  {
24048  find_unique_helper.snapshot = logtb_get_mvcc_snapshot (thread_p);
24049  key_function = btree_key_find_unique_version_oid;
24050  find_unique_helper.lock_mode = NULL_LOCK;
24051  }
24052  else
24053  {
24054  dirty_snapshot.snapshot_fnc = mvcc_satisfies_dirty;
24055  find_unique_helper.snapshot = &dirty_snapshot;
24056 
24057  if (tf_is_catalog_class (class_oid))
24058  {
24059  /* find the key without lock */
24060  key_function = btree_key_find_unique_version_oid;
24061  find_unique_helper.lock_mode = NULL_LOCK;
24062  }
24063  else
24064  {
24065  /* find the key with lock */
       /* The operation type is promoted to S_SELECT_WITH_LOCK so the later checks expect a held object lock. */
24066  key_function = btree_key_find_and_lock_unique;
24067  find_unique_helper.lock_mode = S_LOCK;
24068  scan_op_type = S_SELECT_WITH_LOCK;
24069  }
24070  }
24071  }
24072  else
24073  {
24074  /* S_SELECT_WITH_LOCK, S_DELETE, S_UPDATE. */
24075  assert (scan_op_type == S_SELECT_WITH_LOCK || scan_op_type == S_DELETE || scan_op_type == S_UPDATE);
24076 
24077  /* First key object must be locked and returned. */
24078  find_unique_helper.lock_mode = (scan_op_type == S_SELECT_WITH_LOCK) ? S_LOCK : X_LOCK;
24079  key_function = btree_key_find_and_lock_unique;
24080  }
24081 
24082  if (logtb_find_current_isolation (thread_p) >= TRAN_REP_READ || (find_unique_helper.lock_mode >= S_LOCK))
24083  {
24084  /*
24085  * Acquire snapshot in RR if not already acquired. This is needed since
24086  * the transaction need to know the actual visible objects - before
24087  * instance locking. In this way future commands of current transaction
24088  * may correctly detect visible objects.
24089  */
24090  (void) logtb_get_mvcc_snapshot (thread_p);
24091  }
24092 
24093  /* Find unique key and object. */
24094  error_code =
24095  btree_search_key_and_apply_functions (thread_p, btid, NULL, key, NULL, NULL, advance_function, NULL, key_function,
24096  &find_unique_helper, NULL, NULL);
24097  if (error_code != NO_ERROR)
24098  {
24099  /* Error! */
24100  /* Error must be set! */
24101  ASSERT_ERROR ();
24102 #if defined (SERVER_MODE)
24103  /* Safe guard: don't keep lock if error has occurred. */
24104  if (!OID_ISNULL (&find_unique_helper.locked_oid))
24105  {
24106  /* Make sure to unlock the object. */
24107  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_unique_helper.locked_oid,
24108  &find_unique_helper.locked_class_oid, find_unique_helper.lock_mode);
24109  OID_SET_NULL (&find_unique_helper.locked_oid);
24110  }
24111 #endif /* SERVER_MODE */
24112  return BTREE_ERROR_OCCURRED;
24113  }
24114 
24115 #if defined (SERVER_MODE)
24116  /* Safe guard: if op is S_SELECT, nothing should be locked. */
24117  assert (scan_op_type != S_SELECT || OID_ISNULL (&find_unique_helper.locked_oid));
24118 #endif /* SERVER_MODE */
24119 
24120  if (find_unique_helper.found_object)
24121  {
24122  /* Key found. */
24123  /* Output found object OID. */
24124  assert (!OID_ISNULL (&find_unique_helper.oid));
24125  COPY_OID (oid, &find_unique_helper.oid);
24126 
24127 #if defined (SERVER_MODE)
24128  /* Safe guard: object is supposed to be locked. */
24129  assert (scan_op_type == S_SELECT || lock_has_lock_on_object (oid, class_oid, find_unique_helper.lock_mode) > 0);
24130 #endif /* SERVER_MODE */
24131 
24132  return BTREE_KEY_FOUND;
24133  }
24134  /* Key/object not found. */
24135 
24136 #if defined (SERVER_MODE)
24137  /* Safe guard: no lock is kept if object was not found. */
24138  assert (OID_ISNULL (&find_unique_helper.locked_oid));
24139 #endif /* SERVER_MODE */
24140  return BTREE_KEY_NOTFOUND;
24141  }
24142 
24143 /*
24144  * btree_count_oids () - BTREE_PROCESS_OBJECT_FUNCTION - Increment object counter.
24145  *
24146  * return : Error code.
24147  * thread_p (in) : Thread entry.
24148  * btid_int (in) : B-tree info.
24149  * record (in) : B-tree leaf/overflow record.
24150  * object_ptr (in) : Pointer to object in record data.
24151  * oid (in) : Object OID.
24152  * class_oid (in) : Object class OID.
24153  * mvcc_info (in) : Object MVCC info.
24154  * stop (out) : Set to true if index is unique and visible object is found and if this is not a debug build.
24155  * args (in/out) : Integer object counter. Outputs incremented value.
24156  */
24157 STATIC_INLINE int
24158 btree_count_oids (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, char *object_ptr, OID * oid,
24159  OID * class_oid, MVCC_REC_HEADER * mvcc_header, bool * stop, void *args)
24160 {
24161  /* Assert expected arguments. */
24162  assert (args != NULL);
24163 
24164  /* Increment counter. */
24165  (*((int *) args))++;
24166  return NO_ERROR;
24167 }
24168 
24169  /*
24170  * btree_range_scan_count_oids_leaf_and_one_ovf () - Count key objects from leaf record and if it has an overflow page,
24171  * also add its full capacity.
24172  *
24173  * return : OID count (non-negative) or error code (negative).
24174  * thread_p (in) : Thread entry.
24175  * bts (in) : B-tree scan. Reads btid_int, key_record, offset and leaf_rec_info.ovfl.
24176  */
24177  static int
       /* NOTE(review): the line holding the function name and parameter list is not visible in this listing
        * (embedded numbering skips 24178) - confirm against the original file. */
24179  {
24180  int leaf_oids_count = 0;
24181 
24182  /* Count leaf objects. */
24183  leaf_oids_count =
24184  btree_record_get_num_oids (thread_p, &bts->btid_int, &bts->key_record, bts->offset, BTREE_LEAF_NODE);
24185  if (leaf_oids_count < 0)
24186  {
24187  /* Error */
       /* A negative count is treated as the error code and returned as is. */
24188  ASSERT_ERROR ();
24189  return leaf_oids_count;
24190  }
24191 
24192  if (VPID_ISNULL (&bts->leaf_rec_info.ovfl))
24193  {
24194  /* No overflow. */
24195  return leaf_oids_count;
24196  }
24197 
24198  /* Estimate one overflow. Do not just count first overflow records. They may be all invisible, in which case we need
24199  * to proceed to next overflow. Just to be on the safe side, take into consideration one full overflow page. */
24200  return leaf_oids_count + BTREE_MAX_OIDCOUNT_IN_OVERFLOW_RECORD (&bts->btid_int);
24201  }
24202 
24203  /*
24204  * btree_range_scan_start () - Start a range scan by finding the first eligible key.
24205  *
24206  * return : Error code.
24207  * thread_p (in) : Thread entry.
24208  * bts (in/out) : B-tree scan structure. Position fields (C_vpid, C_page, slot_id) and scan flags are updated.
24209  *
24210  * NOTE: Key is considered eligible if:
24211  * - Key is within desired range.
24212  * - Key is not a fence key.
24213  * - Key passes filter.
24214  */
24215  static int
       /* NOTE(review): the line holding the function name and parameter list is not visible in this listing
        * (embedded numbering skips 24216) - confirm against the original file. */
24217  {
24218  int error_code = NO_ERROR;
24219  bool found = false;
24220 
24221  /* Assert expected arguments. */
       /* The scan must not be positioned yet: no current page VPID and no fixed page. */
24222  assert (bts != NULL);
24223  assert (VPID_ISNULL (&bts->C_vpid));
24224  assert (bts->C_page == NULL);
24225 
24226  /* Find starting key. */
24227  /* Starting key must be checked and pass filters first. */
       /* NOTE(review): a statement appears to be missing from this listing here (embedded numbering skips 24228) -
        * confirm against the original file. */
24229  if (bts->key_range.lower_key == NULL)
24230  {
24231  /* No lower limit. Just find lowest key in index. */
24232  error_code = btree_find_lower_bound_leaf (thread_p, bts, NULL);
24233  if (error_code != NO_ERROR)
24234  {
24235  ASSERT_ERROR ();
24236  return error_code;
24237  }
24238  if (bts->end_scan)
24239  {
       /* Scan already marked as ended; nothing to start. */
24240  return NO_ERROR;
24241  }
24242  }
24243  else
24244  {
24245  /* Has lower limit. Try to locate the key. */
24246  error_code =
24247  btree_locate_key (thread_p, &bts->btid_int, bts->key_range.lower_key, &bts->C_vpid, &bts->slot_id,
24248  &bts->C_page, &found);
24249  if (error_code != NO_ERROR)
24250  {
24251  ASSERT_ERROR ();
24252  return error_code;
24253  }
24254  if (!found && bts->use_desc_index)
24255  {
24256  /* Key was not found and the bts->slot_id was positioned to next key bigger than bts->key_range.lower_key.
24257  * For descending scan, we should be positioned on the first smaller when key is not found. Update
24258  * bts->slot_id. */
24259  bts->slot_id--;
24260  }
24261  if (found && (bts->key_range.range == GT_LT || bts->key_range.range == GT_LE || bts->key_range.range == GT_INF))
24262  {
24263  /* Lower limit key was found, but the scan range must be bigger than the limit. Go to next key. */
24264  /* Mark the key as consumed and let btree_range_scan_advance_over_filtered_keys handle it. */
       /* NOTE(review): the statement that does the marking is not visible in this listing (embedded numbering
        * skips 24265) - confirm against the original file. */
24266  }
24267  }
24268  /* Found starting key. */
24269  error_code = btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24270  if (error_code != NO_ERROR)
24271  {
24272  ASSERT_ERROR ();
24273  return error_code;
24274  }
24275  if (bts->force_restart_from_root)
24276  {
24277  /* Scan is not yet started. Descending scan failed to position on first key. Restart scan. */
       /* Drop the current position (page, VPID, key) and return without setting is_scan_started. */
24278  assert (bts->use_desc_index);
24279  if (bts->C_page != NULL)
24280  {
24281  pgbuf_unfix_and_init (thread_p, bts->C_page);
24282  }
24283  VPID_SET_NULL (&bts->C_vpid);
24284  btree_scan_clear_key (bts);
       /* NOTE(review): a statement appears to be missing from this listing here (embedded numbering skips 24285) -
        * confirm against the original file. */
24286  return NO_ERROR;
24287  }
24288 
24289  /* Start scanning */
24290  bts->is_scan_started = true;
24291  return NO_ERROR;
24292  }
24293 
24294 /*
24295  * btree_range_scan_resume () - Function used to resume range scans after being interrupted. It will try to resume from
24296  * saved leaf node (if possible). Otherwise, current key must be looked up starting from
24297  * b-tree root.
24298  *
24299  * return : Error code.
24300  * thread_p (in) : Thread entry.
24301  * bts (in) : B-tree scan helper.
24302  */
24303 static int
24305 {
24306  int error_code = NO_ERROR;
24308  bool found = false;
24309 
24310  assert (bts->force_restart_from_root || !VPID_ISNULL (&bts->C_vpid));
24311  assert (bts->C_page == NULL);
24312  assert (!DB_IS_NULL (&bts->cur_key));
24313  assert (!BTS_IS_INDEX_ILS (bts));
24314 
24315  /* Resume range scan. It can be resumed from same leaf or by looking up the key again from root. */
24316  if (!bts->force_restart_from_root)
24317  {
24318  /* Try to resume from saved leaf node. */
24319  error_code =
24321  &bts->C_page);
24322  if (error_code != NO_ERROR)
24323  {
24324  ASSERT_ERROR ();
24325  return error_code;
24326  }
24327  if (bts->C_page != NULL)
24328  {
24329  /* Try to resume from this page */
24330  if (LSA_EQ (&bts->cur_leaf_lsa, pgbuf_get_lsa (bts->C_page)))
24331  {
24332  /* Leaf page suffered no changes while range search was interrupted. Range search can be resumed using
24333  * current position. */
24334  return btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24335  }
24336 
24337  /* Page suffered some changes. */
24338  if (BTREE_IS_PAGE_VALID_LEAF (thread_p, bts->C_page))
24339  {
24340  /* Page is still a valid leaf page. Check if key still exists. */
24341  /* Is key still in this page? */
24342  error_code =
24343  btree_leaf_is_key_between_min_max (thread_p, &bts->btid_int, bts->C_page, &bts->cur_key, &search_key);
24344  if (error_code != NO_ERROR)
24345  {
24346  /* Error! */
24347  pgbuf_unfix_and_init (thread_p, bts->C_page);
24348  ASSERT_ERROR ();
24349  return error_code;
24350  }
24351  if (search_key.result == BTREE_KEY_BETWEEN)
24352  {
24353  /* We need to find slot of key. */
24354  error_code =
24355  btree_search_leaf_page (thread_p, &bts->btid_int, bts->C_page, &bts->cur_key, &search_key);
24356  if (error_code != NO_ERROR)
24357  {
24358  /* Error! */
24359  pgbuf_unfix_and_init (thread_p, bts->C_page);
24360  ASSERT_ERROR ();
24361  return error_code;
24362  }
24363  assert (search_key.result != BTREE_KEY_NOTFOUND);
24364  }
24365  switch (search_key.result)
24366  {
24367  case BTREE_KEY_FOUND:
24368  /* Key was found. Use this key. */
24369  bts->slot_id = search_key.slotid;
24370  return btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24371 
24372  case BTREE_KEY_BETWEEN:
24373  /* Key should have been in this page, but it was removed. Proceed to next key. */
24374  if (bts->use_desc_index)
24375  {
24376  /* Use previous slot. */
24377  bts->slot_id = search_key.slotid - 1;
24378  assert (bts->slot_id >= 1 && bts->slot_id <= btree_node_number_of_keys (thread_p, bts->C_page));
24379  }
24380  else
24381  {
24382  /* Use next slotid. */
24383  bts->slot_id = search_key.slotid;
24384  assert (bts->slot_id >= 1 && bts->slot_id <= btree_node_number_of_keys (thread_p, bts->C_page));
24385  }
24387  return btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24388 
24389  case BTREE_KEY_SMALLER:
24390  case BTREE_KEY_BIGGER:
24391  case BTREE_KEY_NOTFOUND:
24392  /* Key is no longer in this leaf node. Locate key by advancing from root. */
24393  /* Fall through. */
24394  break;
24395 
24396  default:
24397  /* Unexpected. */
24398  assert (false);
24399  pgbuf_unfix_and_init (thread_p, bts->C_page);
24400  return ER_FAILED;
24401  }
24402  }
24403  else /* !BTREE_IS_PAGE_VALID_LEAF (bts->C_page) */
24404  {
24405  /* Page must have been deallocated/reused for other purposes. */
24406  /* Fall through. */
24407  }
24408  pgbuf_unfix_and_init (thread_p, bts->C_page);
24409  }
24410  else
24411  {
24412  /* bts->C_Page is null because it was deallocated. we need to search the key again. fall through */
24413  }
24414  }
24415  /* Couldn't resume from saved leaf node. */
24416 
24417  /* No page should be fixed. */
24418  assert (bts->C_page == NULL);
24419 
24420  /* Reset bts->force_restart_from_root flag. */
24421  bts->force_restart_from_root = false;
24422 
24423  /* Search key from top. */
24424  error_code = btree_locate_key (thread_p, &bts->btid_int, &bts->cur_key, &bts->C_vpid, &bts->slot_id,
24425  &bts->C_page, &found);
24426  if (error_code != NO_ERROR)
24427  {
24428  ASSERT_ERROR ();
24429  return error_code;
24430  }
24431  /* Safe guard. */
24432  assert (btree_get_node_level (thread_p, bts->C_page) == 1);
24433  if (found)
24434  {
24435  /* Found key. Resume from here. */
24436  return btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24437  }
24438  if (btree_node_number_of_keys (thread_p, bts->C_page) < 1)
24439  {
24440  assert (btree_node_number_of_keys (thread_p, bts->C_page) == 0);
24441  /* No more keys. */
24442  bts->end_scan = true;
24443  return NO_ERROR;
24444  }
24445  /* Not found. */
24446  if (bts->use_desc_index)
24447  {
24448  bts->slot_id = bts->slot_id - 1;
24449  }
24451  return btree_range_scan_advance_over_filtered_keys (thread_p, bts);
24452 }
24453 
24454 /*
24455  * btree_range_scan_read_record () - Read b-tree record for b-tree range scan.
24456  *
24457  * return : Error code (the value returned by btree_read_record).
24458  * thread_p (in) : Thread entry.
24459  * bts (in/out) : B-tree scan. Its current key is cleared first and then read again from bts->key_record.
24460  */
24461 static int
/* NOTE(review): listing line 24462 is missing from this extract (presumably the function name and parameter list;
 * the header comment names thread_p and bts). */
24463 {
24464  /* Clear current key value if needed. */
24465  btree_scan_clear_key (bts);
24466  /* Read record key (and other info). */
24467  return btree_read_record (thread_p, &bts->btid_int, bts->C_page, &bts->key_record, &bts->cur_key, &bts->leaf_rec_info,
24468  bts->node_type, &bts->clear_cur_key, &bts->offset, COPY_KEY_VALUE, bts);
24469 }
24470 
24471 /*
24472  * btree_range_scan_advance_over_filtered_keys () - Find a key to pass all filters.
24473  *
24474  * return : Error code. NO_ERROR is also returned when bts->end_scan or bts->force_restart_from_root is set.
24475  * thread_p (in) : Thread entry.
24476  * bts (in/out) : B-tree scan helper.
24477  */
24478 static int
/* NOTE(review): listing line 24479 is missing from this extract (presumably the function name and parameter list;
 * the header comment names thread_p and bts). */
24480 {
24481  int inc_slot; /* Slot incremental value to advance to next key. */
24482  VPID next_vpid; /* VPID of next leaf. */
24483  BTREE_NODE_HEADER *node_header; /* Leaf node header. */
24484  int key_count; /* Node key count. */
24485  PAGE_PTR next_node_page = NULL; /* Page pointer to next leaf node. */
24486  int error_code; /* Error code. */
24487  bool is_range_satisfied; /* True if range is satisfied. */
24488  bool is_filter_satisfied; /* True if filter is satisfied. */
24489 
24490  /* Assert expected arguments. */
24491  assert (bts != NULL);
24492  assert (bts->C_page != NULL);
24493 
24494  /* Initialize. */
24495  node_header = btree_get_node_header (thread_p, bts->C_page);
24496  assert (node_header != NULL);
24497  assert (node_header->node_level == 1);
24498 
24499  if (bts->key_status == BTS_KEY_IS_VERIFIED)
24500  {
24501  /* Current key is not yet consumed, but it already passed range/filter checks. Resume scan on current key. */
24502 
24503  /* Safe guard: this is not a fence key. */
24504  assert (!btree_is_fence_key (bts->C_page, bts->slot_id));
24505 
24506  /* This must be a resumed scan. Page was probably unfixed since last key read and may be changed. To be sure,
24507  * obtain record again. */
24508 
24509  /* Read record. */
24510  /* Get current key. */
24511  if (spage_get_record (thread_p, bts->C_page, bts->slot_id, &bts->key_record, PEEK) != S_SUCCESS)
24512  {
24513  /* Unexpected error. */
24514  assert (false);
24515  return ER_FAILED;
24516  }
24517  /* TODO: Don't clear/copy key again. */
24518  error_code = btree_range_scan_read_record (thread_p, bts);
24519  if (error_code != NO_ERROR)
24520  {
24521  ASSERT_ERROR ();
24522  return error_code;
24523  }
24524  /* Continue with current key. */
24525  return NO_ERROR;
24526  }
24527  /* Current key is completely consumed or is a new key. */
/* NOTE(review): listing line 24528 is missing from this extract - a statement may be absent here. */
24529 
24530  if (bts->key_range_max_value_equal)
24531  {
24532  /* Range was already consumed. End the scan. */
24533  bts->end_scan = true;
24534  return NO_ERROR;
24535  }
24536 
24537  /* If current key is not new and is completely consumed, go to next key. */
24538  inc_slot = bts->use_desc_index ? -1 : 1;
24539  if (bts->key_status == BTS_KEY_IS_CONSUMED)
24540  {
24541  /* Go to next key. */
24542  bts->slot_id += inc_slot;
24543  }
24544  else
24545  {
24546  /* Assert expected key status. */
/* NOTE(review): listing line 24547 is missing from this extract (presumably the assertion announced above). */
24548  }
24549 
24550  /* Get VPID of next leaf and key count in current leaf. */
24551  next_vpid = bts->use_desc_index ? node_header->prev_vpid : node_header->next_vpid;
24552  key_count = btree_node_number_of_keys (thread_p, bts->C_page);
24553  assert (key_count >= 0);
24554 
24555  while (true)
24556  {
24557  /* Safe guard: current leaf is fixed. */
24558  assert (bts->C_page != NULL);
24559 
24560  /* If key is not in the range 1 -> key count, a different leaf node must be fixed. This while should pass over
24561  * all empty or consumed leaf nodes. */
24562  while (bts->slot_id <= 0 || bts->slot_id > key_count || key_count == 0)
24563  {
24564  /* Current leaf node was consumed (or was empty). Try next leaf node. */
24565 
24566  /* If scan is descending, current slot_id is expected to be 0, if it is ascending, then slot_id must be
24567  * key_count + 1. */
24568  assert ((bts->use_desc_index && bts->slot_id == 0)
24569  || (!bts->use_desc_index && bts->slot_id == key_count + 1));
24570  if (VPID_ISNULL (&next_vpid))
24571  {
24572  /* Index was consumed. */
24573  bts->end_scan = true;
24574  return NO_ERROR;
24575  }
24576  if (bts->use_desc_index)
24577  {
24578  /* Descending index will try conditional latch on previous leaf. If that fails, current leaf will be
24579  * unfixed and pages fixed in normal order. However, until pages are successfully fixed, it is possible
24580  * that they are not reusable and scan must be restarted from root. We assume and hope this does not
24581  * happen too often (must see). */
24582  error_code =
24583  btree_range_scan_descending_fix_prev_leaf (thread_p, bts, &key_count, &node_header, &next_vpid);
24584  if (error_code != NO_ERROR)
24585  {
24586  ASSERT_ERROR ();
/* NOTE(review): this path returns ER_FAILED instead of error_code, unlike the other error paths in this
 * function, so the specific code from btree_range_scan_descending_fix_prev_leaf is not propagated -
 * confirm whether this is intended. */
24587  return ER_FAILED;
24588  }
24589  if (bts->force_restart_from_root)
24590  {
24591  /* Failed to continue descending scan. Restart from root. */
24592  return NO_ERROR;
24593  }
24594  }
24595  else
24596  {
24597  /* Fix next leaf page. */
24598  next_node_page = pgbuf_fix (thread_p, &next_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
24599  if (next_node_page == NULL)
24600  {
24601  ASSERT_ERROR_AND_SET (error_code);
24602  return error_code;
24603  }
24604  /* Advance to next node. */
/* Next leaf is fixed before the current one is released; bts->C_page is overwritten on the next line. */
24605  pgbuf_unfix (thread_p, bts->C_page);
24606  bts->C_page = next_node_page;
24607  VPID_COPY (&bts->C_vpid, &next_vpid);
24608  next_node_page = NULL;
24609  /* Initialize stuff. */
24610  key_count = btree_node_number_of_keys (thread_p, bts->C_page);
24611  node_header = btree_get_node_header (thread_p, bts->C_page);
24612  assert (node_header != NULL);
24613  assert (node_header->node_level == 1);
24614  /* Ascending scan: start from first key in page and then advance to next page. */
24615  bts->slot_id = 1;
24616  next_vpid = node_header->next_vpid;
24617  }
24618  }
24619 
24620  /* Get current key. */
24621  if (spage_get_record (thread_p, bts->C_page, bts->slot_id, &bts->key_record, PEEK) != S_SUCCESS)
24622  {
24623  /* Unexpected error. */
24624  assert (false);
24625  return ER_FAILED;
24626  }
24627 
/* NOTE(review): listing line 24628 is missing from this extract. It holds the condition that guards the block
 * below; judging by the comments in both branches it presumably tests that the record is not a fence key -
 * confirm against the full file. */
24629  {
24630  /* Not a fence key. */
24631  /* Handle. */
24632  error_code = btree_range_scan_read_record (thread_p, bts);
24633  if (error_code != NO_ERROR)
24634  {
24635  ASSERT_ERROR ();
24636  return error_code;
24637  }
24638  error_code =
24639  btree_apply_key_range_and_filter (thread_p, bts, BTS_IS_INDEX_ISS (bts), &is_range_satisfied,
24640  &is_filter_satisfied, bts->need_to_check_null);
24641  if (error_code != NO_ERROR)
24642  {
24643  ASSERT_ERROR ();
24644  return error_code;
24645  }
24646 
24647  if (!is_range_satisfied)
24648  {
24649  /* Range is not satisfied, which means scan is ended. */
24650  bts->end_scan = true;
24651  return NO_ERROR;
24652  }
24653  /* Range satisfied. */
24654 
24655  if (is_filter_satisfied)
24656  {
24657  /* Filter is satisfied, which means key can be used. */
24658  bts->read_keys++;
/* NOTE(review): listing line 24659 is missing from this extract - a statement may be absent here. */
24660  return NO_ERROR;
24661  }
24662  /* Filter not satisfied. Try next key. */
24663  /* Fall through. */
24664  }
24665  else
24666  {
24667  /* Key is fence and must be filtered. */
24668  /* Safe guard: Fence keys can be only first or last keys in page, but never first or last in entire index. */
24669  assert ((bts->slot_id == 1 && !VPID_ISNULL (&node_header->prev_vpid))
24670  || (bts->slot_id == key_count && !VPID_ISNULL (&node_header->next_vpid)));
24671  /* Fall through. */
24672 
24673  /* TODO: Get key info should be able to obtain fences too. */
24674  }
24675 
24676  /* Key is not usable. Advance. */
24677  bts->slot_id += inc_slot;
24678  }
24679  /* Impossible to reach. */
24680  assert (false);
24681  return ER_FAILED;
24682 }
24683 
24684 /*
24685  * btree_range_scan_descending_fix_prev_leaf () - Fix previous leaf node without generating cross latches with regular
24686  * scans and by trying to avoid a key lookup from root.
24687  *
24688  * return : Error code. NO_ERROR is also returned when bts->force_restart_from_root is set.
24689  * thread_p (in) : Thread entry.
24690  * bts (in/out) : B-tree scan data. C_page, C_vpid, slot_id and force_restart_from_root may be updated.
24691  * key_count (in/out) : Current page key count.
24692  * node_header_ptr (in/out) : Current page node header.
24693  * next_vpid (in/out) : Next (actually previous) leaf VPID.
24694  */
24695 static int
/* NOTE(review): listing line 24696 is missing from this extract (presumably the function name and the first
 * parameters; the header comment names thread_p, bts and key_count before node_header_ptr). */
24697  BTREE_NODE_HEADER ** node_header_ptr, VPID * next_vpid)
24698 {
24699  PAGE_PTR prev_leaf = NULL; /* Page pointer to previous leaf node. */
24700  VPID prev_leaf_vpid; /* VPID of previous leaf node. */
24701  int error_code = NO_ERROR; /* Error code. */
24702  /* Search key result. */
/* NOTE(review): listing line 24703 is missing from this extract. search_key is used below without a visible
 * declaration, so it is presumably declared there - confirm against the full file. */
24704 
24705  /* Assert expected arguments. */
24706  assert (bts != NULL && bts->use_desc_index == true);
24707  assert (key_count != NULL);
24708  assert (next_vpid != NULL);
24709 
24710  VPID_COPY (&prev_leaf_vpid, next_vpid);
24711 
24712  /* Conditional latch for previous page. */
24713  prev_leaf = pgbuf_fix (thread_p, &prev_leaf_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_CONDITIONAL_LATCH);
24714  if (prev_leaf != NULL)
24715  {
24716  /* Previous leaf was successfully latched. Advance. */
24717  pgbuf_unfix_and_init (thread_p, bts->C_page);
24718  bts->C_page = prev_leaf;
24719  VPID_COPY (&bts->C_vpid, &prev_leaf_vpid);
24720  *key_count = btree_node_number_of_keys (thread_p, bts->C_page);
/* Descending scan continues from the last slot of the previous leaf. */
24721  bts->slot_id = *key_count;
24722  *node_header_ptr = btree_get_node_header (thread_p, bts->C_page);
24723  VPID_COPY (next_vpid, &(*node_header_ptr)->prev_vpid);
24724  return NO_ERROR;
24725  }
24726  /* Conditional latch failed. */
24727 
24728  /* Unfix current page and retry. */
24729  pgbuf_unfix_and_init (thread_p, bts->C_page);
24730  error_code =
24731  pgbuf_fix_if_not_deallocated (thread_p, &prev_leaf_vpid, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH, &prev_leaf);
24732  if (error_code != NO_ERROR)
24733  {
24734  ASSERT_ERROR ();
24735  return error_code;
24736  }
24737  if (prev_leaf == NULL)
24738  {
24739  /* deallocated */
24740  bts->force_restart_from_root = true;
24741  return NO_ERROR;
24742  }
24743  if (!BTREE_IS_PAGE_VALID_LEAF (thread_p, prev_leaf))
24744  {
24745  /* Page deallocated/reused, but not currently a valid b-tree leaf. */
24746  /* Try again from top. */
24747  bts->force_restart_from_root = true;
24748  pgbuf_unfix_and_init (thread_p, prev_leaf);
24749  return NO_ERROR;
24750  }
24751  /* Valid leaf. */
24752 
24753  *node_header_ptr = btree_get_node_header (thread_p, prev_leaf);
24754  if (!VPID_EQ (&(*node_header_ptr)->next_vpid, &bts->C_vpid))
24755  {
24756  /* No longer linked leaves. Restart search from top. */
24757  bts->force_restart_from_root = true;
24758  pgbuf_unfix_and_init (thread_p, prev_leaf);
24759  return NO_ERROR;
24760  }
24761  /* Pages are still linked. */
24762 
24763  /* Fix current page too. */
/* NOTE(review): listing line 24764 is missing from this extract. It presumably assigns bts->C_page from a page fix
 * call, since the next line checks bts->C_page for NULL - confirm against the full file. */
24765  if (bts->C_page == NULL)
24766  {
24767  ASSERT_ERROR_AND_SET (error_code);
24768  pgbuf_unfix_and_init (thread_p, prev_leaf);
24769  return error_code;
24770  }
24771 
24772  /* Before searching the key in leaf page, we must make sure to handle this next peculiar case: 1. First key search of
24773  * descending scan. Lower key limit is located as first in leaf page. 2. Range scan says strictly less than lower key
24774  * limit. 3. The algorithm tries to go to previous leaf. However, bts->cur_key does not yet store any key values.
24775  * Set bts->cur_key to lower key limit of range. NOTE: If there is no lower limit, this case cannot happen. */
24776  if (!bts->is_scan_started && bts->key_range.lower_key != NULL && DB_IS_NULL (&bts->cur_key))
24777  {
24778  pr_clone_value (bts->key_range.lower_key, &bts->cur_key);
24779  bts->clear_cur_key = true;
24780  /* Next steps will go before bts->key_range.lower_key. */
24781  }
24782  /* We must have a non-null current key. */
24783  assert (!DB_IS_NULL (&bts->cur_key));
24784 
24785  /* Normally, key is found in current page. */
24786  error_code = btree_leaf_is_key_between_min_max (thread_p, &bts->btid_int, bts->C_page, &bts->cur_key, &search_key);
24787  if (error_code != NO_ERROR)
24788  {
24789  ASSERT_ERROR ();
/* Only prev_leaf is released here; bts->C_page stays fixed. btree_range_scan unfixes a non-NULL bts->C_page in
 * its end section. */
24790  pgbuf_unfix_and_init (thread_p, prev_leaf);
24791  return error_code;
24792  }
24793  if (search_key.result == BTREE_KEY_BETWEEN)
24794  {
24795  /* Search to find slot. */
24796  error_code = btree_search_leaf_page (thread_p, &bts->btid_int, bts->C_page, &bts->cur_key, &search_key);
24797  if (error_code != NO_ERROR)
24798  {
24799  ASSERT_ERROR ();
24800  pgbuf_unfix_and_init (thread_p, prev_leaf);
24801  return error_code;
24802  }
24803  }
24804  switch (search_key.result)
24805  {
24806  case BTREE_KEY_BETWEEN:
24807  /* Still in current page. Current slotid cannot be 1. */
24808  assert (search_key.slotid > 1);
24809  /* Fall through to advance to previous key. Search function returned the first bigger key. */
24810  case BTREE_KEY_FOUND:
24811  if (search_key.slotid > 1)
24812  {
24813  /* Must check remaining keys in current page. */
24814  bts->slot_id = search_key.slotid - 1;
24815 
/* Outputs are refreshed from the current page, because *node_header_ptr was pointed at prev_leaf above. */
24816  *key_count = btree_node_number_of_keys (thread_p, bts->C_page);
24817  *node_header_ptr = btree_get_node_header (thread_p, bts->C_page);
24818  VPID_COPY (next_vpid, &(*node_header_ptr)->prev_vpid);
24819 
24820  /* Unfix previous page. */
24821  pgbuf_unfix_and_init (thread_p, prev_leaf);
24822  return NO_ERROR;
24823  }
24824 
24825  /* Found key in the first slot. */
24826  /* Safe guard: could not fall through from BTREE_KEY_BETWEEN. */
24827  assert (search_key.result == BTREE_KEY_FOUND);
24828  /* Move to previous page. */
24829  pgbuf_unfix_and_init (thread_p, bts->C_page);
24830  bts->C_page = prev_leaf;
24831  VPID_COPY (&bts->C_vpid, &prev_leaf_vpid);
24832  *key_count = btree_node_number_of_keys (thread_p, bts->C_page);
/* *node_header_ptr still refers to prev_leaf's header (set before the current page was fixed again). */
24833  VPID_COPY (next_vpid, &(*node_header_ptr)->prev_vpid);
24834  bts->slot_id = *key_count;
24835  return NO_ERROR;
24836 
24837  case BTREE_KEY_BIGGER:
24838  /* TODO: Go to next page until key is found? */
24839  /* For now just fall through to restart. */
24840  case BTREE_KEY_NOTFOUND:
24841  /* Unknown key fate. */
24842  bts->force_restart_from_root = true;
24843  pgbuf_unfix_and_init (thread_p, prev_leaf);
24844  return NO_ERROR;
24845 
24846  case BTREE_KEY_SMALLER:
24847  /* Leave the switch to try searching the key in previous page. */
24848  break;
24849 
24850  default:
24851  /* Unhandled. */
24852  assert_release (false);
24853  pgbuf_unfix_and_init (thread_p, prev_leaf);
24854  return ER_FAILED;
24855  }
24856  /* Search key in previous page. */
24857  assert (search_key.result == BTREE_KEY_SMALLER);
24858  /* Unfix current page. */
24859  pgbuf_unfix_and_init (thread_p, bts->C_page);
24860  error_code = btree_leaf_is_key_between_min_max (thread_p, &bts->btid_int, prev_leaf, &bts->cur_key, &search_key);
24861  if (error_code != NO_ERROR)
24862  {
24863  ASSERT_ERROR ();
24864  pgbuf_unfix_and_init (thread_p, prev_leaf);
24865  return error_code;
24866  }
24867  if (search_key.result == BTREE_KEY_BETWEEN)
24868  {
24869  /* Search to find slot. */
24870  error_code = btree_search_leaf_page (thread_p, &bts->btid_int, prev_leaf, &bts->cur_key, &search_key);
24871  if (error_code != NO_ERROR)
24872  {
24873  ASSERT_ERROR ();
24874  pgbuf_unfix_and_init (thread_p, prev_leaf);
24875  return error_code;
24876  }
24877  }
24878  switch (search_key.result)
24879  {
24880  case BTREE_KEY_BIGGER:
24881  case BTREE_KEY_BETWEEN:
24882  /* First bigger key. */
24883  assert (search_key.slotid > 1);
24884  /* Fall through to advance to previous key. */
24885  case BTREE_KEY_FOUND:
24886  /* Update bts and advance to previous key. */
24887  bts->C_page = prev_leaf;
24888  VPID_COPY (&bts->C_vpid, &prev_leaf_vpid);
24889  *key_count = btree_node_number_of_keys (thread_p, bts->C_page);
24890  VPID_COPY (next_vpid, &(*node_header_ptr)->prev_vpid);
24891  bts->slot_id = search_key.slotid - 1;
24892 
24893  /* If this was the first key in page, now slot_id will be 0. */
24894  /* This function should get called again, but looking for another previous leaf node. */
24895  return NO_ERROR;
24896 
24897  default:
24898  /* BTREE_KEY_SMALLER */
24899  /* BTREE_KEY_NOTFOUND */
24900  /* Unknown key fate. */
24901  bts->force_restart_from_root = true;
24902  pgbuf_unfix_and_init (thread_p, prev_leaf);
24903  return NO_ERROR;
24904  }
24905 
24906  /* Impossible to reach. */
24907  assert_release (false);
24908  return ER_FAILED;
24909 }
24910 
24911 /*
24912  * btree_range_scan () - Generic function to do a range scan on b-tree. It can scan key by key starting with first
24913  * (or last key for descending scans). For each key, it calls an internal function to process
24914  * the key.
24915  *
24916  * return : Error code.
24917  * thread_p (in) : Thread entry.
24918  * bts (in/out) : B-tree scan structure. Scan position, flags and counters are updated.
24919  * key_func (in) : Internal function to call when an eligible key is found.
24920  */
24921 int
/* NOTE(review): listing line 24922 is missing from this extract (presumably the function name and parameter list;
 * the header comment names thread_p, bts and key_func). */
24923 {
24924  int error_code = NO_ERROR; /* Error code. */
24925 
24926  /* Assert expected arguments. */
24927  assert (bts != NULL);
24928  assert (key_func != NULL);
24929 
24930  PERF_UTIME_TRACKER_START (thread_p, &bts->time_track);
24931 
24932  /* Reset end_scan and end_one_iteration flags. */
24933  bts->end_scan = false;
24934  bts->end_one_iteration = false;
24935 
24936  /* Reset read counters for iteration. */
24937  bts->n_oids_read_last_iteration = 0;
24938 
24939  if (bts->index_scan_idp != NULL && bts->index_scan_idp->oid_list != NULL)
24940  {
24941  /* Reset oid_ptr. */
24942  bts->oid_ptr = bts->index_scan_idp->oid_list->oidp;
24943  }
24944 
24945  while (!bts->end_scan && !bts->end_one_iteration)
24946  {
24947  bts->is_interrupted = false;
24948 
24949  /* Start from top or resume. */
24950  if (!bts->is_scan_started)
24951  {
24952  /* Must find a starting key. */
24953  error_code = btree_range_scan_start (thread_p, bts);
24954  }
24955  else
24956  {
24957  /* Resume from current key. */
24958  error_code = btree_range_scan_resume (thread_p, bts);
24959  }
24960 
24961  PERF_UTIME_TRACKER_TIME (thread_p, &bts->time_track, PSTAT_BT_TRAVERSE);
24962  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &bts->time_track, PSTAT_BT_RANGE_SEARCH_TRAVERSE);
24963 
24964  if (error_code != NO_ERROR)
24965  {
24966  ASSERT_ERROR ();
24967  goto exit_on_error;
24968  }
24969  if (bts->end_scan)
24970  {
24971  /* Scan is finished. */
24972  break;
24973  }
24974  if (bts->force_restart_from_root)
24975  {
24976  /* Couldn't advance. Restart from root. */
24977  assert (bts->use_desc_index);
24978  btree_log_if_enabled ("Notification: descending range scan had to be interrupted and restarted from root.\n");
24979  if (bts->C_page != NULL)
24980  {
24981  pgbuf_unfix_and_init (thread_p, bts->C_page);
24982  }
24983  if (BTS_IS_INDEX_ILS (bts))
24984  {
24985  /* Reset scan to avoid using btree_range_scan_resume () */
24986  bts_reset_scan (thread_p, bts);
24987  }
24988  continue;
24989  }
24990 
24991  /* Scan is now positioned on a usable key; consumed/fence/filtered keys have been skipped. */
24992 
24993  while (true)
24994  {
24995  /* For each valid key */
24996  assert (bts->C_page != NULL);
24997  assert (!VPID_ISNULL (&bts->C_vpid));
24998  assert (bts->O_page == NULL);
24999  /* Do we need P_page here? */
25000  assert (bts->P_page == NULL);
/* NOTE(review): listing line 25001 is missing from this extract - a statement may be absent here. */
25002  assert (!bts->is_interrupted && !bts->end_one_iteration && !bts->end_scan);
25003 
25004  /* Call internal key function. */
25005  error_code = key_func (thread_p, bts);
25006  if (error_code != NO_ERROR)
25007  {
25008  ASSERT_ERROR ();
25009  goto exit_on_error;
25010  }
25011  if (bts->is_interrupted || bts->end_one_iteration || bts->end_scan)
25012  {
25013  /* Interrupt key processing loop. */
25014  break;
25015  }
25016  /* Current key must be consumed. Find a new valid key. */
/* NOTE(review): listing line 25017 is missing from this extract - a statement may be absent here. */
25018  error_code = btree_range_scan_advance_over_filtered_keys (thread_p, bts);
25019  if (error_code != NO_ERROR)
25020  {
25021  ASSERT_ERROR ();
25022  goto exit_on_error;
25023  }
25024  if (bts->end_scan)
25025  {
25026  /* Finished scan. */
25027  break;
25028  }
25029  if (bts->force_restart_from_root)
25030  {
25031  /* Couldn't advance. Restart from root. */
25032  assert (bts->use_desc_index);
25033  btree_log_if_enabled ("Notification: descending range scan had to be interrupted and restarted from "
25034  "root.\n");
25035  if (bts->C_page != NULL)
25036  {
25037  pgbuf_unfix_and_init (thread_p, bts->C_page);
25038  }
/* Leave the key loop; the outer loop calls btree_range_scan_start or btree_range_scan_resume again. */
25039  break;
25040  }
25041  }
25042 
25043  PERF_UTIME_TRACKER_TIME (thread_p, &bts->time_track, PSTAT_BT_LEAF);
25044  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &bts->time_track, PSTAT_BT_RANGE_SEARCH);
25045  }
25046 
25047 end:
25048  /* End scan or end one iteration or maybe an error case. */
25049  assert (bts->end_scan || bts->end_one_iteration || error_code != NO_ERROR);
25050 
25051  if (error_code != NO_ERROR)
25052  {
25053  /* Clear current key (if not already cleared). */
25054  btree_scan_clear_key (bts);
25055  }
25056 
25057  if (bts->end_scan)
25058  {
25059  /* Scan is ended. Reset current page VPID and is_scan_started flag */
25060  VPID_SET_NULL (&bts->C_vpid);
25061  bts->is_scan_started = false;
25062 
25063  /* Clear current key (if not already cleared). */
25064  btree_scan_clear_key (bts);
25065 
25066  assert (VPID_ISNULL (&bts->O_vpid));
25067  }
25068 
25069  if (bts->C_page != NULL)
25070  {
25071  /* Unfix current page and save its LSA. */
/* btree_range_scan_resume compares the saved LSA with the page LSA to detect changes made meanwhile. */
25072  assert (bts->end_scan || VPID_EQ (pgbuf_get_vpid_ptr (bts->C_page), &bts->C_vpid));
25073  LSA_COPY (&bts->cur_leaf_lsa, pgbuf_get_lsa (bts->C_page));
25074  pgbuf_unfix_and_init (thread_p, bts->C_page);
25075  }
25076  else
25077  {
25078  /* No current page fixed. */
25079  LSA_SET_NULL (&bts->cur_leaf_lsa);
25080  }
25081  assert (bts->O_page == NULL);
25082 
25083  if (bts->P_page != NULL)
25084  {
25085  /* bts->P_page is still used by btree_find_boundary_leaf. Make sure it is unfixed. */
25086  VPID_SET_NULL (&bts->P_vpid);
25087  pgbuf_unfix_and_init (thread_p, bts->P_page);
25088  }
25089 
25090  if (bts->key_limit_upper != NULL)
25091  {
25092  /* Update upper limit for next scans. */
/* NOTE(review): listing line 25093 is missing from this extract (presumably the update announced above). */
25094  assert ((*bts->key_limit_upper) >= 0);
25095  }
25096 
25097  PERF_UTIME_TRACKER_TIME (thread_p, &bts->time_track, PSTAT_BT_LEAF);
25098  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &bts->time_track, PSTAT_BT_RANGE_SEARCH);
25099 
25100  return error_code;
25101 
25102 exit_on_error:
/* Make sure an error code is returned, then share the cleanup in the end section. */
25103  error_code = error_code != NO_ERROR ? error_code : ER_FAILED;
25104  goto end;
25105 }
25106 
25107 /*
25108  * btree_range_scan_select_visible_oids () - BTREE_RANGE_SCAN_PROCESS_KEY_FUNC
25109  * Used internally by btree_range_scan to select visible objects from key
25110  * OID's.
25111  * Handling depends on the type of scan:
25112  * 1. Multiple ranges optimization.
25113  * 2. Covering index.
25114  * 3. Index skip scan.
25115  * 4. Regular index scan.
25116  *
25117  * return : Error code.
25118  * thread_p (in) : Thread entry.
25119  * bts (in) : B-tree scan info.
25120  */
25121 int
25123 {
25124  int error_code = NO_ERROR; /* Returned error code. */
25125  int oid_count; /* Total number of objects of this key. Overflow OID's are also considered. For unique
25126  * indexes, only one object is considered (we know for sure there can not be more than
25127  * one visible). */
25128  int total_oid_count; /* Total count of key OID's considering OID's already read and all OID's of current
25129  * key. */
25130  bool stop = false; /* Set to true when processing record should stop. */
25131  VPID overflow_vpid = VPID_INITIALIZER; /* Overflow VPID. */
25132  PAGE_PTR overflow_page = NULL; /* Current overflow page. */
25133  PAGE_PTR prev_overflow_page = NULL; /* Previous overflow page. */
25134  RECDES ovf_record; /* Overflow page record. */
25135  int oid_size = OR_OID_SIZE; /* Object size (OID/class OID). */
25136  int save_oid_count = 0; /* While processing the overflow pages we are required to save the last page that had
25137  * any visible objects. Use this to save object counter before processing overflow and
25138  * compare with the count of objects after processing the overflow. */
25139  VPID last_visible_overflow; /* VPID of last overflow page that had at least one visible OID. If the scan is
25140  * interrupted because too many objects were processed, it will be resumed after this
25141  * overflow page. */
25142  PERF_UTIME_TRACKER ovf_fix_time_track;
25143 
25144  /* Assert b-tree scan is valid. */
25145  assert (bts != NULL);
25146  assert (!VPID_ISNULL (&bts->C_vpid));
25147  assert (bts->C_page != NULL);
25148  /* MRO and covering index optimization are not compatible with bts->need_count_only. */
25149  assert (!BTS_NEED_COUNT_ONLY (bts) || (!BTS_IS_INDEX_MRO (bts) && !BTS_IS_INDEX_COVERED (bts)));
25150 
25151  /* Index skip scan optimization has an early out when a new key is found: */
25152  if (BTS_IS_INDEX_ISS (bts)
25155  {
25156  /* A new key was found. End current scan here (another range scan will be started after a range based on current
25157  * key is defined. */
25158  /* Set key. */
25159  error_code = btree_iss_set_key (bts, &bts->index_scan_idp->iss);
25160  if (error_code != NO_ERROR)
25161  {
25162  return error_code;
25163  }
25164  /* Set oid_cnt to 1. */
25165  bts->n_oids_read_last_iteration = 1;
25166 
25167  /* End this scan here. */
25168  bts->end_scan = true;
25169  return NO_ERROR;
25170  }
25171  /* Not an early out of ISS optimization. */
25172  /* Safe guard: Not an ISS or ISS current op is ISS_OP_DO_RANGE_SEARCH. */
25174 
25175  /* ISS and regular scans can use a buffer to store read OID's. Covering index optimization uses a list file, while
25176  * MRO uses a Top N structure. When OID buffer is used, it can be filled during scan. If this happens, interrupt
25177  * range scan, handle currently buffered objects and resume with an empty buffer. NOTE: There are two types of limits
25178  * used here. A soft limit and hard limit. Hard limit is used when key has too many objects and don't fit soft limit.
25179  * Hard limit is necessary to handle key processing interrupt/resume. Soft key limit is ignored if no objects have
25180  * been processed in this iteration. */
25181  /* Don't do any checks for MRO or if objects only have to be counted. */
25182  if (!BTS_IS_INDEX_MRO (bts) && !BTS_NEED_COUNT_ONLY (bts))
25183  {
25184  if (!bts->is_key_partially_processed)
25185  {
25186  /* If no objects have been processed this iteration, don't count. If already processed more than soft limit,
25187  * don't count. If currently process + key's leaf and first overflow count exceed soft limit, stop iteration
25188  * before processing key. */
25190  {
25191  /* Count the objects. */
25192  if (BTREE_IS_UNIQUE (bts->btid_int.unique_pk))
25193  {
25194  /* Only one object can be visible for each key. */
25195  oid_count = 1;
25196  }
25197  else
25198  {
25199  /* Get the number of OID in current key's leaf and first overflow page. */
25200  oid_count = btree_range_scan_count_oids_leaf_and_one_ovf (thread_p, bts);
25201  if (oid_count < 0)
25202  {
25203  /* Unexpected error. */
25204  ASSERT_ERROR ();
25205  return oid_count;
25206  }
25207  }
25208  total_oid_count = bts->n_oids_read_last_iteration + oid_count;
25209 
25210  if (!BTS_IS_SOFT_CAPACITY_ENOUGH (bts, total_oid_count))
25211  {
25212  /* Stop this iteration and resume with an empty buffer. */
25213  bts->end_one_iteration = true;
25214  return NO_ERROR;
25215  }
25216  else
25217  {
25218  /* Safe guard: if soft capacity is enough, then hard limit must also be enough. */
25219  assert (BTS_IS_HARD_CAPACITY_ENOUGH (bts, total_oid_count));
25220  }
25221  /* There is enough space to handle key objects at least in its leaf and first overflow. */
25222  }
25223  }
25224  else
25225  {
25226  /* Key was not fully processed. Resume from its current overflow. */
25227  /* The interrupt algorithm is based on next facts: 1. Objects can be vacuumed. Overflow pages can be
25228  * deallocated when all its objects are vacuumed. Entire key cannot be vacuumed while interrupted (because at
25229  * least one visible object existed in previous iteration). 2. Objects can be swapped from overflow page to
25230  * leaf record. Only one object from first overflow page is swapped. If this object is not visible, it can
25231  * be vacuumed again and another object is swapped. This can continue until this thread resumes scan or until
25232  * a visible object is swapped. Note that first overflow page will be deallocated if all its objects have
25233  * been swapped. This is possible if page has at most one visible object that cannot be vacuumed. 3. New
25234  * objects can be inserted and new overflow pages can be created. Interrupt/resume system tries to work with
25235  * constants in this behavior: 1. A key without overflow pages is never interrupted (actually a key without
25236  * at least four overflow pages is never interrupted since all its objects can be processed in default
25237  * buffer). 2. If interrupted, the buffer should have at least default number of OID's - one overflow page of
25238  * OID's. This means roughly default_buffer_size / OR_OID_SIZE - db_page_size / OID_WITH_MVCC_INFO_SIZE =
25239  * 16k*4/8 - 16k/32 with default parameters = ~7.5k objects. 3. When interrupted, bts saves last overflow
25240  * page with at least one visible object to make sure it is not deallocated. How can an overflow page be
25241  * deallocated? Well first of all if all its objects are invisible and vacuumed. If it had any visible
25242  * objects, it should be swapped to leaf record. Since only one visible object can be swapped to leaf, the
25243  * page must have at most one visible object. However, swapping starts with first overflow page. But this
25244  * scan has processed at least 7.5k visible objects, worth at least several full pages. The last page, even
25245  * if it had only one visible object, it wouldn't get swapped to leaf. 4. Can swapping objects to leaf be a
25246  * problem? No. It can be considered already processed and ignored. Since there have been at least 7.5k
25247  * visible objects processed in previous iteration, the first leaf object, if visible, must be one of these.
25248  * 5. Inserting new OID's will not affect our scan in any way. If they are found after resume, they will be
25249  * ignored by visibility test. If not found, they are again ignored (as they should). Even creating new
25250  * overflow pages, does not affect us. The above statements are true for default buffer. In order to make it
25251  * true for small buffers, there are two limits used by scan: a soft limit and a hard limit. The hard limit
25252  * is used when key has too many objects. See comment from BTS_IS_HARD_CAPACITY_ENOUGH. */
25253  /* Resume from next page of last overflow page. */
25254  prev_overflow_page =
25256  if (prev_overflow_page == NULL)
25257  {
25258  ASSERT_ERROR_AND_SET (error_code);
25259  return error_code;
25260  }
25261  error_code = btree_get_next_overflow_vpid (thread_p, prev_overflow_page, &overflow_vpid);
25262  if (error_code != NO_ERROR)
25263  {
25264  assert_release (false);
25265  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
25266  return error_code;
25267  }
25268  }
25269  }
25270  else
25271  {
25273  }
25274 
25275  /* Start processing key objects. */
25276 
25277  if (!bts->is_key_partially_processed)
25278  {
25279  /* Start processing key with leaf record objects. */
25280  error_code =
25281  btree_record_process_objects (thread_p, &bts->btid_int, BTREE_LEAF_NODE, &bts->key_record, bts->offset, &stop,
25283  if (error_code != NO_ERROR)
25284  {
25285  ASSERT_ERROR ();
25286  return error_code;
25287  }
25288  if (stop || bts->end_one_iteration || bts->end_scan || bts->is_interrupted)
25289  {
25290  /* Early out. */
25291  return NO_ERROR;
25292  }
25293  /* Process overflow objects. */
25294  /* Start processing overflow with first one. */
25295  VPID_COPY (&overflow_vpid, &bts->leaf_rec_info.ovfl);
25296  /* Fall through. */
25297  }
25298  else
25299  {
25300  /* Start processing key with an overflow page. */
25301  /* Overflow VPID is already set. */
25302  /* Assume key will be entirely processed. It will be changed if it interrupted again. */
25303  bts->is_key_partially_processed = false;
25304  }
25305 
25306  /* Prepare required data. */
25307  if (BTREE_IS_UNIQUE (bts->btid_int.unique_pk))
25308  {
25309  /* Class OID included. */
25310  oid_size += OR_OID_SIZE;
25311  }
25312  /* Initialize last visible overflow. */
25313  VPID_SET_NULL (&last_visible_overflow);
25314  /* Process overflow pages. */
25315  while (!VPID_ISNULL (&overflow_vpid))
25316  {
25317  /* Fix next overflow page. */
25318  PERF_UTIME_TRACKER_START (thread_p, &ovf_fix_time_track);
25319  overflow_page = pgbuf_fix (thread_p, &overflow_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
25320  btree_perf_ovf_oids_fix_time (thread_p, &ovf_fix_time_track);
25321  if (overflow_page == NULL)
25322  {
25323  ASSERT_ERROR_AND_SET (error_code);
25324  if (prev_overflow_page != NULL)
25325  {
25326  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
25327  }
25328  return error_code;
25329  }
25330  /* Previous overflow page (if any) can be unfixed, now that next overflow page was fixed. */
25331  if (prev_overflow_page != NULL)
25332  {
25333  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
25334  }
25335 
25336  /* Save current object count. */
25337  save_oid_count = bts->n_oids_read_last_iteration;
25338 
25339  /* Get record. */
25340  if (spage_get_record (thread_p, overflow_page, 1, &ovf_record, PEEK) != S_SUCCESS)
25341  {
25342  assert_release (false);
25343  pgbuf_unfix_and_init (thread_p, overflow_page);
25344  return ER_FAILED;
25345  }
25346  if (!BTS_IS_INDEX_MRO (bts) && !BTS_NEED_COUNT_ONLY (bts))
25347  {
25348  /* Can we save all objects? */
25349  if (BTREE_IS_UNIQUE (bts->btid_int.unique_pk))
25350  {
25351  /* There can be only one visible object. */
25352  oid_count = 1;
25353  }
25354  else
25355  {
25356  oid_count = btree_record_get_num_oids (thread_p, &bts->btid_int, &ovf_record, 0, BTREE_OVERFLOW_NODE);
25357  }
25358  if (!BTS_IS_HARD_CAPACITY_ENOUGH (bts, bts->n_oids_read_last_iteration + oid_count))
25359  {
25360  /* We don't know how many visible objects there are in this page, and we don't want to process the page
25361  * partially. Stop here and we will continue from last overflow page that had visible objects. */
25362  /* Index coverage uses a list file and can handle all objects for this key. */
25363 
25365  assert (!VPID_ISNULL (&last_visible_overflow));
25366 
25367  /* Save page to resume. */
25368  VPID_COPY (&bts->O_vpid, &last_visible_overflow);
25369  /* Mark key as partially processed to know to resume from an overflow page. */
25370  bts->is_key_partially_processed = true;
25371  /* End current iteration. */
25372  bts->end_one_iteration = true;
25373 
25374  /* Unfix current overflow page. */
25375  pgbuf_unfix_and_init (thread_p, overflow_page);
25376  return NO_ERROR;
25377  }
25378  /* We can handle all current objects. */
25379  }
25380 
25381  /* Process this overflow OID's. */
25382  error_code =
25383  btree_record_process_objects (thread_p, &bts->btid_int, BTREE_OVERFLOW_NODE, &ovf_record, 0, &stop,
25385  if (error_code != NO_ERROR)
25386  {
25387  ASSERT_ERROR ();
25388  pgbuf_unfix_and_init (thread_p, overflow_page);
25389  return error_code;
25390  }
25391  /* Successful processing. */
25392  if (stop || bts->end_one_iteration || bts->end_scan || bts->is_interrupted)
25393  {
25394  /* Early out. */
25395  pgbuf_unfix_and_init (thread_p, overflow_page);
25396  VPID_SET_NULL (&bts->O_vpid);
25397  return NO_ERROR;
25398  }
25399  if (save_oid_count < bts->n_oids_read_last_iteration)
25400  {
25401  /* Page had at least one visible object. */
25402  VPID_COPY (&last_visible_overflow, &overflow_vpid);
25403  }
25404  /* Process next overflow page. */
25405  error_code = btree_get_next_overflow_vpid (thread_p, overflow_page, &overflow_vpid);
25406  if (error_code != NO_ERROR)
25407  {
25408  assert_release (false);
25409  pgbuf_unfix_and_init (thread_p, overflow_page);
25410  return error_code;
25411  }
25412  prev_overflow_page = overflow_page;
25413  overflow_page = NULL;
25414  }
25415  if (prev_overflow_page != NULL)
25416  {
25417  pgbuf_unfix_and_init (thread_p, prev_overflow_page);
25418  }
25419  /* Entire key has been processed. */
25420  VPID_SET_NULL (&bts->O_vpid);
25421 
25422  /* Key has been consumed. */
25424  /* Success. */
25425  return NO_ERROR;
25426 }
25427 
25428 /*
25429  * btree_select_visible_object_for_range_scan () - BTREE_PROCESS_OBJECT_FUNCTION
25430  * Function handles each found object based on type of index scan.
 *   The object is first filtered (snapshot visibility, class match for unique indexes, lower/upper key limits).
 *   An object that passes is then counted only, offered to the MRO Top N structure, written through
 *   btree_dump_curr_key (covering index, including index loose scan) or saved in the OID buffer.
25431  *
25432  * return : Error code. NO_ERROR when the object is ignored or handled; otherwise the error set when the
 *   not-vacuumed check reports DISK_ERROR, or the error returned by btree_range_opt_check_add_index_key,
 *   btree_ils_adjust_range or btree_dump_curr_key.
25433  * thread_p (in) : Thread entry.
25434  * btid_int (in) : B-tree info.
25435  * record (in) : Index record containing one key's objects.
25436  * object_ptr (in) : Pointer in record to current object (not used).
25437  * oid (in) : Current object OID.
25438  * class_oid (in) : Current object class OID.
25439  * mvcc_info (in) : Current object MVCC info.
25440  * stop (out) : Set to true if processing record should stop after this function execution. This happens for a
 *   visible object of a unique index, when the upper key limit is reached, when MRO asks to end the scan and
 *   for index loose scan.
25441  * args (in/out) : BTREE_SCAN *. end_scan, is_interrupted, the counter behind key_limit_lower and
 *   index_scan_idp->not_vacuumed_res may be updated here.
 *
 * NOTE(review): this listing skips line numbers 25444, 25476, 25509, 25554, 25586, 25601, 25624, 25636 and
 *   25645, so the function name line, the condition guarding the not-vacuumed check and (presumably) the
 *   statements that increment the read-OID counter are not visible here - confirm against the original file.
25442  */
25443 static int
25445  char *object_ptr, OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info,
25446  bool * stop, void *args)
25447 {
25448  BTREE_SCAN *bts = NULL;
25449  int error_code = NO_ERROR;
25450  MVCC_SNAPSHOT *snapshot = NULL;
25451  MVCC_REC_HEADER mvcc_header_for_snapshot;
25452 
25453  /* Assert expected arguments. */
25454  assert (args != NULL);
25455  assert (btid_int != NULL);
25456  assert (oid != NULL);
25457  assert (class_oid != NULL);
25458  assert (mvcc_info != NULL);
25459  assert (stop != NULL);
25460 
25461  /* args is the b-tree scan structure. */
25462  bts = (BTREE_SCAN *) args;
25463 
25464  /* This function first checks object eligibility; an eligible object is then saved. Eligibility filters:
25465  * 1. Snapshot: object must be visible. 2. Match class: for unique indexes of hierarchical classes, the query may
25466  * be executed on one class only, so the class must be matched. 3. Key limits: the first key_limit_lower objects
25467  * are skipped, and the scan is ended once n_oids_read_last_iteration reaches key_limit_upper. The object is then
25468  * saved/processed depending on type of scan: 1. MRO - object is checked against the current Top N objects.
25469  * 2. Covering index - object and key are saved in a list file. 3. ISS/regular scan - OID is saved in a buffer.
25470  * NOTE: Unique indexes will stop after the first visible object. */
25471 
25472  /* Build a heap MVCC header from the b-tree MVCC info and get the snapshot of the index scan (NULL if none). */
25473  btree_mvcc_info_to_heap_mvcc_header (mvcc_info, &mvcc_header_for_snapshot);
25474  snapshot = bts->index_scan_idp != NULL ? bts->index_scan_idp->scan_cache.mvcc_snapshot : NULL;
25475 
25477  {
25478  /* Check if object should have been vacuumed. */
25479  DISK_ISVALID disk_result = DISK_VALID;
25480 
25481  disk_result = vacuum_check_not_vacuumed_rec_header (thread_p, oid, class_oid, &mvcc_header_for_snapshot,
25482  /* TODO: Actually node type is not accurate. */
25483  bts->node_type);
25484  if (disk_result != DISK_VALID)
25485  {
25486  /* Error or object should have been vacuumed. Record the result; only DISK_ERROR aborts processing. */
25487  bts->index_scan_idp->not_vacuumed_res = disk_result;
25488  if (disk_result == DISK_ERROR)
25489  {
25490  /* Error. */
25491  ASSERT_ERROR_AND_SET (error_code);
25492  return error_code;
25493  }
25494  }
25495  }
25496 
25497  /* Check snapshot. The check is skipped when there is no snapshot or no snapshot function. */
25498  if (snapshot != NULL && snapshot->snapshot_fnc != NULL
25499  && snapshot->snapshot_fnc (thread_p, &mvcc_header_for_snapshot, snapshot) != SNAPSHOT_SATISFIED)
25500  {
25501  /* Snapshot not satisfied. Ignore object. */
25502  return NO_ERROR;
25503  }
25504  /* No snapshot or snapshot was satisfied. */
25505 
25506  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
25507  {
25508  /* Key can have only one visible object. */
25510  *stop = true;
25511  /* Fall through to handle current object. Processing this key will stop afterwards. */
25512 
25513  /* Verify class match. */
25514  if (!OID_ISNULL (&bts->match_class_oid) && !OID_EQ (&bts->match_class_oid, class_oid))
25515  {
25516  /* Class not matched. Ignore object; *stop is already true, so processing of this key still ends. */
25517  return NO_ERROR;
25518  }
25519  /* Class was matched. */
25520  }
25521 
25522  /* Select object. */
25523 
25524  /* Check key limit filters */
25525  /* Only objects between lower key limit and lower+upper key limit are selected. The first lower-key-limit objects
25526  * are skipped, and the scan is ended after upper-key-limit more objects. */
25527 
25528  /* Lower key limit */
25529  if (bts->key_limit_lower != NULL && *bts->key_limit_lower > 0)
25530  {
25531  /* Do not copy object. Just decrement key_limit_lower until it is 0. */
25532  assert (!BTS_IS_INDEX_ILS (bts));
25533  (*bts->key_limit_lower)--;
25534  return NO_ERROR;
25535  }
25536  /* No lower key limit or lower key limit was already reached. */
25537 
25538  /* Upper key limit */
25539  if (bts->key_limit_upper != NULL && bts->n_oids_read_last_iteration >= *bts->key_limit_upper)
25540  {
25541  /* Upper limit reached. Stop scan. */
25542  assert (!BTS_IS_INDEX_ILS (bts));
25543  bts->end_scan = true;
25544  *stop = true;
25545  return NO_ERROR;
25546  }
25547  /* No upper key limit or upper key limit was not reached yet. */
25548 
25549  /* Must objects be read or just counted? */
25550  if (BTS_NEED_COUNT_ONLY (bts))
25551  {
25552  /* Just count. */
25553  assert (!BTS_IS_INDEX_ILS (bts));
25555  return NO_ERROR;
25556  }
25557  /* Read object. */
25558 
25559  /* Possible scans that can reach this code: - Multi-range-optimization. - Covering index. - ISS, if current op is
25560  * ISS_OP_DO_RANGE_SEARCH. - Regular index range scan. */
25561 
25562  if (BTS_IS_INDEX_MRO (bts))
25563  {
25564  /* Multiple range optimization */
25565  /* Add current key to TOP N sorted keys */
25566  /* Pass current key and next pseudo OID's to handle lock release when a candidate is thrown out of TOP N
25567  * structure. */
25568  bool mro_continue = true;
25569  error_code =
25570  btree_range_opt_check_add_index_key (thread_p, bts, &bts->index_scan_idp->multi_range_opt, oid, &mro_continue);
25571  if (error_code != NO_ERROR)
25572  {
25573  ASSERT_ERROR ();
25574  return error_code;
25575  }
25576  if (!mro_continue)
25577  {
25578  /* Current item didn't fit in the TOP N keys, and the following items in current btree_range_search iteration
25579  * will not be better. Go to end of scan. */
25580  bts->end_scan = true;
25581  *stop = true;
25582  }
25583  else
25584  {
25585  /* Increment OID counter. */
25587  }
25588  /* Finished handling object for MRO. */
25589  return NO_ERROR;
25590  }
25591 
25592  /* Possible scans that can reach this code: - Covering index. - ISS, if current op is ISS_OP_DO_RANGE_SEARCH. -
25593  * Regular index range scan. */
25594 
25595  if (BTS_IS_INDEX_COVERED (bts))
25596  {
25597  if (BTS_IS_INDEX_ILS (bts))
25598  {
25599  /* Index loose scan. Stop processing this key's objects after the current one. */
25600  *stop = true;
25602  /* Interrupt range scan. It must be restarted with a new range. */
25603  bts->is_interrupted = true;
25604 
25605  /* Since range scan must be moved on a totally different range, it must restart by looking for the first
25606  * eligible key of the new range. Trick it to think this is a new call of btree_range_scan. */
25607  bts_reset_scan (thread_p, bts);
25608 
25609  /* Adjust range of scan. */
25610  error_code = btree_ils_adjust_range (thread_p, bts);
25611  if (error_code != NO_ERROR)
25612  {
25613  ASSERT_ERROR ();
25614  return error_code;
25615  }
25616 
25617  /* Covering index: output current key and object. */
25618  error_code = btree_dump_curr_key (thread_p, bts, bts->key_filter, oid, bts->index_scan_idp);
25619  if (error_code != NO_ERROR)
25620  {
25621  ASSERT_ERROR ();
25622  return error_code;
25623  }
25625  }
25626  else
25627  {
 /* Covering index without loose scan: output current key and object. */
25628  error_code = btree_dump_curr_key (thread_p, bts, bts->key_filter, oid, bts->index_scan_idp);
25629  if (error_code != NO_ERROR)
25630  {
25631  ASSERT_ERROR ();
25632  return error_code;
25633  }
25634  }
25635 
25637  return NO_ERROR;
25638  }
25639 
25640  /* Possible scans that can reach this code: - ISS, if current op is ISS_OP_DO_RANGE_SEARCH. - Regular index range
25641  * scan. They are both treated in the same way (copied to OID buffer). */
25642  BTS_SAVE_OID_IN_BUFFER (bts, oid);
25643 
25644  assert (HEAP_ISVALID_OID (thread_p, oid) != DISK_INVALID);
25646 
25647  return NO_ERROR;
25648 }
25649 
25650 /*
25651  * btree_range_scan_find_fk_any_object () - BTREE_RANGE_SCAN_PROCESS_KEY_FUNC
 *   Processes the objects of the current key with btree_record_process_objects: first the leaf record
 *   (bts->key_record), then each overflow page starting from bts->leaf_rec_info.ovfl, until the per-object
 *   callback sets stop or the overflow chain ends. If the key was consumed without ending or interrupting the
 *   scan, bts->end_scan is set.
25652  *
25653  * return : Error code. NO_ERROR on success; the error of btree_record_process_objects, pgbuf_fix or
 *   btree_get_next_overflow_vpid otherwise; ER_FAILED if the overflow record cannot be read.
25654  * thread_p (in) : Thread entry.
25655  * bts (in) : B-tree scan info. bts->bts_other is used as a BTREE_FIND_FK_OBJECT; bts->O_page and bts->end_scan
 *   are modified.
 *
 * NOTE(review): this listing skips line numbers 25658, 25673 and 25714, so the function name line and the
 *   trailing arguments (per-object callback and its argument) of both btree_record_process_objects calls are
 *   not visible here - confirm against the original file.
25656  */
25657 static int
25659 {
25660  int error_code = NO_ERROR; /* Error code. */
25661  bool stop = false;
25662  PAGE_PTR prev_ovf_page = NULL;
25663  RECDES peeked_ovf_recdes;
25664  VPID ovf_vpid = VPID_INITIALIZER;
25665 
25666  /* Assert expected arguments. */
25667  assert (bts != NULL);
25668  assert (bts->bts_other != NULL);
25669 
25670  /* Search and lock one object in key. Start with the objects of the leaf record. */
25671  error_code =
25672  btree_record_process_objects (thread_p, &bts->btid_int, BTREE_LEAF_NODE, &bts->key_record, bts->offset, &stop,
25674  if (error_code != NO_ERROR)
25675  {
25676  ASSERT_ERROR ();
25677  return error_code;
25678  }
25679  if (stop == true)
25680  {
 /* Callback asked to stop; overflow pages are not visited. */
25681  return NO_ERROR;
25682  }
25683 
25684  /* Process overflow OID's. */
25685  VPID_COPY (&ovf_vpid, &bts->leaf_rec_info.ovfl);
25686  while (!VPID_ISNULL (&ovf_vpid))
25687  {
25688  /* Fix overflow page (read latch). */
25689  bts->O_page = pgbuf_fix (thread_p, &ovf_vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
25690  if (bts->O_page == NULL)
25691  {
25692  ASSERT_ERROR_AND_SET (error_code);
25693  if (prev_ovf_page != NULL)
25694  {
25695  pgbuf_unfix_and_init (thread_p, prev_ovf_page);
25696  }
25697  return error_code;
25698  }
25699  if (prev_ovf_page != NULL)
25700  {
25701  /* Now unfix previous overflow page. */
25702  pgbuf_unfix_and_init (thread_p, prev_ovf_page);
25703  }
25704  /* Get overflow OID's record (peeked from slot 1 of the overflow page). */
25705  if (spage_get_record (thread_p, bts->O_page, 1, &peeked_ovf_recdes, PEEK) != S_SUCCESS)
25706  {
25707  assert_release (false);
25708  pgbuf_unfix_and_init (thread_p, bts->O_page);
25709  return ER_FAILED;
25710  }
25711  /* Call internal function on overflow record. */
25712  error_code =
25713  btree_record_process_objects (thread_p, &bts->btid_int, BTREE_OVERFLOW_NODE, &peeked_ovf_recdes, 0, &stop,
25715  if (error_code != NO_ERROR)
25716  {
25717  /* Error. bts->O_page is checked for NULL before unfix (presumably the callback may have unfixed it). */
25718  ASSERT_ERROR ();
25719  if (bts->O_page != NULL)
25720  {
25721  pgbuf_unfix_and_init (thread_p, bts->O_page);
25722  }
25723  return error_code;
25724  }
25725  else if (stop)
25726  {
25727  /* Stop. */
25728  break;
25729  }
25730  assert (bts->O_page != NULL);
25731 
25732  /* Get VPID of next overflow page */
25733  error_code = btree_get_next_overflow_vpid (thread_p, bts->O_page, &ovf_vpid);
25734  if (error_code != NO_ERROR)
25735  {
25736  assert_release (false);
25737  pgbuf_unfix_and_init (thread_p, bts->O_page);
25738  return error_code;
25739  }
25740  /* Save overflow page until next one is fixed to protect the link between them. */
25741  prev_ovf_page = bts->O_page;
25742  }
25743 
25744  /* Object processing stopped or ended. */
25745  assert (error_code == NO_ERROR);
25746 
25747  /* If overflow page is fixed, unfix it. */
25748  if (bts->O_page != NULL)
25749  {
25750  pgbuf_unfix_and_init (thread_p, bts->O_page);
25751  }
25752  if (bts->end_scan)
25753  {
 /* Scan was already ended while processing objects. */
25754  return NO_ERROR;
25755  }
25756  if (!bts->is_interrupted)
25757  {
25758  /* Key was fully consumed. We are here because no object was found. Since this key was the only one of interest,
25759  * scan can be stopped. */
25760  assert (OID_ISNULL (&((BTREE_FIND_FK_OBJECT *) bts->bts_other)->found_oid));
25761  bts->end_scan = true;
25762  }
25763  return NO_ERROR;
25764 }
25765 
25766 /*
25767  * btree_fk_object_does_exist () - Check whether current object exists (it must not be deleted and successfully locked).
 *   The object's MVCC info is converted to a heap MVCC header and classified with mvcc_satisfies_delete. An
 *   object that can be locked conditionally is reported in found_oid and the scan is ended. If the object
 *   cannot be locked right away (SERVER_MODE), the fixed pages are released, the scan is marked interrupted
 *   and an unconditional lock is requested.
25768  *
25769  * return : Error code. NO_ERROR when the object is skipped, found or locked after waiting; the er_errid () value
 *   (or ER_CANNOT_GET_LOCK if none was set) when the unconditional lock fails; ER_FAILED on the SA_MODE
 *   paths marked impossible.
25770  * thread_p (in) : Thread entry.
25771  * btid_int (in) : B-tree info.
25772  * record (in) : Index record containing one key's objects.
25773  * object_ptr (in) : Pointer in record to current object (not used).
25774  * oid (in) : Current object OID.
25775  * class_oid (in) : Current object class OID.
25776  * mvcc_info (in) : Current object MVCC info.
25777  * stop (out) : Set to true if processing record should stop after this function execution.
25778  * args (in/out) : BTREE_SCAN *. bts->bts_other must point to a BTREE_FIND_FK_OBJECT.
 *
 * NOTE(review): this listing skips line numbers 25806, 25810, 25822 and 25833, so only the first case label of
 *   the switch (DELETE_RECORD_DELETED) is visible; the labels of the other branches must be confirmed against
 *   the original file.
25779  */
25780 static int
25781 btree_fk_object_does_exist (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, char *object_ptr, OID * oid,
25782  OID * class_oid, BTREE_MVCC_INFO * mvcc_info, bool * stop, void *args)
25783 {
25784  BTREE_SCAN *bts = (BTREE_SCAN *) args;
25785  BTREE_FIND_FK_OBJECT *find_fk_obj = NULL;
25786  MVCC_SATISFIES_DELETE_RESULT satisfy_delete;
25787  MVCC_REC_HEADER mvcc_header_for_check_delete;
25788  int lock_result;
25789 
25790  /* Assert expected arguments. */
25791  assert (bts != NULL);
25792  assert (bts->bts_other != NULL);
25793  assert (oid != NULL);
25794  assert (class_oid != NULL && !OID_ISNULL (class_oid));
25795  assert (mvcc_info != NULL);
25796 
25797  find_fk_obj = (BTREE_FIND_FK_OBJECT *) bts->bts_other;
25798 
25799  /* Is object not dirty and lockable? */
25800 
25801  btree_mvcc_info_to_heap_mvcc_header (mvcc_info, &mvcc_header_for_check_delete);
25802  satisfy_delete = mvcc_satisfies_delete (thread_p, &mvcc_header_for_check_delete);
25803  switch (satisfy_delete)
25804  {
25805  case DELETE_RECORD_DELETED:
25807  /* Object is already deleted. It doesn't exist. */
25808  return NO_ERROR;
25809 
25811 #if defined (SERVER_MODE)
25812  /* Recently inserted. This can be ignored, since it is not inserted yet. To successfully insert, the inserter
25813  * should also obtain lock on primary key object (which is already held by current transaction). Current
25814  * transaction can consider that this object doesn't exist yet. */
25815  return NO_ERROR;
25816 #else /* !SERVER_MODE */ /* SA_MODE */
25817  /* Impossible: no other active transactions. */
25818  assert_release (false);
25819  return ER_FAILED;
25820 #endif /* SA_MODE */
25821 
25823 #if defined (SERVER_MODE)
25824  /* Object is being deleted by an active transaction. We have to wait for that transaction to commit. Fall through
25825  * to suspend. */
25826  break;
25827 #else /* !SERVER_MODE */ /* SA_MODE */
25828  /* Impossible: no other active transactions. */
25829  assert_release (false);
25830  return ER_FAILED;
25831 #endif /* SA_MODE */
25832 
25834 #if defined (SERVER_MODE)
25835  /* Try conditional lock */
25836  /* Make sure there is no other object already locked. */
25837  if (!OID_ISNULL (&find_fk_obj->locked_object))
25838  {
25839  if (OID_EQ (&find_fk_obj->locked_object, oid))
25840  {
25841  /* Object already locked. */
25842  assert (lock_has_lock_on_object (oid, class_oid, find_fk_obj->lock_mode) > 0);
25843  lock_result = LK_GRANTED;
25844  }
25845  else
25846  {
25847  /* A different object is currently locked. Release it before locking the current object. */
25848  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_fk_obj->locked_object, class_oid,
25849  find_fk_obj->lock_mode);
25850  OID_SET_NULL (&find_fk_obj->locked_object);
25851  }
25852  }
25853  if (OID_ISNULL (&find_fk_obj->locked_object))
25854  {
25855  /* Get conditional lock. */
25856  lock_result = lock_object (thread_p, oid, class_oid, find_fk_obj->lock_mode, LK_COND_LOCK);
25857  }
25858 #else /* !SERVER_MODE */ /* SA_MODE */
25859  lock_result = LK_GRANTED;
25860 #endif /* SA_MODE */
25861  if (lock_result == LK_GRANTED)
25862  {
25863  /* Object was successfully locked. Report it as found and stop the scan now. */
25864  COPY_OID (&find_fk_obj->found_oid, oid);
25865  bts->end_scan = true;
25866  *stop = true;
25867 #if defined (SERVER_MODE)
25868  COPY_OID (&find_fk_obj->locked_object, oid);
25869 #endif /* SERVER_MODE */
25870  return NO_ERROR;
25871  }
25872  /* Conditional lock failed. Fall through to suspend. */
25873  break;
25874  }
25875 
25876 #if defined (SA_MODE)
25877  /* Impossible to reach. */
25878  assert_release (false);
25879  return ER_FAILED;
25880 #else /* !SA_MODE */ /* SERVER_MODE */
25881  /* Object may exist but could not be locked conditionally. */
25882  /* Unconditional lock on object. */
25883  /* Must release fixed pages first. The scan is marked interrupted because its pages are no longer fixed. */
25884  bts->is_interrupted = true;
25885  pgbuf_unfix_and_init (thread_p, bts->C_page);
25886  if (bts->O_page != NULL)
25887  {
25888  pgbuf_unfix_and_init (thread_p, bts->O_page);
25889  }
25890 
25891  /* Make sure another object is not still locked. */
25892  if (!OID_ISNULL (&find_fk_obj->locked_object))
25893  {
25894  lock_unlock_object_donot_move_to_non2pl (thread_p, &find_fk_obj->locked_object, class_oid,
25895  find_fk_obj->lock_mode);
25896  OID_SET_NULL (&find_fk_obj->locked_object);
25897  }
25898  lock_result = lock_object (thread_p, oid, class_oid, find_fk_obj->lock_mode, LK_UNCOND_LOCK);
25899  if (lock_result != LK_GRANTED)
25900  {
25901  int error_code = NO_ERROR;
25902 
25903  /* Lock failed. */
25904  error_code = er_errid ();
25905  if (error_code == NO_ERROR)
25906  {
25907  /* Set an error code. */
25908  error_code = ER_CANNOT_GET_LOCK;
25909  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error_code, 0);
25910  }
25911  return error_code;
25912  }
25913  /* Lock successful. */
25914  /* Key will be checked again just to make sure object was not deleted or moved while current thread was suspended. */
 /* NOTE(review): unlike the conditional-lock path, find_fk_obj->locked_object is not updated here - confirm
  * that the caller or the re-check of the key accounts for the lock obtained above. */
25915  *stop = true;
25916  return NO_ERROR;
25917 #endif /* SERVER_MODE */
25918 }
25919 
25920 /*
25921  * btree_undo_delete_physical () - Undo of physical delete from b-tree. Must insert back object and other required
25922  * information (class OID, MVCC info).
 *   The work is delegated to btree_insert_internal as a SINGLE_ROW_INSERT with no unique statistics collector
 *   and no unique output.
25923  *
25924  * return : Error code (the result of btree_insert_internal).
25925  * thread_p (in) : Thread entry.
25926  * btid (in) : B-tree identifier.
25927  * key (in) : Key value.
25928  * class_oid (in) : Class OID. May be NULL; inside the logging block a NULL pointer is replaced with the address of
 *   oid_Null_oid.
25929  * oid (in) : Instance OID.
25930  * mvcc_info (in) : B-tree MVCC information.
25931  * undo_nxlsa (in) : UNDO next lsa for logical compensate.
 *
 * NOTE(review): this listing skips line numbers 25937, 25943 and 25951, so the condition guarding the logging
 *   block, the logging call itself and the last arguments passed to btree_insert_internal are not visible
 *   here - confirm against the original file.
25932  */
25933 static int
25934 btree_undo_delete_physical (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * oid,
25935  BTREE_MVCC_INFO * mvcc_info, LOG_LSA * undo_nxlsa)
25936 {
25938  {
 /* Use the null OID so that the message arguments below can be read safely. */
25939  if (class_oid == NULL)
25940  {
25941  class_oid = (OID *) (&oid_Null_oid);
25942  }
25944  "BTREE_INSERT: Start undo physical delete %d|%d|%d, "
25945  "class_oid %d|%d|%d, insert MVCCID=%llu delete MVCCID=%llu into index (%d, %d|%d).\n",
25946  oid->volid, oid->pageid, oid->slotid, class_oid->volid, class_oid->pageid, class_oid->slotid,
25947  mvcc_info->insert_mvccid, mvcc_info->delete_mvccid, btid->root_pageid, btid->vfid.volid,
25948  btid->vfid.fileid);
25949  }
25950  return btree_insert_internal (thread_p, btid, key, class_oid, oid, SINGLE_ROW_INSERT, NULL, NULL, mvcc_info,
25952 }
25953 
25954 /*
25955  * btree_insert () - Insert new object into b-tree.
 *   Builds the b-tree MVCC info from the heap MVCC header (when one is given) and delegates to
 *   btree_insert_internal.
25956  *
25957  * return : Error code (the result of btree_insert_internal).
25958  * thread_p (in) : Thread entry.
25959  * btid (in) : B-tree identifier.
25960  * key (in) : Key value.
25961  * cls_oid (in) : Class OID. Inside the logging block a NULL pointer is replaced with the address of oid_Null_oid.
25962  * oid (in) : Instance OID. Must not be NULL.
25963  * op_type (in) : Single-multi row operations.
25964  * unique_stat_info (in) : Statistics collector used multi row operations.
25965  * unique (out) : Outputs if b-tree is unique when not NULL.
25966  * p_mvcc_rec_header (in) : Heap MVCC record header. May be NULL; a non-NULL header triggers assert_release (false)
 *   in builds without SERVER_MODE. The resulting MVCC info must not carry a valid delete
 *   MVCCID.
 *
 * NOTE(review): this listing skips line numbers 25972, 25985, 25991 and 26003, so the declaration of mvcc_info,
 *   the condition guarding the logging block, the logging call itself and the last arguments passed to
 *   btree_insert_internal are not visible here - confirm against the original file.
25967  */
25968 int
25969 btree_insert (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * cls_oid, OID * oid, int op_type,
25970  btree_unique_stats * unique_stat_info, int *unique, MVCC_REC_HEADER * p_mvcc_rec_header)
25971 {
25973 
25974  /* Assert expected arguments. */
25975  assert (oid != NULL);
25976 
25977  if (p_mvcc_rec_header != NULL)
25978  {
25979 #if !defined (SERVER_MODE)
 /* A heap MVCC header is not expected in builds without SERVER_MODE. */
25980  assert_release (false);
25981 #endif /* SERVER_MODE */
25982  btree_mvcc_info_from_heap_mvcc_header (p_mvcc_rec_header, &mvcc_info);
25983  }
25984 
25986  {
 /* Use the null OID so that the message arguments below can be read safely. */
25987  if (cls_oid == NULL)
25988  {
25989  cls_oid = (OID *) (&oid_Null_oid);
25990  }
25992  "BTREE_INSERT: Start insert object %d|%d|%d, class_oid %d|%d|%d, insert MVCCID=%llu into "
25993  "index (%d, %d|%d), op_type=%d.\n", oid->volid, oid->pageid, oid->slotid, cls_oid->volid,
25994  cls_oid->pageid, cls_oid->slotid,
25995  p_mvcc_rec_header != NULL ? MVCC_GET_INSID (p_mvcc_rec_header) : MVCCID_ALL_VISIBLE,
25996  btid->root_pageid, btid->vfid.volid, btid->vfid.fileid, op_type);
25997  }
25998 
25999  /* Safe guard: an insert must not carry a valid delete MVCCID. */
26000  assert (!BTREE_MVCC_INFO_IS_DELID_VALID (&mvcc_info));
26001 
26002  return btree_insert_internal (thread_p, btid, key, cls_oid, oid, op_type, unique_stat_info, unique, &mvcc_info, NULL,
26004 }
26005 
26006 /*
26007  * btree_mvcc_delete () - MVCC logical delete. Adds delete MVCCID to an existing object.
 *   Converts the heap MVCC header to b-tree MVCC info and delegates to btree_insert_internal.
26008  *
26009  * return : Error code (the result of btree_insert_internal).
26010  * thread_p (in) : Thread entry.
26011  * btid (in) : B-tree identifier.
26012  * key (in) : Key value.
26013  * class_oid (in) : Class OID. Inside the logging block a NULL pointer is replaced with the address of oid_Null_oid.
26014  * oid (in) : Instance OID. Must not be NULL.
26015  * op_type (in) : Single-multi row operations.
26016  * unique_stat_info (in) : Statistics collector used multi row operations.
26017  * unique (out) : Outputs if b-tree is unique when not NULL.
26018  * p_mvcc_rec_header (in) : Heap MVCC record header. Must not be NULL; the MVCC info obtained from it is asserted
 *   to carry a valid delete MVCCID.
 *
 * NOTE(review): this listing skips line numbers 26024, 26030, 26036 and 26049, so the declaration of mvcc_info,
 *   the condition guarding the logging block, the logging call itself and the last arguments passed to
 *   btree_insert_internal are not visible here - confirm against the original file.
26019  */
26020 int
26021 btree_mvcc_delete (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * oid, int op_type,
26022  btree_unique_stats * unique_stat_info, int *unique, MVCC_REC_HEADER * p_mvcc_rec_header)
26023 {
26025 
26026  /* Assert expected arguments. */
26027  assert (oid != NULL);
26028  assert (p_mvcc_rec_header != NULL);
26029 
26031  {
 /* Use the null OID so that the message arguments below can be read safely. */
26032  if (class_oid == NULL)
26033  {
26034  class_oid = (OID *) (&oid_Null_oid);
26035  }
26037  "BTREE_INSERT: Start MVCC delete object %d|%d|%d, class_oid %d|%d|%d, delete MVCCID=%llu into "
26038  "index (%d, %d|%d), op_type=%d.\n", oid->volid, oid->pageid, oid->slotid, class_oid->volid,
26039  class_oid->pageid, class_oid->slotid, MVCC_GET_DELID (p_mvcc_rec_header), btid->root_pageid,
26040  btid->vfid.volid, btid->vfid.fileid, op_type);
26041  }
26042 
26043  btree_mvcc_info_from_heap_mvcc_header (p_mvcc_rec_header, &mvcc_info);
26044 
26045  /* Safe guard: a logical delete must carry a valid delete MVCCID. */
26046  assert (BTREE_MVCC_INFO_IS_DELID_VALID (&mvcc_info));
26047 
26048  return btree_insert_internal (thread_p, btid, key, class_oid, oid, op_type, unique_stat_info, unique, &mvcc_info,
26050 }
26051 
26052 /*
26053  * btree_insert_internal () - Generic index function that inserts new data (an object or a delete MVCCID) in a b-tree key.
26054  *
26055  * return : Error code.
26056  * thread_p (in) : Thread entry.
26057  * btid (in) : B-tree identifier. Must not be NULL (asserted).
26058  * key (in) : Key value. A NULL pointer, a NULL value or a NULL multi-column key sets insert_helper.is_null.
26059  * class_oid (in) : Class OID. May be NULL or a null OID only for BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE (asserted).
26060  * oid (in) : Instance OID. Must not be NULL (asserted).
26061  * op_type (in) : Single/Multi row operation type.
26062  * unique_stat_info (in/out) : Unique stats info, used to track changes during multi-update operation.
26063  * unique (out) : When not NULL, outputs 1 if index is unique, 0 otherwise. Written only on success.
26064  * mvcc_info (in) : B-tree MVCC information.
26065  * undo_nxlsa (in) : UNDO next lsa for logical compensate.
26066  * purpose (in) : B-tree insert purpose.
26067  */
26068 static int
26069 btree_insert_internal (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * class_oid, OID * oid, int op_type,
26070  btree_unique_stats * unique_stat_info, int *unique, BTREE_MVCC_INFO * mvcc_info,
26071  LOG_LSA * undo_nxlsa, BTREE_OP_PURPOSE purpose)
26072 {
26073  int error_code = NO_ERROR; /* Error code. */
26074  BTID_INT btid_int; /* B-tree info. */
26075  /* Search key helper which will point to where data should be inserted. */
26077  /* Insert helper. */
26078  BTREE_INSERT_HELPER insert_helper;
26079  /* Processing key function: can insert an object or just a delete MVCCID. */
26080  BTREE_PROCESS_KEY_FUNCTION *key_insert_func = NULL;
26081 
26082  /* Assert expected arguments. */
26083  assert (btid != NULL);
26084  assert (oid != NULL);
26085  /* Assert class OID is valid or not required; not required for undo delete */
26086  assert (purpose == BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE || (class_oid != NULL && !OID_ISNULL (class_oid)));
26087 
26088  PERF_UTIME_TRACKER_START (thread_p, &insert_helper.time_track);
26089 
26090  /* Save OID, class OID and MVCC info in insert helper. A NULL class OID pointer is stored as a null OID. */
26091  COPY_OID (BTREE_INSERT_OID (&insert_helper), oid);
26092  if (class_oid != NULL)
26093  {
26094  COPY_OID (BTREE_INSERT_CLASS_OID (&insert_helper), class_oid);
26095  }
26096  else
26097  {
26098  OID_SET_NULL (BTREE_INSERT_CLASS_OID (&insert_helper));
26099  }
26100  *BTREE_INSERT_MVCC_INFO (&insert_helper) = *mvcc_info;
26101 
26102  /* Is key NULL? */
26103  insert_helper.is_null = key == NULL || DB_IS_NULL (key) || btree_multicol_key_is_null (key);
26104 
26105  /* Set key function based on purpose. Unknown purposes fail with ER_FAILED. */
26106  switch (purpose)
26107  {
26109  /* Set undo_nxlsa. */
26110  assert (undo_nxlsa != NULL);
26111  LSA_COPY (&insert_helper.compensate_undo_nxlsa, undo_nxlsa);
26112  /* Fall through. */
26114  key_insert_func = btree_key_insert_new_object;
26115  break;
26118 #if defined (SA_MODE)
26119  /* We should not be here */
26120  assert (false);
26121 #endif /* SA_MODE */
26122  key_insert_func = btree_key_find_and_insert_delete_mvccid;
26123  break;
26124  default:
26125  assert (false);
26126  return ER_FAILED;
26127  }
26128  insert_helper.purpose = purpose;
26129 
26130  /* Set operation type. */
26131  insert_helper.op_type = op_type;
26132  /* Set unique stats info. */
26133  insert_helper.unique_stats_info = unique_stat_info;
26134 
26135  /* Do we log the operations? Use for debug only. */
26137 
26138  /* Is this a unique index and is operation type MULTI_ROW_UPDATE? Unique constraint violation will be treated
26139  * slightly differently. */
26140  insert_helper.is_unique_multi_update = unique_stat_info != NULL && op_type == MULTI_ROW_UPDATE;
26141  /* Is HA enabled? The above exception will no longer apply. */
26142  insert_helper.is_ha_enabled = !HA_DISABLED ();
26143 
26144  /* Add more insert_helper initialization here. */
26145 
26146  /* Search for key leaf page and insert data. The same insert_helper is passed as argument to all three callbacks. */
26147  error_code =
26148  btree_search_key_and_apply_functions (thread_p, btid, &btid_int, key, btree_fix_root_for_insert, &insert_helper,
26149  btree_split_node_and_advance, &insert_helper, key_insert_func, &insert_helper,
26150  &search_key, NULL);
26151 
26152  /* Free allocated resources (done before the error check so they are released on failure too). */
26153  if (insert_helper.printed_key != NULL)
26154  {
26155  db_private_free (thread_p, insert_helper.printed_key);
26156  }
26157 
26158 #if defined (SERVER_MODE)
26159  /* Saved locked OID keeps the object that used to be first in unique key before new object was inserted. In either
26160  * case, if error occurred or if inserting new object was successful, keeping this lock is no longer necessary. If
26161  * insert was successful, key is protected by the newly inserted object, which is already locked. */
26162  if (!OID_ISNULL (&insert_helper.saved_locked_oid))
26163  {
26164  lock_unlock_object_donot_move_to_non2pl (thread_p, &insert_helper.saved_locked_oid,
26165  &insert_helper.saved_locked_class_oid, X_LOCK);
26166  }
26167 #endif /* SERVER_MODE */
26168 
26169  if (error_code != NO_ERROR)
26170  {
26171  ASSERT_ERROR ();
26172  return error_code;
26173  }
26174 
26176 
26177  if (unique != NULL)
26178  {
26179  /* Output unique. error_code is always NO_ERROR here because errors have already returned above. */
26180  *unique = (BTREE_IS_UNIQUE (btid_int.unique_pk) && error_code == NO_ERROR) ? 1 : 0;
26181  }
26182 
26183  if (BTREE_IS_UNIQUE (btid_int.unique_pk) && insert_helper.is_unique_multi_update && !insert_helper.is_ha_enabled
26184  && !insert_helper.is_unique_key_added_or_deleted)
26185  {
26186  assert (op_type == MULTI_ROW_UPDATE);
26187  assert (unique_stat_info != NULL);
26188 
26189  /* Key was not inserted/deleted. Correct unique_stat_info (which assumed that key will be inserted/deleted). */
26190  if (purpose == BTREE_OP_INSERT_NEW_OBJECT)
26191  {
26192  // revert the assumed key + row insert
26193  unique_stat_info->delete_key_and_row ();
26194  // count only the inserted row
26195  unique_stat_info->add_row ();
26196  }
26197  else if (purpose == BTREE_OP_INSERT_MVCC_DELID || purpose == BTREE_OP_INSERT_MARK_DELETED)
26198  {
26199  // revert the assumed key + row delete
26200  unique_stat_info->insert_key_and_row ();
26201  // count only the deleted row
26202  unique_stat_info->delete_row ();
26203  }
26204  else
26205  {
26206  /* Unexpected. */
26207  assert_release (false);
26208  return ER_FAILED;
26209  }
26210  }
26211  if (insert_helper.is_unique_multi_update && !insert_helper.is_ha_enabled)
26212  {
26213  btree_insert_log (&insert_helper, "BTREE UNIQUE MULTI-UPDATE STATS: %s \n"
26215  "\t" BTREE_ID_MSG "\n"
26216  "\t" "%s: new stats = %lld keys, %lld objects, %lld nulls.",
26217  (btree_is_insert_object_purpose (insert_helper.purpose)) ? "Insert" : "MVCC Delete",
26218  BTREE_INSERT_HELPER_AS_ARGS (&insert_helper), BTID_AS_ARGS (btid_int.sys_btid),
26219  (btree_is_insert_object_purpose (insert_helper.purpose)) ?
26220  (insert_helper.is_unique_key_added_or_deleted ? "Added new key" : "Did not add new key") :
26221  (insert_helper.is_unique_key_added_or_deleted) ? "Removed key" : "Did not remove key",
26222  unique_stat_info->get_key_count (), unique_stat_info->get_row_count (),
26223  unique_stat_info->get_null_count ());
26224  }
26225 
26226  return NO_ERROR;
26227 }
26228 
26229 /*
26230  * btree_fix_root_for_insert () - BTREE_ROOT_WITH_KEY_FUNCTION - fix root before inserting data in b-tree.
26231  *
26232  * return : Error code. On error, the root page is unfixed if it had been fixed.
26233  * thread_p (in) : Thread entry.
26234  * btid (in) : B-tree identifier.
26235  * btid_int (out) : BTID_INT (B-tree data). Filled only on the first try.
26236  * key (in/out) : Key value. For MIDXKEY values, the domain is set from btid_int->key_type on the first try.
26237  * root_page (out) : Output b-tree root page. Must point to NULL on entry (asserted).
26238  * is_leaf (out) : NOTE(review): not written by the visible code of this function - confirm.
26239  * search_key (out) : Only asserted non-NULL in the visible code of this function.
26240  * stop (out) : Set to true when the key is NULL (object is not inserted in b-tree and root is unfixed).
26241  * restart (out) : NOTE(review): not written by the visible code of this function - confirm.
26242  * other_args (in/out) : BTREE_INSERT_HELPER *.
26243  *
26244  * NOTE: Besides fixing root page, this function can also modify the root header. This must be done only once
26245  * (guarded by insert_helper->is_first_try; later calls only re-fix the root and reset traversal flags).
26246  */
26246 static int
26247 btree_fix_root_for_insert (THREAD_ENTRY * thread_p, BTID * btid, BTID_INT * btid_int, DB_VALUE * key,
26248  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key, bool * stop,
26249  bool * restart, void *other_args)
26250 {
26251  BTREE_INSERT_HELPER *insert_helper = (BTREE_INSERT_HELPER *) other_args;
26252  BTREE_ROOT_HEADER *root_header = NULL;
26253  OID *notification_class_oid;
26254  int error_code;
26255  int key_len;
26256 
26257  /* Assert expected arguments. */
26258  assert (insert_helper != NULL);
26259  assert (root_page != NULL && *root_page == NULL);
26260  assert (btid != NULL);
26261  assert (btid_int != NULL);
26262  assert (search_key != NULL);
26263 
26264  /* Possible insert data operations: 1. Insert a new object along with other necessary information (class OID and/or
26265  * insert MVCCID). 2. Undo of physical delete. If an object is physically removed from b-tree and operation must be
26266  * undone, the object with all its additional information existing before delete must be inserted. 3. Logical delete,
26267  * which inserts a delete MVCCID. */
26268  assert (btree_is_insert_data_purpose (insert_helper->purpose));
26269 
26270  /* Fixing root page. */
26271  insert_helper->is_root = true;
26272  if (insert_helper->is_first_try)
26273  {
26274  /* Fix root and get header/b-tree info to do some additional operations on b-tree. */
26275  *root_page =
26276  btree_fix_root_with_info (thread_p, btid, insert_helper->nonleaf_latch_mode, NULL, &root_header, btid_int);
26277  if (*root_page == NULL)
26278  {
26279  ASSERT_ERROR_AND_SET (error_code);
26280  goto error;
26281  }
26282  }
26283  else
26284  {
26285  /* Just fix root page. */
26286  *root_page = btree_fix_root_with_info (thread_p, btid, insert_helper->nonleaf_latch_mode, NULL, NULL, NULL);
26287  if (*root_page == NULL)
26288  {
26289  ASSERT_ERROR_AND_SET (error_code);
26290  goto error;
26291  }
26292  /* Root fixed. */
26293  /* Reset other flags relevant for traversal. */
26294  insert_helper->is_crt_node_write_latched = false;
26295  insert_helper->need_update_max_key_len = false;
26296  return NO_ERROR;
26297  }
26298  assert (*root_page != NULL);
26299  assert (root_header != NULL);
26300  assert (insert_helper->is_null
26302 
26303  /* Do additional operations. */
26304  /* Next time this function is called, it must be just a restart of b-tree traversal and the additional operations
26305  * must not be executed again. Mark insert_helper to know to skip this part. */
26306  insert_helper->is_first_try = false;
26307 
26308  /* Set complete domain for MIDXKEYS. */
26309  if (key != NULL && DB_VALUE_DOMAIN_TYPE (key) == DB_TYPE_MIDXKEY)
26310  {
26311  key->data.midxkey.domain = btid_int->key_type;
26312  }
26313  if (insert_helper->log_operations && insert_helper->printed_key == NULL)
26314  {
26315  /* This is postponed here to make sure midxkey domain was initialized. */
26316  insert_helper->printed_key = pr_valstring (key);
26317  (void) SHA1Compute ((unsigned char *) insert_helper->printed_key, strlen (insert_helper->printed_key),
26318  &insert_helper->printed_key_sha1);
26319  }
26320 
26321  if (insert_helper->purpose == BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE
26322  || insert_helper->purpose == BTREE_OP_ONLINE_INDEX_UNDO_TRAN_DELETE)
26323  {
26324  /* Stop here. */
26325  /* Code after this: 1. Update unique statistics. In this case, they are updated by undone log records. 2. Create
26326  * overflow key file. Undoing a physical delete cannot create overflow file (the file should already exist if
26327  * that's the case). */
26328  /* Safe guard: there should be no physical delete of NULL keys. */
26329  assert (!insert_helper->is_null);
26330 
26331  if (BTREE_IS_UNIQUE (btid_int->unique_pk) && OID_ISNULL (BTREE_INSERT_CLASS_OID (insert_helper)))
26332  {
26333  /* Top class OID is not packed for recovery. Save it here. */
26334  COPY_OID (BTREE_INSERT_CLASS_OID (insert_helper), &btid_int->topclass_oid);
26335  }
26336  key_len = btree_get_disk_size_of_key (key);
26337  insert_helper->key_len_in_page = BTREE_GET_KEY_LEN_IN_PAGE (key_len);
26338  return NO_ERROR;
26339  }
26340 
26341  /* Update nulls, oids, keys statistics for unique indexes. */
26342  /* If transaction is not active (being rolled back), statistics don't need manual updating. They will be reverted
26343  * automatically by undo logs. NOTE that users reading the header statistics may see transient values. */
26344  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
26345  {
26346  btree_unique_stats incr;
26347 
26348  if (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID
26349  || insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED)
26350  {
26351  /* Object is being logically deleted. */
26352  if (insert_helper->is_null)
26353  {
26354  incr.delete_null_and_row ();
26355  }
26356  else
26357  {
26358  incr.delete_key_and_row ();
26359  }
26360  }
26361  else
26362  {
26363  /* Object is being inserted. */
26364  if (insert_helper->is_null)
26365  {
26366  incr.insert_null_and_row ();
26367  }
26368  else
26369  {
26370  incr.insert_key_and_row ();
26371  }
26372  }
26373 
26374  /* Update statistics. */
26375  /* Based on type of operation - single or multi, update the unique_stats_info structure or update the transaction
26376  * collected statistics. They will be reflected into global statistics later. */
26377  if (BTREE_IS_MULTI_ROW_OP (insert_helper->op_type))
26378  {
26379  /* Update unique_stats_info. A multi-row operation without a stats collector is treated as an error. */
26380  if (insert_helper->unique_stats_info == NULL)
26381  {
26382  assert_release (false);
26383  error_code = ER_FAILED;
26384  goto error;
26385  }
26386  (*insert_helper->unique_stats_info) += incr;
26387  }
26388  else
26389  {
26390  /* Update the transaction's collected statistics (skipped when purpose is an online index loading one). */
26391  if (!btree_is_online_index_loading (insert_helper->purpose))
26392  {
26393  error_code = logtb_tran_update_unique_stats (thread_p, *btid, incr, true);
26394  if (error_code != NO_ERROR)
26395  {
26396  ASSERT_ERROR ();
26397  goto error;
26398  }
26399  }
26400  }
26401  }
26402 
26403  if (insert_helper->is_null)
26404  {
26405  /* Stop here. Object is not inserted in b-tree. Root page is unfixed before returning. */
26406  *stop = true;
26407  pgbuf_unfix_and_init (thread_p, *root_page);
26408 
26409  btree_insert_log (insert_helper, "A NULL object was %s. \n" BTREE_INSERT_HELPER_MSG ("\t")
26410  "\t" BTREE_ID_MSG,
26411  (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID
26412  || insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED) ? "deleted" : "inserted",
26413  BTREE_INSERT_HELPER_AS_ARGS (insert_helper), BTID_AS_ARGS (btid_int->sys_btid));
26414  return NO_ERROR;
26415  }
26416 
26417  if (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID || insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED)
26418  {
26419  /* This is a deleted object. From here on, the code is for inserting keys only. */
26420  return NO_ERROR;
26421  }
26422 
26423  assert (btree_is_insert_object_purpose (insert_helper->purpose));
26424 
26425  /* Check if key length is too big and if an overflow key file needs to be created. */
26426  key_len = btree_get_disk_size_of_key (key);
26427  if (key_len >= BTREE_MAX_KEYLEN_INPAGE && VFID_ISNULL (&btid_int->ovfid))
26428  {
26429  /* Promote latch (if required). */
26430  error_code = pgbuf_promote_read_latch (thread_p, root_page, PGBUF_PROMOTE_SHARED_READER);
26431  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
26432  {
26433  /* Unfix and re-fix root page with write latch; root header and btid_int are fetched again. */
26434  pgbuf_unfix_and_init (thread_p, *root_page);
26435  *root_page = btree_fix_root_with_info (thread_p, btid, PGBUF_LATCH_WRITE, NULL, &root_header, btid_int);
26436  if (*root_page == NULL)
26437  {
26438  ASSERT_ERROR_AND_SET (error_code);
26439  goto error;
26440  }
26441  }
26442  else if (error_code != NO_ERROR)
26443  {
26444  ASSERT_ERROR ();
26445  goto error;
26446  }
26447  else if (*root_page == NULL)
26448  {
26449  ASSERT_ERROR_AND_SET (error_code);
26450  goto error;
26451  }
26452  /* Root is write latched. */
26453  insert_helper->is_crt_node_write_latched = true;
26454  /* Check that overflow key file was not created by another (re-checked after the root may have been re-fixed). */
26455  if (VFID_ISNULL (&btid_int->ovfid))
26456  {
26457  /* Create overflow key file. */
26458 
26459  /* Start a system operation. */
26460  log_sysop_start (thread_p);
26461 
26462  /* Create file. On failure the system operation is aborted. */
26463  error_code = btree_create_overflow_key_file (thread_p, btid_int);
26464  if (error_code != NO_ERROR)
26465  {
26466  ASSERT_ERROR ();
26467  log_sysop_abort (thread_p);
26468  goto error;
26469  }
26470 
26471  /* Notification. Use the insert class OID when set, otherwise the b-tree top class OID. */
26472  if (!OID_ISNULL (BTREE_INSERT_CLASS_OID (insert_helper)))
26473  {
26474  notification_class_oid = BTREE_INSERT_CLASS_OID (insert_helper);
26475  }
26476  else
26477  {
26478  notification_class_oid = &btid_int->topclass_oid;
26479  }
26480  BTREE_SET_CREATED_OVERFLOW_KEY_NOTIFICATION (thread_p, key, BTREE_INSERT_OID (insert_helper),
26481  notification_class_oid, btid, NULL);
26482 
26483  /* Change the root header. The old ovfid is logged as undo data and the new one as redo data. */
26484  log_append_undoredo_data2 (thread_p, RVBT_UPDATE_OVFID, &btid_int->sys_btid->vfid, *root_page, HEADER,
26485  sizeof (VFID), sizeof (VFID), &root_header->ovfid, &btid_int->ovfid);
26486  VFID_COPY (&root_header->ovfid, &btid_int->ovfid);
26487  pgbuf_set_dirty (thread_p, *root_page, DONT_FREE);
26488 
26489  /* Finish system operation. */
26490  log_sysop_commit (thread_p);
26491  }
26492  }
26493 
26494  insert_helper->key_len_in_page = BTREE_GET_KEY_LEN_IN_PAGE (key_len);
26495 
26496  /* Success. */
26497  return NO_ERROR;
26498 
26499 error:
26500  assert (error_code != NO_ERROR);
26501  ASSERT_ERROR ();
26502  if (*root_page != NULL)
26503  {
26504  pgbuf_unfix_and_init (thread_p, *root_page);
26505  }
26506  return error_code;
26507 }
26508 
26509 /*
26510  * btree_get_max_new_data_size () - Get new data size required based on node type and operation.
26511  *
26512  * return : Maximum required size for operation. NOTE(review): the default branch returns ER_FAILED as the size.
26513  * thread_p (in) : Thread entry.
26514  * btid_int (in) : B-tree info.
26515  * page (in) : B-tree node. Only asserted non-NULL in the visible code.
26516  * node_type (in) : B-tree node type (BTREE_NON_LEAF_NODE or BTREE_LEAF_NODE, asserted).
26517  * key_len (in) : Key length to be considered for new entries. Must be positive (asserted).
26518  * helper (in) : B-tree insert helper; its purpose selects the leaf-node size estimate.
26519  * known_to_be_found (in) : True if key was searched and found.
26520  */
26521 static int
26523  int key_len, BTREE_INSERT_HELPER * helper, bool known_to_be_found)
26524 {
26525  assert (btid_int != NULL);
26526  assert (page != NULL);
26527  assert (node_type == BTREE_NON_LEAF_NODE || node_type == BTREE_LEAF_NODE);
26528  assert (key_len > 0);
26529  assert (helper != NULL);
26530 
26531  if (node_type == BTREE_NON_LEAF_NODE)
26532  {
26533  return NON_LEAF_ENTRY_MAX_SIZE (key_len) + spage_slot_size ();
26534  }
26535 
26536  /* TODO: We can always know if key is found for leaf nodes. */
26537  switch (helper->purpose)
26538  {
26545  if (known_to_be_found)
26546  {
26547  /* Possible inserted data: 1. New object (consider maximum size including all info). 2. Link to overflow page
26548  * (and setting first object to max size). In worst case scenario it will insert same data as a fixed size
26549  * object. */
26550  return BTREE_OBJECT_FIXED_SIZE (btid_int);
26551  }
26552  else
26553  {
26554  /* A new entry max size (including new slot). */
26555  return LEAF_ENTRY_MAX_SIZE (key_len) + spage_slot_size ();
26556  }
26557 
26560  /* Always a delete MVCCID is added. */
26561  return OR_MVCCID_SIZE;
26562 
26563  default:
26564  /* Unhandled. NOTE(review): an error code is returned through a size-valued result - confirm callers cope. */
26565  assert_release (false);
26566  return ER_FAILED;
26567  }
26568 }
26569 
26570 /*
26571  * btree_split_node_and_advance () - BTREE_ADVANCE_WITH_KEY_FUNCTION used by btree_insert_internal while advancing
26572  * following key. It also has the role to make sure b-tree has enough space to
26573  * insert new data.
26574  *
26575  * return : Error code.
26576  * thread_p (in) : Thread entry.
26577  * btid_int (in) : B-tree data.
26578  * key (in) : Search key value.
26579  * crt_page (in) : Page of current node.
26580  * advance_to_page (out) : Fixed page of child node found by following key.
26581  * is_leaf (out) : Output true if current page is leaf node.
26582  * key_slotid (out) : Output slotid of key if found, otherwise NULL_SLOTID.
26583  * stop (out) : Output true if advancing in b-tree should be stopped.
26584  * restart (out) : Output true if advancing in b-tree should be restarted from top.
26585  * other_args (in/out) : BTREE_INSERT_HELPER *.
26586  */
26587 static int
26588 btree_split_node_and_advance (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * crt_page,
26589  PAGE_PTR * advance_to_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
26590  bool * stop, bool * restart, void *other_args)
26591 {
26592  /* Insert helper: used to store insert specific data that can be used during the call of
26593  * btree_search_key_and_apply_functions. */
26594  BTREE_INSERT_HELPER *insert_helper = (BTREE_INSERT_HELPER *) other_args;
26595  int max_new_data_size = 0; /* The maximum possible size of data to be inserted in current node. */
26596  int max_key_len = 0; /* Maximum key length of a new entry in node. */
26597  int error_code = NO_ERROR; /* Error code. */
26598  int key_count = 0; /* Node key count. */
26599  BTREE_NODE_TYPE node_type; /* Node type. */
26600  BTREE_NODE_HEADER *node_header = NULL; /* Node header. */
26601  /* VPID's of newly allocated pages for split. Both can be used if the root is split, only first is used if non-root
26602  * node is split. */
26603  VPID new_page_vpid1 = VPID_INITIALIZER, new_page_vpid2 = VPID_INITIALIZER;
26604  VPID child_vpid = VPID_INITIALIZER; /* VPID of child (based on the direction set by key). */
26605  VPID *crt_vpid = NULL; /* VPID pointer for current node. */
26606  VPID advance_vpid = VPID_INITIALIZER; /* VPID to advance to (hinted by split functions). */
26607  PAGE_PTR child_page = NULL; /* Page pointer of child node. */
26608  PAGE_PTR new_page1 = NULL, new_page2 = NULL; /* Page pointers to newly allocated pages. Both can be used if root is
26609  * split, only first is used if non-root node is split. */
26610  PGSLOTID child_slotid; /* Slot ID that points to child node. */
26611  bool is_new_key_possible = false; /* Set to true if insert operation can add new keys to b-tree (and not just
26612  * data in existing keys). */
26613  bool need_split = false; /* Set to true if split is required. */
26614  bool need_update_max_key_len = false; /* Set to true if the node max key length must be updated to the length of new
26615  * key. */
26616  bool is_system_op_started = false; /* Set to true when a system operations is started. Set to false when it is
26617  * ended. Used to properly end system operation in case of errors. */
26618  bool is_child_leaf = false; /* Set to true when current node fathers a leaf node. It is treated differently and it
26619  * must be determined before fixing the child node page. */
26620 
26621 #if !defined (NDEBUG)
26622  int parent_max_key_len = 0; /* Used by debug to check the rule that parent max key length is always bigger or equal
26623  * to child max key length. */
26624  int parent_node_level = 0; /* Used by debug to check that level of parent node is always the incremented value of
26625  * level of child node. */
26626 #endif /* !NDEBUG */
26627 
26628  LOG_LSA save_lsa = LSA_INITIALIZER;
26629  LOG_LSA save_child_lsa = LSA_INITIALIZER;
26630 
26631  /* Assert expected arguments. */
26632  assert (btid_int != NULL);
26633  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
26634  assert (crt_page != NULL && *crt_page != NULL);
26635  assert (advance_to_page != NULL && *advance_to_page == NULL);
26636  assert (is_leaf != NULL);
26637  assert (search_key != NULL);
26638  assert (stop != NULL);
26639  assert (restart != NULL);
26640  assert (insert_helper != NULL);
26641 
26642  page_key_boundary *page_boundaries =
26643  (insert_helper->insert_list != NULL && insert_helper->insert_list->m_use_page_boundary_check)
26644  ? &insert_helper->insert_list->m_boundaries : NULL;
26645 
26646 #if defined (SERVER_MODE)
26647  if (LOG_ISRESTARTED ()
26648  && (insert_helper->purpose == BTREE_OP_INSERT_NEW_OBJECT || insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID))
26649  {
26650  /* vacuum will probably follow same path */
26651  pgbuf_notify_vacuum_follows (thread_p, *crt_page);
26652  }
26653 #endif /* SERVER_MODE */
26654 
26655  /* Get informations on current node. */
26656  /* Node header. */
26657  node_header = btree_get_node_header (thread_p, *crt_page);
26658  if (node_header == NULL)
26659  {
26660  assert_release (false);
26661  error_code = ER_FAILED;
26662  goto error;
26663  }
26664  /* Leaf/Non-leaf node type. */
26665  node_type = (node_header->node_level == 1) ? BTREE_LEAF_NODE : BTREE_NON_LEAF_NODE;
26666  /* Key count. */
26667  key_count = btree_node_number_of_keys (thread_p, *crt_page);
26668  /* Safe guard. */
26669  assert (key_count >= 0);
26670  assert (key_count > 0 || node_type == BTREE_LEAF_NODE);
26671 
26672  /* Is new key possible? True if inserting new object or if undoing the removal of some key/object. */
26673  is_new_key_possible = btree_is_insert_object_purpose (insert_helper->purpose);
26674 
26675  /* Split algorithm: There are two types of splits: root split and normal split. 1. Root split: If there is not enough
26676  * space for new data in root, split it into three nodes: two nodes containing all previous entries and a new root
26677  * node to point to these nodes. This will increase the b-tree level. After split, function will continue with one
26678  * of the two newly created nodes. 2. Normal split: If child doesn't have enough space to handle new insert data,
26679  * split it into two nodes and update current node entries. Current node will gain one additional entry, so the split
26680  * algorithm should make sure it always has enough space before checking its children. One of the two children
26681  * resulted from the split will be chosen to advance to. */
26682  /* Part of split algorithm is to keep the maximum key length for each node. The value kept by a node is actually the
26683  * maximum length of all keys found in all leaf pages of the sub-tree fathered by current node. One resulting rule is
26684  * that the maximum key length value for one node is always bigger than or equal to the maximum key length value for
26685  * any of its children. Maximum key length is used to estimate the size of future entries (in a defensive way). */
26686  /* NOTE 1: To optimize b-tree access, the algorithm assumes that no change is required and READ latch on nodes is
26687  * normally used. However if max key length must be updated or if split is needed, latches must then be promoted to
26688  * write/exclusive. Promotion may sometimes fail (e.g. when there is already another promoter in waiting list). This
26689  * case is considered to be an exceptional and rare case, therefore it is allowed to restart b-tree traversal.
26690  * However, to avoid repeating traversals indefinitely, the second traversal is done using exclusive latches (which
26691  * should guarantee success). NOTE 2: Leaf nodes are always latched using exclusive latches (because they are changed
26692  * almost every time and using promotion can actually lead to poor performance). NOTE 3: Promotion of current page is
26693  * always done using ONLY_READER. This prevents dead-latches between three or more threads like the following: T1:
26694  * Holds READ on P1 and P2, waits on T2 for P1 promotion T2: Holds READ on P1, waits on T3 for READ on P2 T3: Holds
26695  * READ on P2 and another page, waits on T1 for P2 promotion */
26696 
26697  /* Root case. */
26698  if (insert_helper->is_root)
26699  {
26700  /* Check if root needs to split or update max key length. */
26701 
26702  /* Updating max key length is possible if new key can be added and if the length of the new key exceeds the
26703  * current max key length. */
26704  need_update_max_key_len = is_new_key_possible && insert_helper->key_len_in_page > node_header->max_key_len;
26705 
26706  /* Get maximum key length for a possible new entry in node. */
26707  max_key_len = need_update_max_key_len ? insert_helper->key_len_in_page : node_header->max_key_len;
26708 
26709  /* Compute the maximum possible size of data being inserted in node. */
26710  max_new_data_size =
26711  btree_get_max_new_data_size (thread_p, btid_int, *crt_page, node_type, max_key_len, insert_helper, false);
26712 
26713  /* Split is needed if there is a risk that inserted data doesn't fit the root node. */
26714  need_split = (max_new_data_size > spage_get_free_space_without_saving (thread_p, *crt_page, NULL));
26715 
26716  /* If root node should suffer changes, its latch must be promoted to exclusive. */
26717  if (insert_helper->nonleaf_latch_mode == PGBUF_LATCH_READ
26718  /* root has read. */
26719  && (need_split || need_update_max_key_len || node_type == BTREE_LEAF_NODE)
26720  /* and requires write. */ )
26721  {
26722  /* Promote latch. */
26723  error_code = pgbuf_promote_read_latch (thread_p, crt_page, PGBUF_PROMOTE_SHARED_READER);
26724  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
26725  {
26726  /* Retry fix with write latch. */
26727  pgbuf_unfix_and_init (thread_p, *crt_page);
26728  insert_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
26729  *restart = true;
26730  return NO_ERROR;
26731  }
26732  else if (error_code != NO_ERROR)
26733  {
26734  ASSERT_ERROR ();
26735  goto error;
26736  }
26737  else if (*crt_page == NULL)
26738  {
26739  ASSERT_ERROR_AND_SET (error_code);
26740  goto error;
26741  }
26743  insert_helper->is_crt_node_write_latched = true;
26744  }
26745 
26746  if (need_update_max_key_len)
26747  {
26749 
26750  save_lsa = *pgbuf_get_lsa (*crt_page);
26751 
26752  /* Update max key length. */
26753  node_header->max_key_len = insert_helper->key_len_in_page;
26754  error_code = btree_change_root_header_delta (thread_p, &btid_int->sys_btid->vfid, *crt_page, 0, 0, 0);
26755  if (error_code != NO_ERROR)
26756  {
26757  ASSERT_ERROR ();
26758  goto error;
26759  }
26760  pgbuf_set_dirty (thread_p, *crt_page, DONT_FREE);
26761 
26762  btree_insert_log (insert_helper, "Update max_key_length to %d. \n"
26763  "\t" PGBUF_PAGE_MODIFY_MSG ("root page") "\n" "\t" BTREE_ID_MSG,
26764  node_header->max_key_len, PGBUF_PAGE_MODIFY_ARGS (*crt_page, &save_lsa),
26765  BTID_AS_ARGS (btid_int->sys_btid));
26766 
26767  /* If this node required to update its max key length, then also the children we meet will require to update
26768  * their max key length. (rule being that parent->max_key_len >= child->max_key_len). */
26769  insert_helper->need_update_max_key_len = true;
26770  /* Set insert_helper->is_crt_node_write_latched to avoid trying promotion on current node. */
26771  insert_helper->is_crt_node_write_latched = true;
26772  }
26773 
26774  if (need_split)
26775  {
26777 
26778  /* Split root node. */
26779  assert (key_count >= 3);
26780 
26781  /* Start system operation. */
26782  log_sysop_start (thread_p);
26783  is_system_op_started = true;
26784 
26785  /* Create two new b-tree pages. */
26786  /* First page. */
26787  crt_vpid = pgbuf_get_vpid_ptr (*crt_page);
26788  new_page1 = btree_get_new_page (thread_p, btid_int, &new_page_vpid1, crt_vpid);
26789  if (new_page1 == NULL)
26790  {
26791  ASSERT_ERROR_AND_SET (error_code);
26792  goto error;
26793  }
26794  /* Second page. */
26795  new_page2 = btree_get_new_page (thread_p, btid_int, &new_page_vpid2, crt_vpid);
26796  if (new_page2 == NULL)
26797  {
26798  ASSERT_ERROR_AND_SET (error_code);
26799  goto error;
26800  }
26801 
26802  save_lsa = *pgbuf_get_lsa (*crt_page);
26803 
26804  /* Split the root. */
26805  error_code =
26806  btree_split_root (thread_p, btid_int, *crt_page, new_page1, new_page2, crt_vpid, &new_page_vpid1,
26807  &new_page_vpid2, node_type, key, insert_helper, &advance_vpid);
26808  if (error_code != NO_ERROR)
26809  {
26810  ASSERT_ERROR ();
26811  goto error;
26812  }
26813 
26814  btree_insert_log (insert_helper, "Split root page and create two children. \n"
26815  PGBUF_PAGE_MODIFY_MSG ("root page") "\n"
26816  "\t" PGBUF_PAGE_STATE_MSG ("left child page") "\n"
26817  "\t" PGBUF_PAGE_STATE_MSG ("right child page") "\n"
26818  "\t" BTREE_ID_MSG, PGBUF_PAGE_MODIFY_ARGS (*crt_page, &save_lsa),
26819  PGBUF_PAGE_STATE_ARGS (new_page1), PGBUF_PAGE_STATE_ARGS (new_page2),
26820  BTID_AS_ARGS (btid_int->sys_btid));
26821 
26822 #if !defined(NDEBUG)
26823  /* Safe guard checks */
26824  (void) spage_check_num_slots (thread_p, *crt_page);
26825  (void) spage_check_num_slots (thread_p, new_page1);
26826  (void) spage_check_num_slots (thread_p, new_page2);
26827 #endif
26828 
26829  /* Unfix root and choose adequate child to advance to (based on given key). The child is hinted by
26830  * btree_split_root through advance_vpid. */
26831  pgbuf_unfix_and_init (thread_p, *crt_page);
26832  insert_helper->is_crt_node_write_latched = false; /* because of unfix */
26833 
26834  /* Which child? */
26835  if (VPID_EQ (&advance_vpid, &new_page_vpid1))
26836  {
26837  /* Go to new page 1. */
26838  /* Unfix the other child. */
26839  pgbuf_unfix_and_init (thread_p, new_page2);
26840 
26841  /* End opened system operation. */
26842  log_sysop_commit (thread_p);
26843  is_system_op_started = false;
26844 
26845  /* Choose child 1 to advance. */
26846  *crt_page = new_page1;
26847  new_page1 = NULL;
26848 
26849  /* Set insert_helper->is_crt_node_write_latched to avoid trying promotion on current node. */
26850  insert_helper->is_crt_node_write_latched = true;
26852  }
26853  else if (VPID_EQ (&advance_vpid, &new_page_vpid2))
26854  {
26855  /* Go to new page 2. */
26856  /* Unfix the other child. */
26857  pgbuf_unfix_and_init (thread_p, new_page1);
26858 
26859  /* End opened system operation. */
26860  log_sysop_commit (thread_p);
26861  is_system_op_started = false;
26862 
26863  /* Choose child 2 to advance. */
26864  *crt_page = new_page2;
26865  new_page2 = NULL;
26866 
26867  insert_helper->is_crt_node_write_latched = true;
26869  }
26870  else
26871  {
26872  /* Impossible! */
26873  assert_release (false);
26874 
26875  pgbuf_unfix_and_init (thread_p, new_page1);
26876  pgbuf_unfix_and_init (thread_p, new_page2);
26877 
26878  /* Error */
26879  error_code = ER_FAILED;
26880  goto error;
26881  }
26882  assert (*crt_page != NULL);
26883 
26884  /* Re-obtain node header. */
26885  node_header = btree_get_node_header (thread_p, *crt_page);
26886  if (node_header == NULL)
26887  {
26888  /* Unexpected. */
26889  assert (false);
26890  error_code = ER_FAILED;
26891  goto error;
26892  }
26893 
26894  node_type = ((node_header->node_level == 1) ? BTREE_LEAF_NODE : BTREE_NON_LEAF_NODE);
26895  insert_helper->is_root = false;
26896  }
26897  assert (node_type != BTREE_LEAF_NODE || pgbuf_get_latch_mode (*crt_page) == PGBUF_LATCH_WRITE);
26898  }
26899 
26900  /* Here, node represented by *crt_page has enough space to handle a child split. Either because it was root and was
26901  * split into two nodes in this call or because it was non-root and was split in previous iteration (if split was
26902  * required of course). */
26903 
26904  if (node_type == BTREE_LEAF_NODE)
26905  {
26907 
26908  /* No other child. Notify caller this is a leaf node and return the slot of new key. */
26909  error_code = btree_search_leaf_page (thread_p, btid_int, *crt_page, key, search_key);
26910  if (error_code != NO_ERROR)
26911  {
26912  ASSERT_ERROR ();
26913  goto error;
26914  }
26915  *is_leaf = true;
26916  return NO_ERROR;
26917  }
26918 
26919  /* Node is non-leaf. */
26920  /* Check if the child we would normally advance to from this node requires splitting. Also check if child should
26921  * update its max key length. */
26922 
26923  /* If next child is leaf, it must be treated differently. See NOTE 2,3 from the big comment. */
26924  is_child_leaf = (node_header->node_level == 2);
26925 
26926  /* Find and fix the child to advance to. Use write latch if the child is leaf or if it is already known that it will
26927  * require an update of max key length. */
26928  error_code = btree_search_nonleaf_page (thread_p, btid_int, *crt_page, key, &child_slotid, &child_vpid,
26929  page_boundaries);
26930  if (error_code != NO_ERROR)
26931  {
26932  ASSERT_ERROR ();
26933  goto error;
26934  }
26935  child_page = pgbuf_fix (thread_p, &child_vpid, OLD_PAGE, ((is_child_leaf || insert_helper->need_update_max_key_len)
26936  ? PGBUF_LATCH_WRITE : insert_helper->nonleaf_latch_mode),
26938  if (child_page == NULL)
26939  {
26940  ASSERT_ERROR_AND_SET (error_code);
26941  goto error;
26942  }
26943 
26944 #if !defined (NDEBUG)
26945  /* Save parent max key length and node level. */
26946  parent_max_key_len = node_header->max_key_len;
26947  parent_node_level = node_header->node_level;
26948 #endif
26949 
26950  insert_helper->is_root = false;
26951 
26952  /* Get child node header */
26953  node_header = btree_get_node_header (thread_p, child_page);
26954  if (node_header == NULL)
26955  {
26956  assert (false);
26957  error_code = er_errid ();
26958  goto error;
26959  }
26960  node_type = is_child_leaf ? BTREE_LEAF_NODE : BTREE_NON_LEAF_NODE;
26961  key_count = btree_node_number_of_keys (thread_p, child_page);
26962  assert (key_count >= 0);
26963  assert (key_count > 0 || is_child_leaf);
26964 
26965  assert (parent_max_key_len >= node_header->max_key_len);
26966  assert (parent_node_level == node_header->node_level + 1);
26967 
26968  /* Safe guard: if current node max key length was updated, it should be write latched. So either its max key length
26969  * was not updated or used latch mode must be write mode. */
26970  assert (!insert_helper->need_update_max_key_len || insert_helper->is_crt_node_write_latched);
26971 
26972  /* Make sure page is write latched if insert_helper indicates it is. Otherwise, we may do write-mode operations on
26973  * read-latched page. If insert_helper doesn't indicate the page is write-latched and it is, all we risk to do is an
26974  * unnecessary promote call. */
26975  assert ((!insert_helper->is_crt_node_write_latched && insert_helper->nonleaf_latch_mode != PGBUF_LATCH_WRITE)
26976  || (pgbuf_get_latch_mode (*crt_page) == PGBUF_LATCH_WRITE));
26977 
26978  /* Is updating max key length necessary? True if: 1. Parent node already needed an update
26979  * (insert_helper->need_update_max_key_len is set to true). 2. A new key is possible and is longer than child's max. */
26980  need_update_max_key_len = (insert_helper->need_update_max_key_len
26981  || (is_new_key_possible && insert_helper->key_len_in_page > node_header->max_key_len));
26982 
26983  max_key_len = need_update_max_key_len ? insert_helper->key_len_in_page : node_header->max_key_len;
26984  max_new_data_size =
26985  btree_get_max_new_data_size (thread_p, btid_int, child_page, node_type, max_key_len, insert_helper, false);
26986 
26987  need_split = max_new_data_size > spage_get_free_space_without_saving (thread_p, child_page, NULL);
26988 
26989  /* If split is needed, we first need to make sure current node is latched exclusively. A new entry must be added.
26990  * Promoting latch from read to write may be required if the node is not already latched exclusively. Node is not
26991  * already exclusive if: 1. traversal is done in read mode (insert_helper->nonleaf_latch_mode). 2. node was not part
26992  * of a split and did not need an update of max key length. */
26993  if (need_split && insert_helper->nonleaf_latch_mode == PGBUF_LATCH_READ && !insert_helper->is_crt_node_write_latched)
26994  {
26995  /* Promote mode is always ONLY_READER */
26996  error_code = pgbuf_promote_read_latch (thread_p, crt_page, PGBUF_PROMOTE_ONLY_READER);
26997  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
26998  {
26999  /* Could not promote. Restart insert from root by using write latch directly. */
27000  insert_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
27001  *restart = true;
27002  pgbuf_unfix_and_init (thread_p, child_page);
27003  pgbuf_unfix_and_init (thread_p, *crt_page);
27004  insert_helper->is_crt_node_write_latched = false;
27005  return NO_ERROR;
27006  }
27007  else if (error_code != NO_ERROR)
27008  {
27009  ASSERT_ERROR ();
27010  goto error;
27011  }
27012  else if (*crt_page == NULL)
27013  {
27014  ASSERT_ERROR_AND_SET (error_code);
27015  goto error;
27016  }
27018  insert_helper->is_crt_node_write_latched = true;
27019  }
27020 
27021  /* Do we need to promote child node latch mode? It must currently be read and should be promoted to write. */
27022  if ((need_split || need_update_max_key_len) /* need write latch */
27023  && (insert_helper->nonleaf_latch_mode == PGBUF_LATCH_READ && !insert_helper->need_update_max_key_len
27024  && !is_child_leaf) /* and had read latch */ )
27025  {
27026  error_code = pgbuf_promote_read_latch (thread_p, &child_page, PGBUF_PROMOTE_SHARED_READER);
27027  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
27028  {
27029  /* Could not promote. Restart insert from root by using write latch directly. */
27030  insert_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
27031  *restart = true;
27032  pgbuf_unfix_and_init (thread_p, child_page);
27033  pgbuf_unfix_and_init (thread_p, *crt_page);
27034  insert_helper->is_crt_node_write_latched = false;
27035  return NO_ERROR;
27036  }
27037  else if (error_code != NO_ERROR)
27038  {
27039  ASSERT_ERROR ();
27040  goto error;
27041  }
27042  else if (child_page == NULL)
27043  {
27044  ASSERT_ERROR_AND_SET (error_code);
27045  goto error;
27046  }
27047  }
27048 
27049  if (need_update_max_key_len)
27050  {
27051  save_lsa = *pgbuf_get_lsa (child_page);
27052 
27053  /* Update max key length. */
27054  node_header->max_key_len = insert_helper->key_len_in_page;
27055  error_code = btree_node_header_redo_log (thread_p, &btid_int->sys_btid->vfid, child_page);
27056  if (error_code != NO_ERROR)
27057  {
27058  ASSERT_ERROR ();
27059  goto error;
27060  }
27061  pgbuf_set_dirty (thread_p, child_page, DONT_FREE);
27062 
27063  btree_insert_log (insert_helper, "Update max key length to %d. \n"
27064  "\t" PGBUF_PAGE_MODIFY_MSG ("b-tree node page") "\n"
27065  "\t" BTREE_ID_MSG, node_header->max_key_len, PGBUF_PAGE_MODIFY_ARGS (child_page, &save_lsa),
27066  BTID_AS_ARGS (btid_int->sys_btid));
27067 
27068  /* If this node required to update its max key length, then also the children we meet will require to update
27069  * their max key length. (rule being that parent->max_key_len >= child->max_key_len). */
27070  insert_helper->need_update_max_key_len = true;
27071  insert_helper->is_crt_node_write_latched = true;
27072  }
27073 
27074  if (need_split)
27075  {
27076  log_sysop_start (thread_p);
27077  is_system_op_started = true;
27078 
27079  /* Get a new page */
27080  crt_vpid = pgbuf_get_vpid_ptr (*crt_page);
27081  new_page1 = btree_get_new_page (thread_p, btid_int, &new_page_vpid1, crt_vpid);
27082  if (new_page1 == NULL)
27083  {
27084  ASSERT_ERROR_AND_SET (error_code);
27085  goto error;
27086  }
27087 
27088  save_lsa = *pgbuf_get_lsa (*crt_page);
27089  save_child_lsa = *pgbuf_get_lsa (child_page);
27090 
27091  /* Split the node. */
27092  error_code =
27093  btree_split_node (thread_p, btid_int, *crt_page, child_page, new_page1, crt_vpid, &child_vpid, &new_page_vpid1,
27094  child_slotid, node_type, key, insert_helper, &advance_vpid);
27095  if (error_code != NO_ERROR)
27096  {
27097  ASSERT_ERROR ();
27098  goto error;
27099  }
27100 
27101  btree_insert_log (insert_helper, "Split child node, max key length = %d. \n"
27102  "\t" PGBUF_PAGE_MODIFY_MSG ("parent node page") "\n"
27103  "\t" PGBUF_PAGE_MODIFY_MSG ("split node page") "\n"
27104  "\t" PGBUF_PAGE_STATE_MSG ("new node page") "\n"
27105  "\t" BTREE_ID_MSG, node_header->max_key_len,
27106  PGBUF_PAGE_MODIFY_ARGS (*crt_page, &save_lsa),
27107  PGBUF_PAGE_MODIFY_ARGS (child_page, &save_child_lsa),
27108  PGBUF_PAGE_STATE_ARGS (new_page1), BTID_AS_ARGS (btid_int->sys_btid));
27109 
27110  /* Choose which of the split nodes we need to advance to. */
27111  if (VPID_EQ (&advance_vpid, &child_vpid))
27112  {
27113  /* Go to current child. */
27114 
27115  /* Unfix newly created page. */
27116  pgbuf_unfix_and_init (thread_p, new_page1);
27117 
27118  /* End opened system operation. */
27119  log_sysop_commit (thread_p);
27120  is_system_op_started = false;
27121 
27122  *advance_to_page = child_page;
27123  insert_helper->is_crt_node_write_latched = true;
27124 
27125  return NO_ERROR;
27126  }
27127  else if (VPID_EQ (&advance_vpid, &new_page_vpid1))
27128  {
27129  /* Go to newly allocated node. */
27130 
27131  /* Unfix current child page. */
27132  pgbuf_unfix_and_init (thread_p, child_page);
27133 
27134  /* End opened system operation. */
27135  log_sysop_commit (thread_p);
27136  is_system_op_started = false;
27137 
27138  *advance_to_page = new_page1;
27139  insert_helper->is_crt_node_write_latched = true;
27140 
27141  return NO_ERROR;
27142  }
27143  else
27144  {
27145  /* Impossible. */
27146  assert_release (false);
27147 
27148  pgbuf_unfix_and_init (thread_p, child_page);
27149  pgbuf_unfix_and_init (thread_p, new_page1);
27150  pgbuf_unfix_and_init (thread_p, *crt_page);
27151 
27152  error_code = ER_FAILED;
27153  goto error;
27154  }
27155  }
27156  assert (!need_split);
27157  /* Split was not required. Just advance to current child. */
27158 
27159  if (!need_update_max_key_len && insert_helper->nonleaf_latch_mode == PGBUF_LATCH_READ)
27160  {
27161  /* Node was fixed with read latch. */
27162  insert_helper->is_crt_node_write_latched = false;
27163  }
27164  *advance_to_page = child_page;
27165 
27166  /* Finished successfully. */
27167  return NO_ERROR;
27168 
27169 error:
27170  /* Error. */
27171  if (new_page1 != NULL)
27172  {
27173  pgbuf_unfix_and_init (thread_p, new_page1);
27174  }
27175  if (new_page2 != NULL)
27176  {
27177  pgbuf_unfix_and_init (thread_p, new_page2);
27178  }
27179  if (is_system_op_started)
27180  {
27181  /*
27182  * Abort system operation after unfixing new_page1 and new_page2, but before unfixing child page.
27183  * new_page1 and new_page2 are deallocated during abort, so their fix count must be 0.
27184  * Also, we have to be sure that no other transaction modifies the child page, in order to correctly restore it.
27185  */
27186  log_sysop_abort (thread_p);
27187  }
27188  if (child_page != NULL)
27189  {
27190  pgbuf_unfix_and_init (thread_p, child_page);
27191  }
27192  assert (*advance_to_page == NULL);
27193  assert (error_code != NO_ERROR);
27194  ASSERT_ERROR ();
27195  return error_code;
27196 }
27197 
27198 /*
27199  * btree_key_insert_new_object () - BTREE_PROCESS_KEY_FUNCTION used for inserting new object in b-tree.
27200  *
27201  * return : Error code.
27202  * thread_p (in) : Thread entry.
27203  * btid_int (in) : B-tree info.
27204  * key (in) : Key value. Asserted to be non-NULL (including the multi-column NULL case).
27205  * leaf_page (in/out) : Pointer to leaf page; asserted to be fixed with write latch.
27206  * search_key (in) : Search key result (slot id and whether key was found in leaf).
27207  * restart (out) : Checked after the unique append; when true, function goes directly to cleanup.
27208  * other_args (in/out) : BTREE_INSERT_HELPER *.
27209  *
27210  * NOTE: If key is not found it is inserted as new key; otherwise the object is appended to the existing key.
27211  */
27212 static int
27213 btree_key_insert_new_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
27214  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
27215 {
27216  /* B-tree insert helper used as argument for different btree insert functions. */
27217  BTREE_INSERT_HELPER *insert_helper = (BTREE_INSERT_HELPER *) other_args;
27218  int error_code = NO_ERROR; /* Error code. */
27219  RECDES leaf_record; /* Record descriptor for leaf key record. */
27220  char data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN]; /* Data buffer used to copy record data. */
27221  LEAF_REC leaf_info; /* Leaf record info. */
27222  int offset_after_key; /* Offset in record data where packed key is ended. */
27223  bool dummy_clear_key; /* Dummy field used as argument for btree_read_record. */
27224
27225  /* Recovery structures. Undo data starts in an aligned local buffer; see cleanup at exit. */
27226  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
27227  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
27228  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
27229
27230  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
27231
27232  /* Assert expected arguments. */
27233  assert (btid_int != NULL);
27234  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
27235  assert (leaf_page != NULL && *leaf_page != NULL && pgbuf_get_latch_mode (*leaf_page) == PGBUF_LATCH_WRITE);
27236  assert (search_key != NULL);
27237  assert (search_key->slotid > 0 && search_key->slotid <= btree_node_number_of_keys (thread_p, *leaf_page) + 1);
27238  assert (restart != NULL);
27239  assert (insert_helper != NULL);
27240  assert (btree_is_insert_object_purpose (insert_helper->purpose));
27241
27242  /* Do not allow inserting a deleted object. It should never happen. Insert new object should insert objects with no
27243  * delete MVCCID. Rollback of object physical removal cannot reach here with a deleted object. There are three
27244  * types of physical removal: - Delete object (should not have a delete MVCCID). - Rollback insert (should not have a
27245  * delete MVCCID). - Vacuum (deleted object). However, vacuum is not rolled back. */
27247
27248  btree_perf_track_traverse_time (thread_p, insert_helper);
27249
27250  /* Prepare log data: the log address is the leaf page slot where the key is (or will be) stored. */
27251  insert_helper->leaf_addr.offset = search_key->slotid;
27252  insert_helper->leaf_addr.pgptr = *leaf_page;
27253  insert_helper->leaf_addr.vfid = &btid_int->sys_btid->vfid;
27254  /* Based on recovery index it is known whether this is MVCC-like operation or not. Particularly important for vacuum. */
27255  /* Undo physical delete will add a compensate record and doesn't require undo recovery data. */
27256  /* Prepare undo data. */
27257  if (insert_helper->purpose == BTREE_OP_INSERT_NEW_OBJECT
27258  || insert_helper->purpose == BTREE_OP_ONLINE_INDEX_TRAN_INSERT
27259  || insert_helper->purpose == BTREE_OP_ONLINE_INDEX_TRAN_INSERT_DF)
27260  {
27261  insert_helper->rcvindex =
27264  insert_helper->rv_keyval_data = rv_undo_data_bufalign;
27265  error_code =
27266  btree_rv_save_keyval_for_undo (btid_int, key, BTREE_INSERT_CLASS_OID (insert_helper),
27267  BTREE_INSERT_OID (insert_helper), BTREE_INSERT_MVCC_INFO (insert_helper),
27268  insert_helper->purpose, rv_undo_data_bufalign, &insert_helper->rv_keyval_data,
27269  &rv_undo_data_capacity, &insert_helper->rv_keyval_data_length);
27270  if (error_code != NO_ERROR)
27271  {
27272  ASSERT_ERROR ();
27273  goto error;
27274  }
27275  }
27276  else /* BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE */
27277  {
27278  insert_helper->rcvindex = RVBT_RECORD_MODIFY_COMPENSATE;
27279  }
27280  insert_helper->rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
27281  insert_helper->rv_redo_data_ptr = insert_helper->rv_redo_data;
27282
27283  /* Does key already exist? */
27284  if (search_key->result != BTREE_KEY_FOUND)
27285  {
27286  /* Key doesn't exist. Insert new key, release undo data if it left the local buffer, and return early. */
27287  error_code = btree_key_insert_new_key (thread_p, btid_int, key, *leaf_page, insert_helper, search_key);
27288  if (error_code != NO_ERROR)
27289  {
27290  ASSERT_ERROR ();
27291  goto error;
27292  }
27293  if (insert_helper->rv_keyval_data != NULL && insert_helper->rv_keyval_data != rv_undo_data_bufalign)
27294  {
27295  db_private_free_and_init (thread_p, insert_helper->rv_keyval_data);
27296  }
27297  btree_perf_track_time (thread_p, insert_helper);
27298  return NO_ERROR; /* NOTE(review): unlike 'exit', rv_keyval_data is not explicitly reset to NULL here - confirm. */
27299  }
27300  /* Key was found. Append new object to existing key. */
27301
27302  /* Initialize leaf record; its data area is the aligned local buffer. */
27303  leaf_record.type = REC_HOME;
27304  leaf_record.area_size = DB_PAGESIZE;
27305  leaf_record.data = PTR_ALIGN (data_buffer, BTREE_MAX_ALIGN);
27306
27307  if (BTREE_IS_UNIQUE (btid_int->unique_pk) && insert_helper->purpose == BTREE_OP_INSERT_NEW_OBJECT)
27308  {
27309  /* Call unique insert function. It receives the leaf page by address and may request a restart. */
27310  error_code =
27311  btree_key_lock_and_append_object_unique (thread_p, btid_int, key, leaf_page, restart, search_key, insert_helper,
27312  &leaf_record);
27313  if (error_code != NO_ERROR)
27314  {
27315  ASSERT_ERROR ();
27316  goto error;
27317  }
27318  if (*restart == true)
27319  {
27320  goto exit;
27321  }
27322  }
27323  else
27324  {
27325  /* Get record (copied into leaf_record) and call non-unique insert function. */
27326  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &leaf_record, COPY) != S_SUCCESS)
27327  {
27328  assert_release (false);
27329  error_code = ER_FAILED;
27330  goto error;
27331  }
27332 #if !defined (NDEBUG)
27333  (void) btree_check_valid_record (thread_p, btid_int, &leaf_record, BTREE_LEAF_NODE, NULL);
27334 #endif /* !NDEBUG */
27335
27336  error_code =
27337  btree_read_record (thread_p, btid_int, *leaf_page, &leaf_record, NULL, &leaf_info, BTREE_LEAF_NODE,
27338  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
27339  if (error_code != NO_ERROR)
27340  {
27341  ASSERT_ERROR ();
27342  goto error;
27343  }
27344
27345 #if !defined (NDEBUG)
27346  if (oid_is_db_class (BTREE_INSERT_CLASS_OID (insert_helper)))
27347  {
27348  /* Although the indexes on _db_class and _db_attribute are not unique, they cannot have duplicate OID's.
27349  * Check here this consistency (no visible objects should exist). */
27350  btree_key_record_check_no_visible (thread_p, btid_int, *leaf_page, search_key->slotid);
27351  }
27352 #endif /* !NDEBUG */
27353
27354  error_code =
27355  btree_key_append_object_non_unique (thread_p, btid_int, key, *leaf_page, search_key, &leaf_record,
27356  offset_after_key, &leaf_info, &insert_helper->obj_info, insert_helper);
27357  if (error_code != NO_ERROR)
27358  {
27359  ASSERT_ERROR ();
27360  goto error;
27361  }
27362  }
27363
27364 #if !defined (NDEBUG)
27365  (void) btree_verify_node (thread_p, btid_int, *leaf_page);
27366 #endif /* !NDEBUG */
27367
27368 exit:
27369  if (insert_helper->rv_keyval_data != NULL && insert_helper->rv_keyval_data != rv_undo_data_bufalign)
27370  { /* Free only when undo data no longer points into the local stack buffer. */
27371  db_private_free_and_init (thread_p, insert_helper->rv_keyval_data);
27372  }
27373  insert_helper->rv_keyval_data = NULL; /* Never leave a pointer to the local buffer in the helper. */
27374
27375  btree_perf_track_time (thread_p, insert_helper);
27376  return error_code;
27377
27378 error:
27379  assert_release (error_code != NO_ERROR); /* Error path shares the cleanup done at 'exit'. */
27380  goto exit;
27381 }
27382 
27383 /*
27384  * btree_key_insert_new_key () - Insert new key (with its first object) at search_key->slotid in a leaf page.
27385  *
27386  * return : Error code.
27387  * thread_p (in) : Thread entry.
27388  * btid_int (in) : B-tree info.
27389  * key (in) : Key value; asserted to be non-NULL.
27390  * leaf_page (in) : Leaf node page; asserted to be a level 1 node latched in write mode.
27391  * insert_helper (in/out) : Insert helper; its is_unique_key_added_or_deleted and is_system_op_started are updated.
27392  * search_key (in) : Search key result; result must not be BTREE_KEY_FOUND, slotid is the insert position.
27393  */
27394 static int
27395 btree_key_insert_new_key (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR leaf_page,
27396  BTREE_INSERT_HELPER * insert_helper, BTREE_SEARCH_KEY_HELPER * search_key)
27397 {
27398  int error_code = NO_ERROR;
27399  int key_len; /* Disk size of key; later overwritten with BTREE_GET_KEY_LEN_IN_PAGE of that size. */
27400  int key_type = BTREE_NORMAL_KEY; /* Switched to BTREE_OVERFLOW_KEY when key_len >= BTREE_MAX_KEYLEN_INPAGE. */
27401  int key_cnt;
27402  DB_VALUE local_key; /* Holds a clone of key with the common prefix columns removed. */
27403  RECDES record; /* New leaf record built in data_buffer. */
27404  char data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
27405  BTREE_NODE_HEADER *node_header;
27406  int rv_redo_data_length;
27407  bool update_max_key_length = false;
27408  DB_VALUE *new_key = key; /* Key actually written to record: key itself or &local_key. */
27409
27410  LOG_LSA prev_lsa; /* Page LSA saved before logging; only used by the insert log messages. */
27411
27412  /* Redo recovery. */
27413  /* One page size buffer can handle one b-tree record entirely and any additional recovery data (e.g. debug info). */
27414  char rv_redo_recovery_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
27415  char *rv_redo_data = PTR_ALIGN (rv_redo_recovery_buffer, BTREE_MAX_ALIGN);
27417  /* Do not use insert_helper redo structures since they may not be able to handle the entire record. */
27418
27419  /* A new key will be inserted into search_key->slotid; record that on the helper. */
27420  insert_helper->is_unique_key_added_or_deleted = true;
27421
27422  /* Assert expected arguments. */
27423  assert (btid_int != NULL);
27424  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
27425  assert (leaf_page != NULL && btree_get_node_level (thread_p, leaf_page) == 1
27426  && pgbuf_get_latch_mode (leaf_page) == PGBUF_LATCH_WRITE);
27427  assert (insert_helper != NULL);
27428  assert (search_key != NULL && search_key->result != BTREE_KEY_FOUND);
27429  assert (search_key->slotid > 0 && search_key->slotid <= btree_node_number_of_keys (thread_p, leaf_page) + 1);
27430  assert (insert_helper->rv_redo_data != NULL && insert_helper->rv_redo_data_ptr != NULL);
27431  assert (insert_helper->is_system_op_started == false);
27432 #if defined (SERVER_MODE)
27433  assert ((btree_is_online_index_loading (insert_helper->purpose)) || !BTREE_IS_UNIQUE (btid_int->unique_pk)
27434  || log_is_in_crash_recovery () || btree_check_locking_for_insert_unique (thread_p, insert_helper));
27435 #endif /* SERVER_MODE */
27436
27437  /* Insert new key. */
27438  db_make_null (&local_key);
27439
27440  key_len = btree_get_disk_size_of_key (key);
27441  if (key_len >= BTREE_MAX_KEYLEN_INPAGE)
27442  { /* Overflow key: the whole insert is wrapped in a system operation, ended after logging or aborted on error. */
27443  key_type = BTREE_OVERFLOW_KEY;
27444
27445  log_sysop_start (thread_p);
27446  insert_helper->is_system_op_started = true;
27447  }
27448  else
27449  {
27450  int diff_column;
27451
27452  diff_column = btree_node_common_prefix (thread_p, btid_int, leaf_page);
27453  if (diff_column > 0)
27454  {
27455  /* Remove columns from key value. */
27456  /* Use local_key as buffer. */
27457  new_key = &local_key;
27458  pr_clone_value (key, new_key); /* NOTE(review): result of pr_clone_value is not checked - confirm. */
27459  pr_midxkey_remove_prefix (new_key, diff_column);
27460  }
27461  else if (diff_column < 0)
27462  {
27463  ASSERT_ERROR ();
27464  return diff_column; /* Negative value is returned as error code; no sysop was started on this branch. */
27465  }
27466  }
27467  record.type = REC_HOME;
27468  record.data = PTR_ALIGN (data_buffer, MAX_ALIGNMENT); /* NOTE(review): buffer is padded by BTREE_MAX_ALIGN - confirm. */
27469  record.area_size = DB_PAGESIZE;
27470  error_code =
27471  btree_write_record (thread_p, btid_int, NULL, new_key, BTREE_LEAF_NODE, key_type, key_len, false,
27472  BTREE_INSERT_CLASS_OID (insert_helper), BTREE_INSERT_OID (insert_helper),
27473  BTREE_INSERT_MVCC_INFO (insert_helper), &record);
27474  if (new_key == &local_key)
27475  { /* Clear the cloned key whether or not the write succeeded; error is checked right after. */
27476  pr_clear_value (&local_key);
27477  }
27478  if (error_code != NO_ERROR)
27479  {
27480  ASSERT_ERROR ();
27481  goto error;
27482  }
27483
27484 #if !defined (NDEBUG)
27485  (void) btree_check_valid_record (thread_p, btid_int, &record, BTREE_LEAF_NODE, NULL);
27486 #endif
27487
27488  /* Node header will be updated. */
27489  node_header = btree_get_node_header (thread_p, leaf_page);
27490  if (node_header == NULL)
27491  {
27492  assert_release (false);
27493  error_code = ER_FAILED;
27494  goto error;
27495  }
27496
27498
27499  /* Nothing should fail after spage_insert_at! */
27500  if (spage_insert_at (thread_p, leaf_page, search_key->slotid, &record) != SP_SUCCESS)
27501  {
27502  assert_release (false);
27503  error_code = ER_FAILED;
27504  goto error;
27505  }
27506
27508
27509  key_cnt = btree_node_number_of_keys (thread_p, leaf_page);
27510  key_len = BTREE_GET_KEY_LEN_IN_PAGE (key_len); /* From here on key_len is the macro's result, not the disk size. */
27511  /* Do not write log for updating header. Redo recovery function of insert key will know to update it. */
27512  if (key_len > node_header->max_key_len)
27513  {
27514  update_max_key_length = true;
27515  node_header->max_key_len = key_len;
27516  }
27517
27518
27519  assert (node_header->split_info.pivot >= 0 && key_cnt > 0);
27520  btree_split_next_pivot (&node_header->split_info, (float) search_key->slotid / key_cnt, key_cnt);
27521
27522  /* Log */
27523  assert (insert_helper->leaf_addr.offset == search_key->slotid);
27524
27525  /* Redo logging. Redo data layout: [debug info, debug builds only] [key length, if max key length changed] record. */
27527 #if !defined (NDEBUG)
27528  /* For debugging info. */
27529  BTREE_RV_REDO_SET_DEBUG_INFO (&insert_helper->leaf_addr, rv_redo_data_ptr, btid_int, BTREE_RV_DEBUG_ID_INS_KEY);
27530 #endif /* !NDEBUG */
27531  if (update_max_key_length)
27532  {
27533  /* Put key length. */
27534  rv_redo_data_ptr = or_pack_int (rv_redo_data_ptr, key_len);
27535  BTREE_RV_SET_UPDATE_MAX_KEY_LEN (&insert_helper->leaf_addr);
27536  }
27537  /* Put record data. */
27538  memcpy (rv_redo_data_ptr, record.data, record.length);
27539  rv_redo_data_ptr += record.length;
27540
27541  /* Save page LSA before the log record is appended; it is reported by the insert log messages below. */
27542  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
27543
27544  /* Add logging. */
27545  rv_redo_data_length = CAST_BUFLEN (rv_redo_data_ptr - rv_redo_data);
27546  assert (rv_redo_data_length < DB_PAGESIZE);
27547
27548  btree_rv_log_insert_object (thread_p, *insert_helper, insert_helper->leaf_addr, 0, rv_redo_data_length, NULL,
27549  rv_redo_data);
27550
27551  if (insert_helper->is_system_op_started)
27552  {
27553  // also end sysop
27554  btree_insert_sysop_end (thread_p, insert_helper);
27555  }
27556
27557  if (insert_helper->log_operations)
27558  {
27559  if (key_type == BTREE_OVERFLOW_KEY)
27560  {
27561  OR_BUF buf_vpid_key;
27562  VPID vpid_key = VPID_INITIALIZER;
27563  int rc = NO_ERROR;
27564
27565  /* Read the VPID stored in the last DISK_VPID_ALIGNED_SIZE bytes of the record, for the log message only. */
27565  OR_BUF_INIT (buf_vpid_key, record.data + record.length - DISK_VPID_ALIGNED_SIZE, DISK_VPID_ALIGNED_SIZE);
27566  vpid_key.pageid = or_get_int (&buf_vpid_key, &rc);
27567  vpid_key.volid = or_get_short (&buf_vpid_key, &rc);
27568
27569  assert (!VPID_ISNULL (&vpid_key));
27570  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("New overflow key %d|%d"),
27571  VPID_AS_ARGS (&vpid_key),
27572  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, leaf_page, &prev_lsa, true,
27573  search_key->slotid, record.length, btid_int->sys_btid));
27574  }
27575  else
27576  {
27577  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("New key"),
27578  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, leaf_page, &prev_lsa, true,
27579  search_key->slotid, record.length, btid_int->sys_btid));
27580  }
27581  }
27582
27584
27585  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
27586
27587 #if !defined(NDEBUG)
27589  {
27590  VPID *vpid = pgbuf_get_vpid_ptr (leaf_page);
27591  fprintf (stdout, "btree insert at (%d:%d:%d) with key:", vpid->volid, vpid->pageid, search_key->slotid);
27592  db_value_print (key);
27593  fprintf (stdout, "\n");
27594  }
27595 #endif
27596
27597 #if !defined(NDEBUG)
27598  (void) btree_verify_node (thread_p, btid_int, leaf_page);
27599 #endif
27600
27601  return NO_ERROR;
27602
27603 error:
27604  assert (error_code != NO_ERROR);
27605
27606  /* Abort the system operation if one was started (overflow key case). */
27606  if (insert_helper->is_system_op_started)
27607  {
27608  log_sysop_abort (thread_p);
27609  insert_helper->is_system_op_started = false;
27610  }
27611  return error_code;
27612 }
27613 
27614 /*
27615  * btree_key_insert_does_leaf_need_split () - Check if there is not enough space in leaf node to handle new object.
27616  *
27617  * return : True if page lacks space for a new object (key found) or for a new key entry (key not found).
27618  * thread_p (in) : Thread entry.
27619  * btid_int (in) : B-tree info.
27620  * leaf_page (in) : Leaf page; asserted to be a level 1 node.
27621  * insert_helper (in) : Insert helper; key_len_in_page is used to size a new key entry.
27622  * search_key (in) : Search key result; selects which of the two space checks is done.
27623  */
27624 static bool
27626  BTREE_INSERT_HELPER * insert_helper, BTREE_SEARCH_KEY_HELPER * search_key)
27627 {
27628  int max_new_data_size = 0;
27629
27630  /* Assert expected arguments. */
27631  assert (btid_int != NULL);
27632  assert (leaf_page != NULL && btree_get_node_level (thread_p, leaf_page) == 1);
27633  assert (insert_helper != NULL);
27634  assert (search_key != NULL);
27635
27636  if (search_key->result == BTREE_KEY_FOUND)
27637  {
27638  /* Key exists, only an object is added: compare BTREE_OBJECT_MAX_SIZE with free space (without saving). */
27639  return (BTREE_OBJECT_MAX_SIZE > spage_get_free_space_without_saving (thread_p, leaf_page, NULL));
27640  }
27641  else
27642  {
27643  /* Key is new: compare the maximum size of a new leaf entry with the space available for a new record. */
27644  max_new_data_size = BTREE_NEW_ENTRY_MAX_SIZE (insert_helper->key_len_in_page, BTREE_LEAF_NODE);
27645  return (max_new_data_size > spage_max_space_for_new_record (thread_p, leaf_page));
27646  }
27647 }
27648 
27649 /*
27650  * btree_key_lock_and_append_object_unique () - Append new object into an existing unique index key.
27651  * New objects are always inserted at the beginning of
27652  * the key (as long as unique constraint is not violated).
27653  *
27654  * return : Error code.
27655  * thread_p (in) : Thread entry.
27656  * btid_int (in) : B-tree info.
27657  * key (in) : Inserted key.
27658  * leaf (in/out) : Pointer to leaf page (can be re-fixed).
27659  * restart (out) : Outputs true when restarting from b-tree root is required.
27660  * search_key (in/out) : Search key result.
27661  * insert_helper (in/out) : Insert operation helper structure (leaf address, recovery index and undo key/value data may be updated).
27662  * leaf_record (in) : Preallocated record descriptor used to read b-tree record.
27663  */
27664 static int
27666  bool * restart, BTREE_SEARCH_KEY_HELPER * search_key,
27667  BTREE_INSERT_HELPER * insert_helper, RECDES * leaf_record)
27668 {
27669  int error_code = NO_ERROR; /* Error code. */
27670  BTREE_OBJECT_INFO first_object; /* Current first object in record. It will be replaced. */
27671  LEAF_REC leaf_info; /* Leaf record info. */
27672  int offset_after_key; /* Offset in record where packed key ends. */
27673  bool dummy_clear_key; /* Dummy */
27674  /* Used by btree_key_find_and_lock_unique. */
27676  MVCC_SNAPSHOT mvcc_snapshot_dirty; /* Dirty snapshot used to count visible objects for multi-update. */
27677  bool is_key_record_read = false; /* Set to true when key record is read (btree_read_record function was called). */
27678  int max_visible_oids = 1; /* Used to check visible OID's for REPEATABLE READ isolation. */
27679  MVCC_SNAPSHOT *mvcc_snapshot = NULL; /* Used to check visibility for REPEATABLE READ isolation. */
27680  int num_visible = 0; /* Used to count number of visible objects. */
27681 
27682 #if defined (SERVER_MODE)
27683  LOG_LSA saved_leaf_lsa; /* Save page LSA before locking the first object in record. If LSA is changed, it is no
27684  * longer guaranteed that page has enough space for new key/object. We need to be
27685  * conservative and check it. */
27686 #endif /* SERVER_MODE */
27687 
27688  /* Assert expected arguments. */
27689  assert (btid_int != NULL && BTREE_IS_UNIQUE (btid_int->unique_pk));
27690  assert (key != NULL);
27691  assert (leaf != NULL);
27692  assert (restart != NULL);
27693  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND);
27694  assert (insert_helper->rv_redo_data != NULL && insert_helper->rv_redo_data_ptr != NULL);
27695  assert (insert_helper->purpose == BTREE_OP_INSERT_NEW_OBJECT);
27696 #if defined (SERVER_MODE)
27697  assert (log_is_in_crash_recovery () || btree_check_locking_for_insert_unique (thread_p, insert_helper));
27698 #endif /* SERVER_MODE */
27699 
27700  /* Insert object in the beginning of leaf record if unique constraint is not violated. Step 1: Protect key by
27701  * locking its first object. Every transaction that insert or delete objects will follow this rule. So, the inserter
27702  * must first lock the first object, check its status and decide if insert is possible or if it would violate
27703  * unique constraint. After locking the first object, if it is deleted and committed, then unique constraint would
27704  * not be violated. Otherwise, if the object is not deleted (and nobody else is trying to delete it), inserting new
27705  * object is no longer accepted. Use btree_key_find_and_lock_unique_of_unique to find and lock the first object.
27706  * Step 2: Once the first object is locked and it is established that unique constraint is preserved, adding object
27707  * to key record can be done. Because the first object is always considered last key version, inserting new object
27708  * must be done in the beginning of leaf record. So, the current first object must be relocated (it simulates a
27709  * regular insert and follows the regular insert rules). Then, the first object is replaced with new object. */
27710 
27711 #if defined (SERVER_MODE)
27712  /* Transfer locked object from insert helper to find unique helper. */
27713  COPY_OID (&find_unique_helper.locked_oid, &insert_helper->saved_locked_oid);
27714  COPY_OID (&find_unique_helper.locked_class_oid, &insert_helper->saved_locked_class_oid);
27715 
27716  LSA_COPY (&saved_leaf_lsa, pgbuf_get_lsa (*leaf));
27717 #endif /* SERVER_MODE */
27718 
27719  find_unique_helper.lock_mode = X_LOCK;
27720  error_code =
27721  btree_key_find_and_lock_unique_of_unique (thread_p, btid_int, key, leaf, search_key, restart, &find_unique_helper);
27722 #if defined (SERVER_MODE)
27723  /* Transfer locked object from find unique helper to insert helper. */
27724  COPY_OID (&insert_helper->saved_locked_oid, &find_unique_helper.locked_oid);
27725  COPY_OID (&insert_helper->saved_locked_class_oid, &find_unique_helper.locked_class_oid);
27726 #endif /* SERVER_MODE */
27727 
27728  if (error_code != NO_ERROR || *restart)
27729  {
27730  /* Error occurred or failed to lock key object and keep a relevant leaf node. Return now. */
27731  return error_code;
27732  }
27733  /* No error, object was locked and key leaf node is held. */
27734  assert (*leaf != NULL);
27735 
27736  /* Slot ID and page pointer may have changed. Update insert_helper->leaf_addr. */
27737  insert_helper->leaf_addr.offset = search_key->slotid;
27738  insert_helper->leaf_addr.pgptr = *leaf;
27739 
27740 #if defined (SERVER_MODE)
27741  if (!LSA_EQ (&saved_leaf_lsa, pgbuf_get_lsa (*leaf)))
27742  {
27743  /* Does leaf need split? */
27744  if (btree_key_insert_does_leaf_need_split (thread_p, btid_int, *leaf, insert_helper, search_key))
27745  {
27746  /* We need to go back from top and split leaf. */
27747  *restart = true;
27748  return NO_ERROR;
27749  }
27750  }
27751 #endif /* SERVER_MODE */
27752 
27753  if (search_key->result != BTREE_KEY_FOUND)
27754  {
27755  /* Key was deleted or vacuumed. */
27756  /* Insert key directly. */
27757  return btree_key_insert_new_key (thread_p, btid_int, key, *leaf, insert_helper, search_key);
27758  }
27759 
27760  if (!find_unique_helper.found_object && logtb_find_current_isolation (thread_p) >= TRAN_REPEATABLE_READ)
27761  {
27762  /* This could be an object deleted but still visible to me. */
27763  /* The problem is reading the object next time. Old object, even if it was deleted, on the next read will be
27764  * visible to me. This object is inserted by me, so it is also visible to me. This means I will see two different
27765  * objects that have this key, which is obviously a violation of unique constraint. READ COMMITTED doesn't have
27766  * this problem, since on the next statement it will refresh its snapshot and old object will no longer be
27767  * visible (even if it was with this snapshot). */
27768  /* TODO: Is this required in STAND ALONE? */
27769 
27770  /* Get current key record. */
27771  if (spage_get_record (thread_p, *leaf, search_key->slotid, leaf_record, COPY) != S_SUCCESS)
27772  {
27773  assert_release (false);
27774  return ER_FAILED;
27775  }
27776 
27777 #if !defined (NDEBUG)
27778  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, key);
27779 #endif
27780 
27781  /* Read record. */
27782  error_code =
27783  btree_read_record (thread_p, btid_int, *leaf, leaf_record, NULL, &leaf_info, BTREE_LEAF_NODE, &dummy_clear_key,
27784  &offset_after_key, PEEK_KEY_VALUE, NULL);
27785  if (error_code != NO_ERROR)
27786  {
27787  ASSERT_ERROR ();
27788  return error_code;
27789  }
27790  /* Don't repeat key record read. */
27791  is_key_record_read = true;
27792 
27793  /* Count visible objects considering transaction snapshot. */
27794  mvcc_snapshot = logtb_get_mvcc_snapshot (thread_p);
27795  error_code =
27796  btree_get_num_visible_from_leaf_and_ovf (thread_p, btid_int, leaf_record, offset_after_key, &leaf_info,
27797  &max_visible_oids, mvcc_snapshot, &num_visible);
27798  if (error_code != NO_ERROR)
27799  {
27800  ASSERT_ERROR ();
27801  return error_code;
27802  }
27803  else if (num_visible > 0)
27804  {
27805  /* Unique constraint violation. */
27807  {
27808  char *keyval = pr_valstring (key);
27810  (keyval == NULL) ? "(null)" : keyval);
27811  if (keyval != NULL)
27812  {
27813  db_private_free (thread_p, keyval);
27814  }
27816  }
27817  else
27818  {
27819  /* Object already exists. Unique constraint violation. */
27820  BTREE_SET_UNIQUE_VIOLATION_ERROR (thread_p, key, BTREE_INSERT_OID (insert_helper),
27821  BTREE_INSERT_CLASS_OID (insert_helper), btid_int->sys_btid, NULL);
27822  return ER_BTREE_UNIQUE_FAILED;
27823  }
27824  }
27825  }
27826  /* No visible objects or isolation not >= RR */
27827 
27828  /* Unique constraint may be violated if there is already a visible object and if this is not recovery
27829  * (BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE). Unique constraint is violated in this case, except if op_type is
27830  * MULTI-ROW-UPDATE. In this case, a duplicate key can be allowed, as long as it will be deleted until the end of
27831  * query execution (this is checked with unique_stats_info). Even in this case, never allow more than two visible
27832  * objects. If HA is enabled, never allow more than one visible object. */
27833  if (find_unique_helper.found_object && insert_helper->purpose == BTREE_OP_INSERT_NEW_OBJECT)
27834  {
27835  mvcc_snapshot_dirty.snapshot_fnc = mvcc_satisfies_dirty;
27836 
27837  if (insert_helper->is_unique_multi_update)
27838  {
27839  /* This should be MULTI-ROW-UPDATE. Get key record and count visible objects. */
27840 
27841  /* Get current key record. */
27842  if (spage_get_record (thread_p, *leaf, search_key->slotid, leaf_record, COPY) != S_SUCCESS)
27843  {
27844  assert_release (false);
27845  return ER_FAILED;
27846  }
27847 
27848 #if !defined (NDEBUG)
27849  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, key);
27850 #endif
27851 
27852  /* Read record. */
27853  error_code =
27854  btree_read_record (thread_p, btid_int, *leaf, leaf_record, NULL, &leaf_info, BTREE_LEAF_NODE,
27855  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
27856  if (error_code != NO_ERROR)
27857  {
27858  ASSERT_ERROR ();
27859  return error_code;
27860  }
27861  /* Don't repeat key record read. */
27862  is_key_record_read = true;
27863 
27864  /* Count visible (not dirty) objects. */
27865  error_code =
27866  btree_get_num_visible_from_leaf_and_ovf (thread_p, btid_int, leaf_record, offset_after_key, &leaf_info,
27867  NULL, &mvcc_snapshot_dirty, &num_visible);
27868  if (error_code != NO_ERROR)
27869  {
27870  ASSERT_ERROR ();
27871  return error_code;
27872  }
27873  }
27874 
27875  /* Should we consider this a unique constraint violation? */
27876  if (!insert_helper->is_unique_multi_update || num_visible > 1)
27877  {
27878  /* Not multi-update operation or there would be more than two objects visible. Unique constraint violation. */
27880  {
27881  char *keyval = pr_valstring (key);
27883  (keyval == NULL) ? "(null)" : keyval);
27884  if (keyval != NULL)
27885  {
27886  db_private_free (thread_p, keyval);
27887  }
27889  }
27890  else
27891  {
27892  /* Object already exists. Unique constraint violation. */
27893  BTREE_SET_UNIQUE_VIOLATION_ERROR (thread_p, key, BTREE_INSERT_OID (insert_helper),
27894  BTREE_INSERT_CLASS_OID (insert_helper), btid_int->sys_btid, NULL);
27895  return ER_BTREE_UNIQUE_FAILED;
27896  }
27897  }
27898  else if (insert_helper->is_ha_enabled)
27899  {
27900  /* When HA is enabled, unique constraint can never be violated. */
27902  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error_code, 0);
27903  return error_code;
27904  }
27905 
27906  /* Cannot consider this a new key. */
27907  insert_helper->is_unique_key_added_or_deleted = false;
27908 #if defined (SERVER_MODE)
27909  /* We don't want to unlock the object we will relocate. Others cannot delete it until we are finished. */
27910  OID_SET_NULL (&insert_helper->saved_locked_oid);
27911 #endif
27912  }
27913  else
27914  {
27915  /* All existing objects are deleted. Proceed */
27916  insert_helper->is_unique_key_added_or_deleted = true;
27917  }
27918  /* Unique constraint not violated (yet). */
27919  /* New object can be inserted. */
27920 
27921  /* Slot ID points to key in page. */
27922  assert (search_key->slotid > 0 && (search_key->slotid <= btree_node_number_of_keys (thread_p, *leaf) + 1));
27923 
27924  if (!is_key_record_read)
27925  {
27926  /* Get current key record. */
27927  if (spage_get_record (thread_p, *leaf, search_key->slotid, leaf_record, COPY) != S_SUCCESS)
27928  {
27929  assert_release (false);
27930  return ER_FAILED;
27931  }
27932 
27933 #if !defined (NDEBUG)
27934  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, key);
27935 #endif
27936 
27937  /* Read record. */
27938  error_code = btree_read_record (thread_p, btid_int, *leaf, leaf_record, NULL, &leaf_info, BTREE_LEAF_NODE,
27939  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
27940  if (error_code != NO_ERROR)
27941  {
27942  ASSERT_ERROR ();
27943  return error_code;
27944  }
27945  }
27946 
27947  /* Get current first object and its info */
27948  error_code =
27949  btree_leaf_get_first_object (btid_int, leaf_record, &first_object.oid, &first_object.class_oid,
27950  &first_object.mvcc_info);
27951  if (error_code != NO_ERROR)
27952  {
27953  ASSERT_ERROR ();
27954  return error_code;
27955  }
27956 
27957  /* Do we need to bring back the existing first object on undo? We normally should. We don't have to do it if the
27958  * first record is deleted and committed. Opposite to that is either object is not deleted (and this is
27959  * multi-update) or the object is deleted by current transaction. If it happens to be one of these cases, then
27960  * we have to bring the first object back on undo to ensure unique constraint.
27961  */
27962  if ((!insert_helper->is_unique_key_added_or_deleted
27963  || (BTREE_MVCC_INFO_DELID (&first_object.mvcc_info)
27964  == BTREE_MVCC_INFO_INSID (BTREE_INSERT_MVCC_INFO (insert_helper))))
27965  && insert_helper->rcvindex == RVBT_MVCC_INSERT_OBJECT)
27966  {
27967  /* We need to log two objects: the one that is being inserted and the one that was first before. Undo will return
27968  * the visible object to its place. */
27969  char *rv_keyval_data_buf = NULL;
27970  int rv_keyval_data_capacity = IO_MAX_PAGE_SIZE;
27971 
27972  insert_helper->rcvindex = RVBT_MVCC_INSERT_OBJECT_UNQ;
27973  if (insert_helper->rv_keyval_data_length <= IO_MAX_PAGE_SIZE)
27974  {
27975  /* Undo data uses preallocated data buffer. */
27976  rv_keyval_data_buf = insert_helper->rv_keyval_data;
27977  }
27978  else
27979  {
27980  /* Undo data was reallocated and its capacity matches its size. */
27981  rv_keyval_data_capacity = insert_helper->rv_keyval_data_length;
27982  }
27983  error_code =
27984  btree_rv_save_keyval_for_undo_two_objects (btid_int, key, &insert_helper->obj_info, &first_object,
27985  insert_helper->purpose, rv_keyval_data_buf,
27986  &insert_helper->rv_keyval_data, &rv_keyval_data_capacity,
27987  &insert_helper->rv_keyval_data_length);
27988  if (error_code != NO_ERROR)
27989  {
27990  ASSERT_ERROR ();
27991  return error_code;
27992  }
27993  }
27994 
27995  /* Now we are ready to insert new version. */
27996  error_code =
27997  btree_key_append_object_unique (thread_p, btid_int, key, *leaf, search_key, leaf_record, &leaf_info,
27998  offset_after_key, insert_helper, &first_object);
27999 
28000  /* Return the result of the append: NO_ERROR on success, the error code otherwise. */
28001  return error_code;
28002 }
28003 
28004 /*
28005  * btree_key_append_object_non_unique () - Append a new object in an existing b-tree key.
28006  *
28007  * return : Error code.
28008  * thread_p (in) : Thread entry.
28009  * btid_int (in) : B-tree info.
28010  * key (in) : Key value.
28011  * leaf (in) : Leaf node.
28012  * search_key (in) : Search key result.
28013  * leaf_record (in) : Key's leaf record.
28014  * offset_after_key (in) : Offset to where packed key is ended in leaf record data.
28015  * leaf_info (in) : Leaf record info.
28016  * btree_obj (in) : B-tree object info.
28017  * insert_helper (in) : B-tree insert helper.
28018  */
28019 static int
28021  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record, int offset_after_key,
28022  LEAF_REC * leaf_info, BTREE_OBJECT_INFO * btree_obj,
28023  BTREE_INSERT_HELPER * insert_helper)
28024 {
28025  int n_objects; /* Current number of leaf objects. If maximum size is reached, next object is inserted
28026  * in overflows. */
28027  int error_code = NO_ERROR; /* Error code. */
28028  int n_objects_limit = 0;
28029  int rv_redo_data_length = 0;
28030 
28031  LOG_LSA prev_lsa;
28032 
28033  /* Assert expected arguments. */
28034  assert (btid_int != NULL);
28035  assert (key != NULL);
28036  assert (leaf != NULL);
28037  assert (search_key != NULL && search_key->slotid > 0
28038  && search_key->slotid <= btree_node_number_of_keys (thread_p, leaf));
28039  assert (leaf_record != NULL);
28040  assert (leaf_info != NULL);
28041  assert (btree_obj != NULL);
28042  assert (btree_is_insert_object_purpose (insert_helper->purpose));
28043  assert (insert_helper->rv_redo_data != NULL && insert_helper->rv_redo_data_ptr != NULL);
28044 
28045  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
28046  {
28047  /* Append OID means this is not first and must be fixed size. */
28049  }
28050 
28051  /* Append new object to list of OID's for this key. First it should try to add the object to leaf record. Due to
28052  * split algorithm the page should always have enough space to add it. However, there is a limit of objects that can
28053  * be inserted in a leaf record. If that limit is reached, then the new object will be inserted in an overflow OID's
28054  * page. If there is none or if all existing pages are full, a new page is created and appended to current OID's
28055  * pages. */
28056 
28057  /* The OID list size on leaf page is limited to threshold number. */
28058  n_objects = btree_record_get_num_oids (thread_p, btid_int, leaf_record, offset_after_key, BTREE_LEAF_NODE);
28059  n_objects_limit = BTREE_MAX_OIDCOUNT_IN_LEAF_RECORD (btid_int);
28060  /* Is inserting another object possible? */
28061  if (n_objects < n_objects_limit)
28062  {
28063  /* Insert in leaf record is possible. */
28064 
28065 #if !defined (NDEBUG)
28066  BTREE_RV_REDO_SET_DEBUG_INFO (&insert_helper->leaf_addr, insert_helper->rv_redo_data_ptr, btid_int,
28068 #endif /* !NDEBUG */
28070 
28071  /* Append object at the end of leaf record. */
28072  btree_record_append_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, btree_obj, NULL,
28073  &insert_helper->rv_redo_data_ptr);
28074 
28075  /* Update should work. NOTE(review): the result is compared with S_SUCCESS here, while the spage_update calls in btree_key_append_object_unique and btree_key_relocate_last_into_ovf compare with SP_SUCCESS - confirm the two constants are interchangeable. */
28076  if (spage_update (thread_p, leaf, search_key->slotid, leaf_record) != S_SUCCESS)
28077  {
28078  assert_release (false);
28079  return ER_FAILED;
28080  }
28081 
28083 
28084  /* We need to log previous lsa. */
28085  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf));
28086 
28087  /* Log changes. */
28088  BTREE_RV_GET_DATA_LENGTH (insert_helper->rv_redo_data_ptr, insert_helper->rv_redo_data, rv_redo_data_length);
28089  btree_rv_log_insert_object (thread_p, *insert_helper, insert_helper->leaf_addr, 0, rv_redo_data_length,
28090  NULL, insert_helper->rv_redo_data);
28091  pgbuf_set_dirty (thread_p, leaf, DONT_FREE);
28092 
28093  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("append object at the end"),
28094  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, leaf, &prev_lsa, true, search_key->slotid,
28095  leaf_record->length, btid_int->sys_btid));
28096 
28097  return NO_ERROR;
28098  }
28099 
28100  /* Insert into overflow. */
28101  /* Overflow OID's have fixed size. */
28103 
28104  error_code =
28105  btree_key_append_object_into_ovf (thread_p, btid_int, key, leaf, search_key, leaf_record, leaf_info, insert_helper,
28106  btree_obj);
28107  if (error_code != NO_ERROR)
28108  {
28109  ASSERT_ERROR ();
28110  return error_code;
28111  }
28112 
28113  /* Success. */
28114  return NO_ERROR;
28115 }
28116 
28117 /*
28118  * btree_key_append_object_unique () - Append new version in unique key.
28119  *
28120  * return : Error code.
28121  * thread_p (in) : Thread entry.
28122  * btid_int (in) : B-tree info.
28123  * key (in) : Key value.
28124  * leaf (in) : Leaf page.
28125  * search_key (in) : Search key result.
28126  * leaf_record (in) : Leaf record.
28127  * leaf_record_info (in) : Leaf record info.
28128  * offset_after_key (in) : Offset in record after packed key value.
28129  * insert_helper (in) : B-tree insert helper.
28130  * first_object (in) : First object read from record.
28131  *
28132  * Prerequisite: read leaf record and its first object.
28133  */
28134 static int
28136  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record, LEAF_REC * leaf_record_info,
28137  int offset_after_key, BTREE_INSERT_HELPER * insert_helper,
28138  BTREE_OBJECT_INFO * first_object)
28139 {
28140  int error_code = NO_ERROR;
28141  int rv_redo_data_length = 0;
28142  LOG_LSA prev_lsa;
28143 
28144  assert (btid_int != NULL);
28145  assert (BTREE_IS_UNIQUE (btid_int->unique_pk));
28146  assert (key != NULL);
28147  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND);
28148  assert (leaf_record != NULL);
28149  assert (leaf_record_info != NULL);
28150  assert (offset_after_key > 0);
28151  assert (btree_is_insert_object_purpose (insert_helper->purpose));
28152  assert (insert_helper->rv_redo_data != NULL);
28153  assert (insert_helper->rv_keyval_data != NULL && insert_helper->rv_keyval_data_length > 0);
28154  assert (insert_helper->leaf_addr.offset != 0 && insert_helper->leaf_addr.pgptr == leaf);
28155  assert (insert_helper->rcvindex == RVBT_MVCC_INSERT_OBJECT
28156  || insert_helper->rcvindex == RVBT_NON_MVCC_INSERT_OBJECT
28157  || insert_helper->rcvindex == RVBT_MVCC_INSERT_OBJECT_UNQ);
28158  assert (first_object != NULL && !OID_ISNULL (&first_object->oid));
28159 
28160  /* First object must be relocated at the end of leaf record. First we need to make sure there is enough room to do
28161  * so. If there isn't, last object in leaf record must be relocated to an overflow page. NOTE: Initially, the first
28162  * object was relocated directly into overflow. However, current logging system is quite limited when it comes to
28163  * using system operations and logical undo together. For that reason, the entire operation was split into two
28164  * sub-operations: 1. Relocate last object in leaf record using a system operation and physical undo/redo logging. 2.
28165  * Relocate first object at the end of leaf record and replace with new object. This is logged using logical undo.
28166  * The log size used is not optimal, but it is the only correct way. */
28167  if (btree_record_get_num_oids (thread_p, btid_int, leaf_record, offset_after_key, BTREE_LEAF_NODE) >=
28169  {
28170  assert (btree_record_get_num_oids (thread_p, btid_int, leaf_record, offset_after_key, BTREE_LEAF_NODE) ==
28172 
28173  /* Relocate last object in leaf record to make room for a new object. */
28174  error_code =
28175  btree_key_relocate_last_into_ovf (thread_p, btid_int, key, leaf, search_key, leaf_record, leaf_record_info,
28176  offset_after_key, insert_helper);
28177  if (error_code != NO_ERROR)
28178  {
28179  ASSERT_ERROR ();
28180  return error_code;
28181  }
28182  }
28183 
28185 
28186  /* Relocate first object at the end of the leaf record and replace it with new object. */
28187  /* Prepare logging. */
28188  insert_helper->leaf_addr.offset = search_key->slotid;
28189  insert_helper->rv_redo_data_ptr = insert_helper->rv_redo_data;
28190 #if !defined (NDEBUG)
28191  BTREE_RV_REDO_SET_DEBUG_INFO (&insert_helper->leaf_addr, insert_helper->rv_redo_data_ptr, btid_int,
28193 #endif /* !NDEBUG */
28195 
28196  /* Relocate first object to the end of leaf record. */
28197  BTREE_MVCC_INFO_SET_FIXED_SIZE (&first_object->mvcc_info);
28198  btree_record_append_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, first_object, NULL,
28199  &insert_helper->rv_redo_data_ptr);
28200  /* Replace first object with new object. */
28201  btree_leaf_change_first_object (thread_p, leaf_record, btid_int, BTREE_INSERT_OID (insert_helper),
28202  BTREE_INSERT_CLASS_OID (insert_helper), BTREE_INSERT_MVCC_INFO (insert_helper), NULL,
28203  NULL, &insert_helper->rv_redo_data_ptr);
28204 
28205  /* Update leaf record in page. */
28206  if (spage_update (thread_p, leaf, search_key->slotid, leaf_record) != SP_SUCCESS)
28207  {
28208  assert_release (false);
28209  return ER_FAILED;
28210  }
28211 
28213 
28214  /* Log changes. The page LSA is saved before the log record is appended; it is passed to the insert trace below. */
28215  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf));
28216  BTREE_RV_GET_DATA_LENGTH (insert_helper->rv_redo_data_ptr, insert_helper->rv_redo_data, rv_redo_data_length);
28217  log_append_undoredo_data (thread_p, insert_helper->rcvindex, &insert_helper->leaf_addr,
28218  insert_helper->rv_keyval_data_length, rv_redo_data_length, insert_helper->rv_keyval_data,
28219  insert_helper->rv_redo_data);
28220 
28222  pgbuf_set_dirty (thread_p, leaf, DONT_FREE);
28223 
28224  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("replace first object and relocate it at the end")
28225  "\t" BTREE_OBJINFO_MSG ("replaced object"),
28226  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, leaf, &prev_lsa, true, search_key->slotid,
28227  leaf_record->length, btid_int->sys_btid),
28228  BTREE_OBJINFO_AS_ARGS (first_object));
28229 
28231  return NO_ERROR;
28232 }
28233 
28234 /*
28235  * btree_key_relocate_last_into_ovf () - Move last object in leaf record into an overflow page.
28236  *
28237  * return : Error code.
28238  * thread_p (in) : Thread entry.
28239  * btid_int (in) : B-tree info.
28240  * key (in) : Key value.
28241  * leaf (in) : Leaf page.
28242  * search_key (in) : Search key result.
28243  * leaf_record (in) : Leaf record.
28244  * leaf_record_info (in) : Leaf record info.
28245  * offset_after_key (in) : Offset in record after packed key value.
28246  * insert_helper (in) : B-tree insert helper.
28247  */
28248 static int
28250  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record,
28251  LEAF_REC * leaf_record_info, int offset_after_key,
28252  BTREE_INSERT_HELPER * insert_helper)
28253 {
28254  int error_code = NO_ERROR;
28255  BTREE_OBJECT_INFO last_object;
28256  int offset_to_last_object = 0;
28257 
28258  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
28259  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
28260  char *rv_undo_data_ptr = rv_undo_data;
28261  int rv_undo_data_length = 0;
28262  int rv_redo_data_length = 0;
28263 
28264  LOG_LSA prev_lsa = LSA_INITIALIZER;
28265 
28266  assert (btid_int != NULL);
28267  assert (BTREE_IS_UNIQUE (btid_int->unique_pk));
28268  assert (key != NULL);
28269  assert (leaf != NULL);
28270  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND);
28271  assert (leaf_record != NULL);
28272  assert (leaf_record_info != NULL);
28273  assert (offset_after_key > 0);
28274  assert (insert_helper != NULL);
28275  assert (btree_is_insert_object_purpose (insert_helper->purpose));
28276  assert (insert_helper->leaf_addr.offset != 0 && insert_helper->leaf_addr.pgptr == leaf);
28277 
28278  /* Relocate last object in leaf record into an overflow page. */
28279 
28280  /* Get last object. */
28281  error_code =
28282  btree_record_get_last_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_after_key, &last_object.oid,
28283  &last_object.class_oid, &last_object.mvcc_info, &offset_to_last_object);
28284  if (error_code != NO_ERROR)
28285  {
28286  assert_release (false);
28287  return ER_FAILED;
28288  }
28289  assert (offset_to_last_object > 0);
28290 
28291  /* We need to change leaf page and at least one overflow page. Start a system operation. */
28292  log_sysop_start (thread_p);
28293  insert_helper->is_system_op_started = true;
28294 
28295  /* Copy last object into an overflow page. */
28296  error_code =
28297  btree_key_append_object_into_ovf (thread_p, btid_int, key, leaf, search_key, leaf_record, leaf_record_info,
28298  insert_helper, &last_object);
28299  if (error_code != NO_ERROR)
28300  {
28301  ASSERT_ERROR ();
28302  goto exit;
28303  }
28305 
28306  if (VPID_ISNULL (&leaf_record_info->ovfl) && btree_leaf_is_flaged (leaf_record, BTREE_LEAF_RECORD_OVERFLOW_OIDS))
28307  {
28308  /* First overflow page was added. */
28309  /* The last object may have been relocated. Read its offset again. NOTE(review): the error code returned by btree_read_record below is overwritten by the next call before it is checked - confirm this is intended. */
28310  bool dummy_clear_key = false;
28311  error_code =
28312  btree_read_record (thread_p, btid_int, leaf, leaf_record, NULL, leaf_record_info, BTREE_LEAF_NODE,
28313  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
28314  error_code =
28315  btree_record_get_last_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_after_key,
28316  &last_object.oid, &last_object.class_oid, &last_object.mvcc_info,
28317  &offset_to_last_object);
28318  if (error_code != NO_ERROR)
28319  {
28320  assert_release (false);
28321  goto exit;
28322  }
28323  }
28324 
28325  /* Remove last object from leaf record. */
28326  /* Prepare logging. */
28327  insert_helper->leaf_addr.offset = search_key->slotid;
28328  insert_helper->rv_redo_data_ptr = insert_helper->rv_redo_data;
28329 #if !defined (NDEBUG)
28330  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&insert_helper->leaf_addr, insert_helper->rv_redo_data_ptr, rv_undo_data_ptr,
28332 #endif /* !NDEBUG */
28334  btree_record_remove_last_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_to_last_object,
28335  &rv_undo_data_ptr, &insert_helper->rv_redo_data_ptr);
28336  /* Update leaf record. */
28337  if (spage_update (thread_p, leaf, search_key->slotid, leaf_record) != SP_SUCCESS)
28338  {
28339  assert_release (false);
28340  error_code = ER_FAILED;
28341  goto exit;
28342  }
28344 
28345  /* Log changes. */
28346  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf));
28347  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
28348  BTREE_RV_GET_DATA_LENGTH (insert_helper->rv_redo_data_ptr, insert_helper->rv_redo_data, rv_redo_data_length);
28349  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &insert_helper->leaf_addr, rv_undo_data_length,
28350  rv_redo_data_length, rv_undo_data, insert_helper->rv_redo_data);
28351 
28353  pgbuf_set_dirty (thread_p, leaf, DONT_FREE);
28354 
28355  btree_insert_log (insert_helper, BTREE_INSERT_MODIFY_MSG ("removed last object from leaf record")
28356  "\t" BTREE_OBJINFO_MSG ("last object"),
28357  BTREE_INSERT_MODIFY_ARGS (thread_p, insert_helper, leaf, &prev_lsa, true, search_key->slotid,
28358  leaf_record->length, btid_int->sys_btid),
28359  BTREE_OBJINFO_AS_ARGS (&last_object));
28360 
28361  /* Safe guard: another object can now be added to leaf record. */
28362  assert (btree_record_get_num_oids (thread_p, btid_int, leaf_record, offset_after_key, BTREE_LEAF_NODE) <
28364 
28365  /* Success. */
28367 
28368 exit:
28369  if (insert_helper->is_system_op_started)
28370  {
28371  if (error_code == NO_ERROR)
28372  {
28373  log_sysop_commit (thread_p);
28374  }
28375  else
28376  {
28377  ASSERT_ERROR ();
28378  log_sysop_abort (thread_p);
28379  }
28380  }
28381 
28383  return error_code;
28384 }
28385 
28386 /*
28387  * btree_key_append_object_into_ovf () - Append a new object in overflow OID's pages.
28388  *
28389  * return : Error code.
28390  * thread_p (in) : Thread entry.
28391  * btid_int (in) : B-tree info.
28392  * key (in) : Key value.
28393  * leaf (in) : Leaf page.
28394  * search_key (in) : Search key result.
28395  * leaf_record (in) : Leaf record.
28396  * leaf_record_info (in) : Leaf record info.
28397  * insert_helper (in) : B-tree insert helper.
28398  * append_object (in) : Object to append to overflow.
28399  */
28400 static int
28402  BTREE_SEARCH_KEY_HELPER * search_key, RECDES * leaf_record,
28403  LEAF_REC * leaf_record_info, BTREE_INSERT_HELPER * insert_helper,
28404  BTREE_OBJECT_INFO * append_object)
28405 {
28406  int error_code = NO_ERROR;
28407  PAGE_PTR overflow_page = NULL;
28408 
28409  assert (btid_int != NULL);
28410  assert (key != NULL);
28411  assert (leaf != NULL);
28412  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND);
28413  assert (leaf_record != NULL);
28414  assert (leaf_record_info != NULL);
28415  assert (insert_helper != NULL);
28416  assert (btree_is_insert_object_purpose (insert_helper->purpose));
28417  assert (append_object != NULL);
28418 
28419  /* Is there enough space in existing overflow pages? */
28420  error_code = btree_find_free_overflow_oids_page (thread_p, btid_int, &leaf_record_info->ovfl, &overflow_page);
28421  if (error_code != NO_ERROR)
28422  {
28423  assert (overflow_page == NULL);
28424  ASSERT_ERROR ();
28425  return error_code;
28426  }
28427  if (overflow_page == NULL)
28428  {
28429  /* Could not find free space for object. Create overflow page. */
28430  OID *notification_class_oid;
28431 
28432  if (!OID_ISNULL (&append_object->class_oid))
28433  {
28434  notification_class_oid = &append_object->class_oid;
28435  }
28436  else
28437  {
28438  notification_class_oid = &btid_int->topclass_oid;
28439  }
28440 
28441  /* Notification */
28442  BTREE_SET_CREATED_OVERFLOW_PAGE_NOTIFICATION (thread_p, key, &append_object->oid, notification_class_oid,
28443  btid_int->sys_btid);
28444  error_code =
28445  btree_key_append_object_as_new_overflow (thread_p, btid_int, leaf, append_object, insert_helper, search_key,
28446  leaf_record, &leaf_record_info->ovfl);
28447  if (error_code != NO_ERROR)
28448  {
28449  ASSERT_ERROR ();
28450  return error_code;
28451  }
28452 
28453  if (insert_helper->insert_list != NULL)
28454  {
28455  insert_helper->insert_list->m_ovf_appends_new_page++;
28456  }
28457  }
28458  else
28459  {
28460  error_code =
28461  btree_key_append_object_to_overflow (thread_p, btid_int, overflow_page, append_object, insert_helper);
28462  pgbuf_unfix_and_init (thread_p, overflow_page);
28463  if (error_code != NO_ERROR)
28464  {
28465  ASSERT_ERROR ();
28466  return error_code;
28467  }
28468  if (insert_helper->insert_list != NULL)
28469  {
28470  insert_helper->insert_list->m_ovf_appends++;
28471  }
28472  }
28473  assert (overflow_page == NULL);
28474 
28475  /* Success. */
28476  return NO_ERROR;
28477 }
28478 
28479 /*
28480  * btree_key_find_and_insert_delete_mvccid () - BTREE_ADVANCE_WITH_KEY_FUNCTION used for MVCC logical delete.
28481  * An object is found and a delete MVCCID is added to its MVCC info.
28482  *
28483  * return : NO_ERROR on success. ER_BTREE_UNKNOWN_KEY if key or object is not found, ER_FAILED if the leaf record cannot be read, or the error code of a failing helper.
28484  * thread_p (in) : Thread entry.
28485  * btid_int (in) : B-tree identifier.
28486  * key (in) : Key value. Must not be a NULL value.
28487  * leaf_page (in) : Pointer to leaf node page.
28488  * search_key (in) : Search key result. The key is expected to be found; slotid locates the leaf record.
28489  * restart (in) : Not used.
28490  * other_args (in) : BTREE_INSERT_HELPER *. Its purpose must be BTREE_OP_INSERT_MVCC_DELID or BTREE_OP_INSERT_MARK_DELETED.
28491  */
28492 static int
28494  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
28495  void *other_args)
28496 {
28497  BTREE_INSERT_HELPER *insert_helper = (BTREE_INSERT_HELPER *) other_args;
28498  int error_code = NO_ERROR;
28499  RECDES record;
28500  char data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
28501  LEAF_REC leaf_info;
28502  int offset_after_key;
28503  int offset_to_found_object;
28505  bool dummy_clear_key;
28506 
28507  PAGE_PTR found_page = NULL;
28508 
28509  int num_visible = 0;
28510  MVCC_SNAPSHOT snapshot_dirty;
28511 
28512  /* Assert expected arguments. */
28513  assert (btid_int != NULL);
28514  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
28515  assert (leaf_page != NULL && *leaf_page != NULL);
28516  assert (search_key != NULL);
28517  assert (insert_helper != NULL);
28518  assert (insert_helper->purpose == BTREE_OP_INSERT_MVCC_DELID
28519  || insert_helper->purpose == BTREE_OP_INSERT_MARK_DELETED);
28520 
28521  btree_perf_track_traverse_time (thread_p, insert_helper);
28522 
28523  if (search_key->result != BTREE_KEY_FOUND)
28524  {
28525  /* Impossible. Object and key should exist in b-tree. */
28526  assert (false);
28527  btree_set_unknown_key_error (thread_p, btid_int->sys_btid, key, "btree_key_find_and_insert_delete_mvccid");
28528  return ER_BTREE_UNKNOWN_KEY;
28529  }
28530 
28531  /* Prepare leaf record descriptor to read from b-tree. The record is copied into the aligned local buffer. */
28532  record.type = REC_HOME;
28533  record.area_size = DB_PAGESIZE;
28534  record.data = PTR_ALIGN (data_buffer, BTREE_MAX_ALIGN);
28535 
28536  /* Get & read record. */
28537  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
28538  {
28539  /* Unexpected. */
28540  assert_release (false);
28541  return ER_FAILED;
28542  }
28543 #if !defined (NDEBUG)
28544  /* Check valid record before changing it. */
28545  (void) btree_check_valid_record (thread_p, btid_int, &record, BTREE_LEAF_NODE, key);
28546 #endif
28547  error_code =
28548  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE, &dummy_clear_key,
28549  &offset_after_key, PEEK_KEY_VALUE, NULL);
28550  if (error_code != NO_ERROR)
28551  {
28552  ASSERT_ERROR ();
28553  goto exit;
28554  }
28555 
28556  if (insert_helper->is_unique_multi_update && !insert_helper->is_ha_enabled && BTREE_IS_UNIQUE (btid_int->unique_pk))
28557  {
28558  snapshot_dirty.snapshot_fnc = mvcc_satisfies_dirty;
28559  error_code =
28560  btree_get_num_visible_from_leaf_and_ovf (thread_p, btid_int, &record, offset_after_key, &leaf_info, NULL,
28561  &snapshot_dirty, &num_visible);
28562  if (error_code != NO_ERROR)
28563  {
28564  ASSERT_ERROR ();
28565  goto exit;
28566  }
28567  /* Even though multiple visible objects are allowed, they cannot exceed two visible objects (insert does not
28568  * allow it). Also, there should be at least one object to delete. */
28569  assert (num_visible > 0);
28570  assert (num_visible <= 2);
28571  /* The key is considered deleted only if there is one visible object (which will be deleted). */
28572  insert_helper->is_unique_key_added_or_deleted = (num_visible == 1);
28573  }
28574 
28575  error_code =
28576  btree_find_oid_and_its_page (thread_p, btid_int, BTREE_INSERT_OID (insert_helper), *leaf_page,
28577  insert_helper->purpose, NULL, &record, &leaf_info, offset_after_key, &found_page, NULL,
28578  &offset_to_found_object, &mvcc_info);
28579  if (error_code != NO_ERROR)
28580  {
28581  /* Error. */
28582  ASSERT_ERROR ();
28583  assert (found_page == NULL);
28584 
28585  goto exit;
28586  }
28587  if (offset_to_found_object == NOT_FOUND)
28588  {
28589  assert (found_page == NULL);
28590  assert (false);
28591  btree_set_unknown_key_error (thread_p, btid_int->sys_btid, key, "btree_key_find_and_insert_delete_mvccid");
28592  error_code = ER_BTREE_UNKNOWN_KEY;
28593  goto exit;
28594  }
28595  /* Object was found. It must not have a delete MVCCID yet. */
28596  assert (!BTREE_MVCC_INFO_IS_DELID_VALID (&mvcc_info));
28597  /* Copy insert ID into object info. */
28599  {
28600  BTREE_MVCC_INFO_SET_INSID (BTREE_INSERT_MVCC_INFO (insert_helper), mvcc_info.insert_mvccid);
28601  }
28602 
28603  /* Insert its delete MVCCID. */
28604  error_code =
28605  btree_key_insert_delete_mvccid (thread_p, btid_int, key, *leaf_page, search_key, insert_helper, &record, found_page,
28606  offset_to_found_object);
28607  if (found_page != NULL && found_page != *leaf_page) /* The leaf page itself is not unfixed here. */
28608  {
28609  pgbuf_unfix_and_init (thread_p, found_page);
28610  }
28611  if (error_code != NO_ERROR)
28612  {
28613  ASSERT_ERROR ();
28614  goto exit;
28615  }
28616 
28617 #if !defined (NDEBUG)
28618  if (oid_is_db_class (BTREE_INSERT_CLASS_OID (insert_helper)))
28619  {
28620  /* Although the indexes on _db_class and _db_attribute are not unique, they cannot have duplicate OID's. Check
28621  * here this consistency (no visible objects should exist). */
28622  btree_key_record_check_no_visible (thread_p, btid_int, *leaf_page, search_key->slotid);
28623  }
28624 #endif /* !NDEBUG */
28625 
28626 exit:
28627 
28628  btree_perf_track_time (thread_p, insert_helper);
28629  return error_code;
28630 }
28631 
28632 /*
28633  * btree_key_insert_delete_mvccid () - Insert delete MVCCID for a b-tree object. The object is changed either in the leaf record or in the record of the overflow page where it was found.
28634  *
28635  * return : NO_ERROR on success, ER_FAILED if the overflow record cannot be read, or the error code of btree_insert_mvcc_delid_into_page.
28636  * thread_p (in) : Thread entry.
28637  * btid_int (in) : B-tree info.
28638  * key (in) : Key value.
28639  * leaf_page (in) : Leaf page.
28640  * search_key (in) : Search key result. Its slotid is used when the object is in the leaf page.
28641  * insert_helper (in) : B-tree insert helper.
28642  * leaf_record (in) : Leaf record. Used only when the object is in the leaf page.
28643  * object_page (in) : Page of object that is being deleted. Equal to leaf_page when the object is in the leaf record, otherwise treated as an overflow page.
28644  * offset_to_found_object (in) : Offset to object that is being deleted, in the record of object_page.
28645  */
28646 static int
28647 btree_key_insert_delete_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR leaf_page,
28648  BTREE_SEARCH_KEY_HELPER * search_key, BTREE_INSERT_HELPER * insert_helper,
28649  RECDES * leaf_record, PAGE_PTR object_page, int offset_to_found_object)
28650 {
28651  int error_code = NO_ERROR;
28652  RECDES overflow_record;
28653  char overflow_record_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
28654 
28655  if (object_page == leaf_page)
28656  {
28657  /* Found in leaf page. */
28658 
28659  /* Unique index can only delete the first object. Exception: If this is multi-row update, it is allowed to have
28660  * more than one visible objects. Second visible object may be deleted later, thus preserving the unique
28661  * constraint. */
28662  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || offset_to_found_object == 0
28663  || insert_helper->op_type == MULTI_ROW_UPDATE);
28664 
28665  /* Object was found in leaf page and can be deleted. */
28666  error_code =
28667  btree_insert_mvcc_delid_into_page (thread_p, btid_int, leaf_page, BTREE_LEAF_NODE, key, insert_helper,
28668  search_key->slotid, leaf_record, offset_to_found_object);
28669  if (error_code != NO_ERROR)
28670  {
28671  ASSERT_ERROR ();
28672  return error_code;
28673  }
28674 
28675  /* Success */
28676  return NO_ERROR;
28677  }
28678  /* Found in overflow page. */
28679 
28680  /* Get overflow record. It is copied from slot 1 of the overflow page into the aligned local buffer. */
28681  overflow_record.data = PTR_ALIGN (overflow_record_buffer, BTREE_MAX_ALIGN);
28682  overflow_record.area_size = DB_PAGESIZE;
28683  if (spage_get_record (thread_p, object_page, 1, &overflow_record, COPY) != S_SUCCESS)
28684  {
28685  assert_release (false);
28686  return ER_FAILED;
28687  }
28688  /* Insert delete MVCCID into the overflow record (slot 1). */
28689  error_code =
28690  btree_insert_mvcc_delid_into_page (thread_p, btid_int, object_page, BTREE_OVERFLOW_NODE, key, insert_helper, 1,
28691  &overflow_record, offset_to_found_object);
28692  if (error_code != NO_ERROR)
28693  {
28694  ASSERT_ERROR ();
28695  return error_code;
28696  }
28697 
28698  /* Success */
28699  return NO_ERROR;
28700 }
28701 
28702 #if !defined (NDEBUG)
28703 /*
28704  * btree_key_record_check_no_visible () - Check b-tree record has no visible objects. Debug only. The result is reported through asserts; nothing is returned.
28705  *
28706  * thread_p (in) : Thread entry.
28707  * btid_int (in) : B-tree info.
28708  * leaf_page (in) : Leaf page.
28709  * slotid (in) : Record slot ID.
28710  */
28711 static void
28712 btree_key_record_check_no_visible (THREAD_ENTRY * thread_p, BTID_INT * btid_int, PAGE_PTR leaf_page, PGSLOTID slotid)
28713 {
28714  RECDES record;
28715  LEAF_REC leaf_rec_info;
28716  int offset_after_key;
28717  bool dummy_clear_key;
28718  int num_visible;
28719  MVCC_SNAPSHOT dirty_snapshot;
28720  int error_code = NO_ERROR;
28721 
28722  dirty_snapshot.snapshot_fnc = mvcc_satisfies_dirty; /* Visibility is decided by mvcc_satisfies_dirty. */
28723 
28724  if (spage_get_record (thread_p, leaf_page, slotid, &record, PEEK) != S_SUCCESS)
28725  {
28726  assert (false);
28727  return;
28728  }
28729  if (btree_read_record (thread_p, btid_int, leaf_page, &record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
28730  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL) != NO_ERROR)
28731  {
28732  assert (false);
28733  return;
28734  }
28735 
28736  error_code = btree_get_num_visible_from_leaf_and_ovf (thread_p, btid_int, &record, offset_after_key, &leaf_rec_info,
28737  NULL, &dirty_snapshot, &num_visible);
28738 
28739  assert ((error_code == NO_ERROR && num_visible == 0) || error_code != NO_ERROR); /* num_visible is checked only when counting succeeded. */
28740 }
28741 #endif /* !NDEBUG */
28742 
28743 /*
28744  * btree_mvcc_info_from_heap_mvcc_header () - Convert an MVCC record header (used for heap records) into
28745  * a b-tree MVCC info structure (used to store an object in b-tree).
28746  *
28747  * return : Void.
28748  * mvcc_header (in) : Heap record MVCC header. Must not be NULL.
28749  * mvcc_info (out) : B-tree MVCC info. Must not be NULL. Flags are rebuilt from scratch; insert MVCCID defaults to MVCCID_ALL_VISIBLE and delete MVCCID to MVCCID_NULL when the header has no valid value.
28750  */
28751 void
28753 {
28754  /* Assert expected arguments. */
28755  assert (mvcc_header != NULL);
28756  assert (mvcc_info != NULL);
28757 
28758  mvcc_info->flags = 0;
28759  if (MVCC_IS_FLAG_SET (mvcc_header, OR_MVCC_FLAG_VALID_INSID))
28760  {
28761  mvcc_info->flags |= BTREE_OID_HAS_MVCC_INSID;
28762  mvcc_info->insert_mvccid = MVCC_GET_INSID (mvcc_header);
28763  }
28764  else
28765  {
28766  mvcc_info->insert_mvccid = MVCCID_ALL_VISIBLE; /* No valid insert MVCCID in header. */
28767  }
28768  if (MVCC_IS_FLAG_SET (mvcc_header, OR_MVCC_FLAG_VALID_DELID))
28769  {
28770  mvcc_info->flags |= BTREE_OID_HAS_MVCC_DELID;
28771  mvcc_info->delete_mvccid = MVCC_GET_DELID (mvcc_header);
28772  }
28773  else
28774  {
28775  mvcc_info->delete_mvccid = MVCCID_NULL; /* No valid delete MVCCID in header. */
28776  }
28777 }
28778 
28779 /*
28780  * btree_mvcc_info_to_heap_mvcc_header () - Convert a b-tree MVCC info structure into a heap record MVCC header.
28781  *
28782  * return : Void.
28783  * mvcc_info (in) : B-tree MVCC info. Must not be NULL.
28784  * mvcc_header (out) : Heap record MVCC header. Must not be NULL. mvcc_flag is rebuilt from scratch; insert MVCCID falls back to MVCCID_ALL_VISIBLE and delete MVCCID to MVCCID_NULL when not taken from mvcc_info.
28785  */
28786 void
28788 {
28789  /* Assert expected arguments. */
28790  assert (mvcc_header != NULL);
28791  assert (mvcc_info != NULL);
28792 
28793  mvcc_header->mvcc_flag = 0;
28795  {
28796  mvcc_header->mvcc_flag |= OR_MVCC_FLAG_VALID_INSID;
28797  MVCC_SET_INSID (mvcc_header, mvcc_info->insert_mvccid);
28798  }
28799  else
28800  {
28801  MVCC_SET_INSID (mvcc_header, MVCCID_ALL_VISIBLE);
28802  }
28803  if (BTREE_MVCC_INFO_IS_DELID_VALID (mvcc_info))
28804  {
28805  mvcc_header->mvcc_flag |= OR_MVCC_FLAG_VALID_DELID;
28806  MVCC_SET_DELID (mvcc_header, mvcc_info->delete_mvccid);
28807  }
28808  else
28809  {
28810  MVCC_SET_DELID (mvcc_header, MVCCID_NULL); /* No valid delete MVCCID in b-tree info. */
28811  }
28812 }
28813 
28814 /*
28815  * btree_rv_redo_record_modify () - Redo recovery of b-tree key records. Delegates to btree_rv_record_modify_internal with is_undo = false.
28816  *
28817  * return : Error code returned by btree_rv_record_modify_internal.
28818  * thread_p (in) : Thread entry.
28819  * rcv (in) : Recovery data.
28820  */
28821 int
28823 {
28824  return btree_rv_record_modify_internal (thread_p, rcv, false);
28825 }
28826 
28827 /*
28828  * btree_rv_undo_record_modify () - Undo recovery of b-tree key records. Delegates to btree_rv_record_modify_internal with is_undo = true.
28829  *
28830  * return : Error code returned by btree_rv_record_modify_internal.
28831  * thread_p (in) : Thread entry.
28832  * rcv (in) : Recovery data.
28833  */
28834 int
28836 {
28837  return btree_rv_record_modify_internal (thread_p, rcv, true);
28838 }
28839 
28840 /*
28841  * btree_rv_record_modify_internal () - Undo/redo recovery of b-tree key records. Depending on the flags packed in rcv->offset, the record is removed, inserted, replaced entirely or changed partially.
28842  *
28843  * return : NO_ERROR on success, ER_FAILED if the node header cannot be read or a slotted page operation fails, or the error code of log_rv_undoredo_record_partial_changes.
28844  * thread_p (in) : Thread entry.
28845  * rcv (in) : Recovery data. rcv->offset packs flags and slot ID, rcv->data and rcv->length hold the logged data, rcv->pgptr is the page being recovered.
28846  * is_undo (in) : True if undo recovery, false if redo recovery.
28847  */
28848 static int
28849 btree_rv_record_modify_internal (THREAD_ENTRY * thread_p, LOG_RCV * rcv, bool is_undo)
28850 {
28851  short flags; /* Flags set into rcv->offset. */
28852  PGSLOTID slotid; /* Slot ID stored in rcv->offset. */
28853  BTREE_NODE_HEADER *node_header = NULL; /* Node header. */
28854  int key_cnt; /* Node key count. */
28855  RECDES update_record; /* Used to store modified record. */
28856  /* Buffer to store modified record data. */
28857  char data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
28858  char *rcv_data_ptr = NULL; /* Current pointer in recovery data. */
28859  BTID_INT btid_int_for_debug; /* B-tree info structure used to store unique flag and top class OID. */
28860  BTREE_NODE_TYPE node_type; /* Node type. */
28861  int error_code = NO_ERROR; /* Error code. */
28862  /* True when b-tree operations should be logged. For debugging. */
28863  bool log_btree_ops = prm_get_bool_value (PRM_ID_LOG_BTREE_OPS);
28864  bool has_debug_info = false;
28865 
28866  /* >>>>>>>>>>>> */
28867  /* Debug ID to help developers find the source of bug in logging/recovery code. */
28868  BTREE_RV_DEBUG_ID rv_debug_id = BTREE_RV_REDO_NO_ID;
28869  /* <<<<<<<<<<<< */
28870 
28871  /* Get flags and slot ID. Both are packed in rcv->offset and separated with BTREE_RV_FLAGS_MASK. */
28872  flags = rcv->offset & BTREE_RV_FLAGS_MASK;
28873  slotid = rcv->offset & (~BTREE_RV_FLAGS_MASK);
28874 
28875  /* There are four major cases here:
28876  * 1. LOG_RV_RECORD_DELETE: Key is removed completely.
28877  * 2. LOG_RV_RECORD_UPDATE_ALL:
28878  * Entire record is updated (overflow header).
28879  * 3. LOG_RV_RECORD_INSERT: Key is inserted or overflow is created.
28880  * 4. LOG_RV_RECORD_UPDATE_PARTIAL: Record is updated by bits.
28881  * If is_undo flag is true, the delete and insert cases are swapped. */
28882 
28883  /* Case 1: Is key being removed completely? */
28884  /* Logged by: btree_delete_key_from_leaf or btree_key_insert_new_key */
28885  if ((!is_undo && LOG_RV_RECORD_IS_DELETE (flags)) || (is_undo && LOG_RV_RECORD_IS_INSERT (flags)))
28886  {
28887  /* Record is completely removed from page. Redo of key removal from leaf page, when all its objects have been
28888  * deleted or undo of new key being inserted. */
28889 
28890  /* Delete key record. */
28891  node_header = btree_get_node_header (thread_p, rcv->pgptr);
28892  if (node_header == NULL)
28893  {
28894  assert_release (false);
28895  return ER_FAILED;
28896  }
28897  assert (slotid > 0);
28898  if (spage_delete (thread_p, rcv->pgptr, slotid) != slotid)
28899  {
28900  assert_release (false);
28901  return ER_FAILED;
28902  }
28903 
28904  /* Update the page header: max key length is reset when no keys are left. */
28905  key_cnt = btree_node_number_of_keys (thread_p, rcv->pgptr);
28906  if (key_cnt == 0)
28907  {
28908  node_header->max_key_len = 0;
28909  }
28910  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
28911 
28912  if (log_btree_ops)
28913  {
28915  "%s: remove slotid=%d from leaf page %d|%d, lsa=%lld|%d, in an unknown index.\n",
28916  is_undo ? "BTREE_UNDO" : "BTREE_REDO", slotid, PGBUF_PAGE_STATE_ARGS (rcv->pgptr));
28917  }
28918  return NO_ERROR;
28919  }
28920  /* Case 1 is ruled out. Cases 2, 3, 4. These cases may also require unpacking debug info and running sanity checks
28921  * after modifying record. */
28922 
28923  /* First get debug info if any. */
28924  rcv_data_ptr = (char *) rcv->data;
28925 
28926  /* First check if there is debug info stored. */
28927  if (BTREE_RV_HAS_DEBUG_INFO (flags))
28928  {
28929  has_debug_info = true;
28930 
28931  /* Get BTREE_RV_DEBUG_ID */
28932  rv_debug_id = (BTREE_RV_DEBUG_ID) OR_GET_INT (rcv_data_ptr);
28933  rcv_data_ptr += OR_INT_SIZE;
28934 
28935  /* Read unique_pk flag. */
28936  btid_int_for_debug.unique_pk = OR_GET_INT (rcv_data_ptr);
28937  rcv_data_ptr += OR_INT_SIZE;
28938 
28939  /* Set top class OID. It is packed only for unique indexes. */
28940  if (BTREE_IS_UNIQUE (btid_int_for_debug.unique_pk))
28941  {
28942  OR_GET_OID (rcv_data_ptr, &btid_int_for_debug.topclass_oid);
28943  rcv_data_ptr += OR_OID_SIZE;
28944  }
28945  else
28946  {
28947  OID_SET_NULL (&btid_int_for_debug.topclass_oid);
28948  }
28949 
28950  /* Read key type. */
28951  ASSERT_ALIGN (rcv_data_ptr, INT_ALIGNMENT);
28952  rcv_data_ptr = or_unpack_domain (rcv_data_ptr, &btid_int_for_debug.key_type, NULL);
28953  rcv_data_ptr = PTR_ALIGN (rcv_data_ptr, INT_ALIGNMENT);
28954  }
28955 
28956  /* Get node type. */
28957  if (flags & BTREE_RV_OVERFLOW_FLAG)
28958  {
28959  node_type = BTREE_OVERFLOW_NODE;
28960  }
28961  else
28962  {
28963  node_type = BTREE_LEAF_NODE;
28964  }
28965 
28966  /* Case 2: Is entire record being updated? */
28967  /* Logged by: btree_modify_overflow_link. btree_overflow_record_replace_object. */
28968  if (LOG_RV_RECORD_IS_UPDATE_ALL (flags))
28969  {
28970  /* Update entire record. */
28971  /* The remaining recovery data is updated record data. */
28972  update_record.data = (char *) rcv_data_ptr;
28973  update_record.length = rcv->length - CAST_BUFLEN (rcv_data_ptr - rcv->data);
28974  update_record.type = REC_HOME;
28975 
28976 #if !defined (NDEBUG)
28977  if (has_debug_info)
28978  {
28979  (void) btree_check_valid_record (thread_p, &btid_int_for_debug, &update_record, node_type, NULL);
28980  }
28981 #endif /* !NDEBUG */
28982 
28983  /* Update record in page. */
28984  if (spage_update (thread_p, rcv->pgptr, slotid, &update_record) != SP_SUCCESS)
28985  {
28986  assert_release (false);
28987  return ER_FAILED;
28988  }
28989  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
28990 
28991  if (log_btree_ops)
28992  {
28994  "%s: update slotid=%d from page %d|%d, lsa=%lld|%d in an unknown index."
28995  "Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO", slotid,
28996  PGBUF_PAGE_STATE_ARGS (rcv->pgptr), update_record.length);
28997  }
28998  return NO_ERROR;
28999  }
29000  /* Case 2 is ruled out. Cases 3 and 4. */
29001 
29002  /* Case 3: 1. New overflow page. Logged by: btree_start_overflow_page. 2. New key in leaf record. Logged by:
29003  * btree_key_insert_new_key. */
29004  if ((!is_undo && LOG_RV_RECORD_IS_INSERT (flags)) || (is_undo && LOG_RV_RECORD_IS_DELETE (flags)))
29005  {
29006  if (node_type == BTREE_OVERFLOW_NODE)
29007  {
29008  /* Actually two records are inserted: overflow header and record containing one object. */
29009  BTREE_OVERFLOW_HEADER overflow_header;
29010 
29011  /* Insert overflow header. Its next_vpid is read from recovery data. */
29012  OR_GET_VPID (rcv_data_ptr, &overflow_header.next_vpid);
29013  rcv_data_ptr += DISK_VPID_ALIGNED_SIZE;
29014 
29015  update_record.type = REC_HOME;
29016  update_record.data = (char *) &overflow_header;
29017  update_record.length = sizeof (overflow_header);
29018  if (spage_insert_at (thread_p, rcv->pgptr, HEADER, &update_record) != SP_SUCCESS)
29019  {
29020  assert_release (false);
29021  return ER_FAILED;
29022  }
29023 
29024  /* Insert object record. The remaining recovery data is the record data. */
29025  update_record.data = rcv_data_ptr;
29026  update_record.length = rcv->length - CAST_BUFLEN (rcv_data_ptr - rcv->data);
29027 
29028 #if !defined (NDEBUG)
29029  if (has_debug_info)
29030  {
29031  assert (update_record.length == BTREE_OBJECT_FIXED_SIZE (&btid_int_for_debug));
29032  (void) btree_check_valid_record (thread_p, &btid_int_for_debug, &update_record, BTREE_OVERFLOW_NODE,
29033  NULL);
29034  }
29035 #endif /* !NDEBUG */
29036 
29037  if (spage_insert_at (thread_p, rcv->pgptr, 1, &update_record) != SP_SUCCESS)
29038  {
29039  assert_release (false);
29040  (void) spage_delete (thread_p, rcv->pgptr, HEADER); /* Remove the header record inserted above. */
29041  return ER_FAILED;
29042  }
29043  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
29044 
29045  if (log_btree_ops)
29046  {
29047  if (has_debug_info)
29048  {
29049  OID oid = OID_INITIALIZER;
29050  OID class_oid = OID_INITIALIZER;
29052 
29053  btree_unpack_object (update_record.data, &btid_int_for_debug, node_type, &update_record, 0, &oid,
29054  &class_oid, &mvcc_info);
29056  "%s: create new overflow page %d|%d, lsa=%lld|%d, in an unknown index. "
29057  "Insert object=%d|%d|%d, class_oid=%d|%d|%d, mvcc_info=%llu|%llu."
29058  "Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO",
29059  PGBUF_PAGE_STATE_ARGS (rcv->pgptr), oid.volid, oid.pageid, oid.slotid,
29060  class_oid.volid, class_oid.pageid, class_oid.slotid,
29061  (unsigned long long int) mvcc_info.insert_mvccid,
29062  (unsigned long long int) mvcc_info.delete_mvccid, update_record.length);
29063  }
29064  else
29065  {
29067  "%s: create new overflow page %d|%d, lsa=%lld|%d, in an unknown index."
29068  "Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO",
29069  PGBUF_PAGE_STATE_ARGS (rcv->pgptr), update_record.length);
29070  }
29071  }
29072  }
29073  else
29074  {
29075  /* Insert key into leaf page. */
29076  int key_length; /* Read and used only when BTREE_RV_IS_UPDATE_MAX_KEY_LEN (flags) is true. */
29077  int key_count;
29078  BTREE_NODE_HEADER *node_header = NULL;
29079 
29080  node_header = btree_get_node_header (thread_p, rcv->pgptr);
29081  if (node_header == NULL)
29082  {
29083  assert_release (false);
29084  return ER_FAILED;
29085  }
29086 
29087  if (BTREE_RV_IS_UPDATE_MAX_KEY_LEN (flags))
29088  {
29089  rcv_data_ptr = or_unpack_int (rcv_data_ptr, &key_length);
29090  }
29091 
29092  update_record.type = REC_HOME;
29093  update_record.data = rcv_data_ptr;
29094  update_record.length = rcv->length - CAST_BUFLEN (rcv_data_ptr - rcv->data);
29095 
29096 #if !defined (NDEBUG)
29097  if (has_debug_info)
29098  {
29099  (void) btree_check_valid_record (thread_p, &btid_int_for_debug, &update_record, node_type, NULL);
29100  }
29101 #endif /* !NDEBUG */
29102 
29103  if (spage_insert_at (thread_p, rcv->pgptr, slotid, &update_record) != SP_SUCCESS)
29104  {
29105  assert_release (false);
29106  return ER_FAILED;
29107  }
29108 
29109  if (BTREE_RV_IS_UPDATE_MAX_KEY_LEN (flags))
29110  {
29111  assert (key_length > node_header->max_key_len);
29112  node_header->max_key_len = key_length;
29113  }
29114 
29115  key_count = btree_node_number_of_keys (thread_p, rcv->pgptr);
29116 
29117  assert (node_header->split_info.pivot >= 0 && key_count > 0);
29118  btree_split_next_pivot (&node_header->split_info, (float) slotid / key_count, key_count);
29119 
29120  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
29121 
29122  if (log_btree_ops)
29123  {
29124  if (has_debug_info && !btree_leaf_is_flaged (&update_record, BTREE_LEAF_RECORD_OVERFLOW_KEY))
29125  {
29126  OID oid = OID_INITIALIZER;
29127  OID class_oid = OID_INITIALIZER;
29129  LEAF_REC leaf_rec_info;
29130  int offset_after_key = 0;
29131  DB_VALUE key;
29132  bool clear_key;
29133  char *printed_key = NULL;
29134 
29135  btree_init_temp_key_value (&clear_key, &key);
29136  (void) btree_read_record (thread_p, &btid_int_for_debug, rcv->pgptr, &update_record, &key,
29137  &leaf_rec_info, node_type, &clear_key, &offset_after_key, PEEK_KEY_VALUE,
29138  NULL);
29139  printed_key = pr_valstring (&key);
29140  btree_clear_key_value (&clear_key, &key);
29141 
29142  (void) btree_unpack_object (update_record.data, &btid_int_for_debug, node_type, &update_record,
29143  offset_after_key, &oid, &class_oid, &mvcc_info);
29145  "%s: insert slotid=%d in leaf page %d|%d, lsa=%lld|%d, in an unknown index. "
29146  "Object=%d|%d|%d, class_oid=%d|%d|%d, mvcc_info=%lld|%lld, key=%s."
29147  "Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO", slotid,
29148  PGBUF_PAGE_STATE_ARGS (rcv->pgptr), oid.volid, oid.pageid, oid.slotid,
29149  class_oid.volid, class_oid.pageid, class_oid.slotid,
29150  (unsigned long long int) mvcc_info.insert_mvccid,
29151  (unsigned long long int) mvcc_info.delete_mvccid,
29152  printed_key != NULL ? printed_key : "unknown", update_record.length);
29153  if (printed_key != NULL)
29154  {
29155  db_private_free (thread_p, printed_key);
29156  }
29157  }
29158  else
29159  {
29161  "%s: insert slotid=%d in leaf page %d|%d, lsa=%lld|%d, "
29162  "in an unknown index. Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO",
29163  slotid, PGBUF_PAGE_STATE_ARGS (rcv->pgptr), update_record.length);
29164  }
29165  }
29166  }
29167  return NO_ERROR;
29168  }
29169  /* Case 4: Update record by parts. All other b-tree record changes. Logged by: btree_insert_mvcc_delid_into_page
29170  * btree_key_append_object_as_new_overflow btree_key_append_object_to_overflow
29171  * btree_key_lock_and_append_object_unique btree_key_append_object_non_unique btree_key_remove_insert_mvccid
29172  * btree_key_remove_delete_mvccid_unique btree_key_remove_delete_mvccid_non_unique
29173  * btree_overflow_record_replace_object btree_replace_first_oid_with_ovfl_oid btree_modify_overflow_link
29174  * btree_leaf_record_replace_first_with_last btree_record_remove_object */
29176 
29177  /* Check there is at least one change logged. */
29178  assert (CAST_BUFLEN (rcv_data_ptr - rcv->data) < rcv->length);
29179 
29180  /* Get existing record. It is copied into the aligned local buffer so the changes can be applied to the copy.
 * NOTE(review): other spage_get_record callers in this file compare against S_SUCCESS; confirm SP_SUCCESS is
 * equivalent here. */
29181  update_record.data = PTR_ALIGN (data_buffer, BTREE_MAX_ALIGN);
29182  update_record.area_size = DB_PAGESIZE;
29183  if (spage_get_record (thread_p, rcv->pgptr, slotid, &update_record, COPY) != SP_SUCCESS)
29184  {
29185  assert_release (false);
29186  return ER_FAILED;
29187  }
29188 
29189 #if !defined (NDEBUG)
29190  if (has_debug_info)
29191  {
29192  /* Check existing record is valid. */
29193  (void) btree_check_valid_record (thread_p, &btid_int_for_debug, &update_record, node_type, NULL);
29194  }
29195 #endif /* !NDEBUG */
29196 
29197  /* Apply changes. The remaining recovery data describes the partial changes. */
29198  error_code =
29199  log_rv_undoredo_record_partial_changes (thread_p, rcv_data_ptr,
29200  rcv->length - CAST_BUFLEN (rcv_data_ptr - rcv->data), &update_record,
29201  is_undo);
29202  if (error_code != NO_ERROR)
29203  {
29204  ASSERT_ERROR ();
29205  return error_code;
29206  }
29207  /* Changes applied successfully. */
29208 
29209 #if !defined (NDEBUG)
29210  if (has_debug_info)
29211  {
29212  /* Check record validity after changes. */
29213  (void) btree_check_valid_record (thread_p, &btid_int_for_debug, &update_record, node_type, NULL);
29214  }
29215 #endif /* !NDEBUG */
29216 
29217  /* Update in page. */
29218  if (spage_update (thread_p, rcv->pgptr, slotid, &update_record) != SP_SUCCESS)
29219  {
29220  assert_release (false);
29221  return ER_FAILED;
29222  }
29223  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
29224 
29225  if (log_btree_ops)
29226  {
29227  if (has_debug_info)
29228  {
29229  LEAF_REC leaf_rec_info;
29230  int offset_after_key = 0;
29231  DB_VALUE key;
29232  bool clear_key;
29233  char *printed_key = NULL;
29234 
29235  /* Read key value if node is leaf and if key is not overflow (avoid fixing other pages here since it may
29236  * crash). */
29237  if (node_type == BTREE_LEAF_NODE && !btree_leaf_is_flaged (&update_record, BTREE_LEAF_RECORD_OVERFLOW_KEY))
29238  {
29239  btree_init_temp_key_value (&clear_key, &key);
29240  (void) btree_read_record (thread_p, &btid_int_for_debug, rcv->pgptr, &update_record, &key, &leaf_rec_info,
29241  node_type, &clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
29242  printed_key = pr_valstring (&key);
29243  btree_clear_key_value (&clear_key, &key);
29244  }
29245 
29247  "%s: update slotid=%d from %s page %d|%d, lsa=%lld|%d, in an unknown index."
29248  "key=%s, rv_debug_id=%d. Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO", slotid,
29249  node_type == BTREE_LEAF_NODE ? "leaf" : "overflow", PGBUF_PAGE_STATE_ARGS (rcv->pgptr),
29250  printed_key != NULL ? printed_key : "unknown", rv_debug_id, update_record.length);
29251  if (printed_key != NULL)
29252  {
29253  db_private_free (thread_p, printed_key);
29254  }
29255  }
29256  else
29257  {
29259  "%s: update slotid=%d from %s page %d|%d, lsa=%lld|%d, in an unknown index. "
29260  "Record length = %d.\n", is_undo ? "BTREE_UNDO" : "BTREE_REDO", slotid,
29261  node_type == BTREE_LEAF_NODE ? "leaf" : "overflow", PGBUF_PAGE_STATE_ARGS (rcv->pgptr),
29262  update_record.length);
29263  }
29264  }
29265  return NO_ERROR;
29266 }
29267 
29268 /*
29269  * btree_rv_remove_unique_stats () - Recovery function that removes the unique statistics of an index.
29270  * return: NO_ERROR, or ER_FAILED if the statistics cannot be deleted from the global hash
29271  * recv(in): Recovery structure. recv->data holds the index BTID; recv->offset is -1 for logical run postpone or compensate and 0 for redo
29272  *
29273  * Note: Remove unique statistics from global hash. If logtb_tran_find_btid_stats finds statistics for the index, they are flagged as deleted.
29274  */
29275 int
29277 {
29278  BTID btid;
29279  LOG_TRAN_BTID_UNIQUE_STATS *unique_stats;
29280  int ret = NO_ERROR;
29281 
29282  assert (recv->length == sizeof (btid));
29283 
29284  /* unpack the index btid */
29285  btid = *(BTID *) recv->data;
29286  ret = logtb_delete_global_unique_stats (thread_p, &btid);
29287  if (ret != NO_ERROR)
29288  {
29289  assert_release (false);
29290  return ER_FAILED;
29291  }
29292  unique_stats = logtb_tran_find_btid_stats (thread_p, &btid, false);
29293  if (unique_stats != NULL)
29294  {
29295  unique_stats->deleted = true;
29296  }
29297 
29298  if (recv->offset < 0)
29299  {
29300  /* logical run postpone or logical compensate. this will end with an end system op log record that it is only
29301  * executed once. however, if the server crashes, we will have to drop these statistics again.
29302  * we'll do it by adding a redo log. this redo log record should be executed again and again until we successfully
29303  * finish recovery and a new checkpoint is created after it. if server crashes again during recovery, the
29304  * statistics may again show up in the memory. so we are only safe when checkpoint passed this point.
29305  * the solution was a little hack-ish: we use offset value to separate the logical operation execution and redo
29306  * execution. another approach is to create two different recovery indexes and functions.
29307  */
29309  /* should be system op */
29311  assert (recv->offset == -1);
29312 
29313  /* append a new RVBT_REMOVE_UNIQUE_STATS redo log record. offset 0 makes its replay take the simple redo branch. */
29314  addr.offset = 0;
29315  log_append_redo_data (thread_p, RVBT_REMOVE_UNIQUE_STATS, &addr, sizeof (btid), &btid);
29316  }
29317  else
29318  {
29319  /* simple redo. statistics were already removed above; nothing more to do. */
29320  assert (recv->offset == 0);
29321  }
29322 
29323  return NO_ERROR;
29324 }
29325 
29326 /*
29327  * btree_physical_delete () - Physically delete an object (unlike MVCC delete, object and all its info are removed).
29328  *
29329  * return : Error code.
29330  * thread_p (in) : Thread entry.
29331  * btid (in) : B-tree ID.
29332  * key (in) : Key value.
29333  * oid (in) : Object OID.
29334  * class_oid (in) : Object class OID.
29335  * unique (out) : Output if index is unique.
29336  * op_type (in) : Operation type.
29337  * unique_stat_info (in/out) : Unique statistics information; forwarded to the internal insert/delete routine.
 *
 * In SERVER_MODE, objects of the serial class (oid_is_serial (class_oid)) are not removed here: the call is routed
 * to btree_insert_internal with the current transaction MVCCID set as delete MVCCID (see the comment in the body).
 * Every other case, and every call in SA_MODE, goes to btree_delete_internal with
 * BTREE_OP_DELETE_OBJECT_PHYSICAL.
 *
 * NOTE(review): the declaration of mvcc_info and the last argument line of the btree_insert_internal call are not
 *               visible in this listing.
29338  */
29339 int
29340 btree_physical_delete (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * oid, OID * class_oid, int *unique,
29341  int op_type, btree_unique_stats * unique_stat_info)
29342 {
29344 
29346  {
29348  "BTREE_DELETE: Start physical delete object %d|%d|%d, "
29349  "class_oid %d|%d|%d in index (%d, %d|%d).\n", oid->volid, oid->pageid, oid->slotid,
29350  class_oid->volid, class_oid->pageid, class_oid->slotid, btid->root_pageid, btid->vfid.volid,
29351  btid->vfid.fileid);
29352  }
29353 #if defined (SERVER_MODE)
29354  if (oid_is_serial (class_oid))
29355 #else /* !SERVER_MODE */ /* SA_MODE */
29356  if (false)
29357 #endif /* SA_MODE */
29358  {
29359  /* Before starting, we have to handle the special case of serials. Since next key locking was removed, deleting a
29360  * key from serial is not protected. Somebody may insert same key, and if the deleter undoes work, two objects
29361  * for the same key will be found. Therefore, it is better to postpone deleting object from index after commit.
29362  * While the deleter is still active, others will be blocked on the key. After deleter commits, it is no longer
29363  * critical to block new transactions checking the key (since it will not exist anyway). However, object is
29364  * still accessible to current transaction. Imagine next scenario (auto-commit off): drop serial s1; create
29365  * serial s1; If we leave the serial "s1" in index, creating it again will fail. Therefore we must also mark
29366  * existing object as deleted. We'll do this by using MVCC delete system: mark deletion using MVCCID. After
29367  * commit, the marked object will be removed. If transaction aborts, delete MVCCID will be removed. This should
29368  * actually apply to all unique indexes of non-MVCC classes. But we don't have this information yet (we
29369  * must access the index root page to find out). In the future, maybe index uniqueness can be easily accessible
29370  * (through a hash or any other system). Then we can apply same rules to all unique non-MVCC indexes. */
29371  MVCCID tran_mvccid = logtb_get_current_mvccid (thread_p);
29372 
 /* Mark the object as deleted by the current transaction instead of removing it right away. */
29373  BTREE_MVCC_INFO_SET_DELID (&mvcc_info, tran_mvccid);
29374 
29375  return btree_insert_internal (thread_p, btid, key, class_oid, oid, op_type, unique_stat_info, unique, &mvcc_info,
29377  }
29378  else
29379  {
 /* Regular case: remove the object and all its info from the key. */
29380  return btree_delete_internal (thread_p, btid, oid, class_oid, &mvcc_info, key, NULL, unique, op_type,
29381  unique_stat_info, NULL, NULL, NULL, BTREE_OP_DELETE_OBJECT_PHYSICAL);
29382  }
29383 }
29384 
29385 /*
29386  * btree_vacuum_insert_mvccid () - Vacuum the insert MVCCID of object.
29387  *
29388  * return : Error code.
29389  * thread_p (in) : Thread entry.
29390  * btid (in) : B-tree ID.
29391  * buffered_key (in) : Key value, still packed in a buffer.
29392  * oid (in) : Object OID.
29393  * class_oid (in) : Object class OID.
29394  * insert_mvccid (in) : Insert MVCCID of object.
 *
 * Thin wrapper over btree_delete_internal with purpose BTREE_OP_DELETE_VACUUM_INSID and op_type
 * SINGLE_ROW_MODIFY. insert_mvccid is the value the object found in the index is matched against. No uniqueness
 * output, statistics collector or reference LSA is passed.
 *
 * NOTE(review): the declarations of mvcc_info and match_mvccinfo are not visible in this listing.
29395  */
29396 int
29397 btree_vacuum_insert_mvccid (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid, OID * class_oid,
29398  MVCCID insert_mvccid)
29399 {
29402 
29404  {
29406  "BTREE_DELETE: Start vacuum insert MVCCID %lld from object %d|%d|%d, class_oid %d|%d|%d in "
29407  "index (%d, %d|%d).\n", (long long int) insert_mvccid, oid->volid, oid->pageid, oid->slotid,
29408  class_oid->volid, class_oid->pageid, class_oid->slotid, btid->root_pageid, btid->vfid.volid,
29409  btid->vfid.fileid);
29410  }
29411 
 /* The insert MVCCID to look for goes into the "match" info, not into the object's own MVCC info. */
29412  BTREE_MVCC_INFO_SET_INSID (&match_mvccinfo, insert_mvccid);
29413  return btree_delete_internal (thread_p, btid, oid, class_oid, &mvcc_info, NULL, buffered_key, NULL, SINGLE_ROW_MODIFY,
29414  NULL, &match_mvccinfo, NULL, NULL, BTREE_OP_DELETE_VACUUM_INSID);
29415 }
29416 
29417 /*
29418  * btree_vacuum_object () - Vacuum (remove) deleted object and all its info from b-tree key.
29419  *
29420  * return : Error code.
29421  * thread_p (in) : Thread entry.
29422  * btid (in) : B-tree ID.
29423  * buffered_key (in) : Key value, still packed in a buffer.
29424  * oid (in) : Object OID.
29425  * class_oid (in) : Object class OID.
29426  * delete_mvccid (in) : Delete MVCCID of object.
 *
 * Thin wrapper over btree_delete_internal with purpose BTREE_OP_DELETE_VACUUM_OBJECT and op_type
 * SINGLE_ROW_MODIFY. delete_mvccid is the value the object found in the index is matched against. No uniqueness
 * output, statistics collector or reference LSA is passed.
 *
 * NOTE(review): the declarations of mvcc_info and match_mvccinfo are not visible in this listing.
29427  */
29428 int
29429 btree_vacuum_object (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid, OID * class_oid,
29430  MVCCID delete_mvccid)
29431 {
29434 
29436  {
29438  "BTREE_DELETE: Start vacuum object %d|%d|%d, class_oid %d|%d|%d and delete MVCCID %lld in "
29439  "index (%d, %d|%d).\n", oid->volid, oid->pageid, oid->slotid, class_oid->volid, class_oid->pageid,
29440  class_oid->slotid, (long long int) delete_mvccid, btid->root_pageid, btid->vfid.volid,
29441  btid->vfid.fileid);
29442  }
29443 
 /* The delete MVCCID to look for goes into the "match" info, not into the object's own MVCC info. */
29444  BTREE_MVCC_INFO_SET_DELID (&match_mvccinfo, delete_mvccid);
29445  return btree_delete_internal (thread_p, btid, oid, class_oid, &mvcc_info, NULL, buffered_key, NULL, SINGLE_ROW_MODIFY,
29446  NULL, &match_mvccinfo, NULL, NULL, BTREE_OP_DELETE_VACUUM_OBJECT);
29447 }
29448 
29449 /*
29450  * btree_undo_mvcc_delete () - Undo MVCC delete (undo the insert of delete MVCCID).
29451  *
29452  * return : Error code.
29453  * thread_p (in) : Thread entry.
29454  * btid (in) : B-tree ID.
29455  * buffered_key (in) : Key value, still packed in a buffer.
29456  * oid (in) : Object OID.
29457  * class_oid (in) : Object class OID.
29458  * match_mvccinfo (in) : The MVCC information to be matched. Dereferenced for logging, so it must not be NULL.
29459  * undo_nxlsa (in) : UNDO next lsa for logical compensate.
 *
 * Thin wrapper over btree_delete_internal with purpose BTREE_OP_DELETE_UNDO_INSERT_DELID and op_type
 * SINGLE_ROW_MODIFY. undo_nxlsa is handed over as the reference LSA.
 *
 * NOTE(review): the declaration of mvcc_info is not visible in this listing.
29460  */
29461 static int
29462 btree_undo_mvcc_delete (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid, OID * class_oid,
29463  BTREE_MVCC_INFO * match_mvccinfo, LOG_LSA * undo_nxlsa)
29464 {
29466 
29468  {
29470  "BTREE_DELETE: Start undo MVCC delete on object %d|%d|%d, class_oid %d|%d|%d and "
29471  "delete MVCCID %lld in index (%d, %d|%d).\n", oid->volid, oid->pageid, oid->slotid,
29472  class_oid->volid, class_oid->pageid, class_oid->slotid,
29473  (long long int) match_mvccinfo->delete_mvccid, btid->root_pageid, btid->vfid.volid,
29474  btid->vfid.fileid);
29475  }
29476 
 /* The caller's match info is forwarded unchanged. */
29477  return btree_delete_internal (thread_p, btid, oid, class_oid, &mvcc_info, NULL, buffered_key, NULL, SINGLE_ROW_MODIFY,
29478  NULL, match_mvccinfo, undo_nxlsa, NULL, BTREE_OP_DELETE_UNDO_INSERT_DELID);
29479 }
29480 
29481 /*
29482  * btree_undo_insert_object () - Delete object from index as part of an undo of insert object operation.
29483  *
29484  * return : Error code.
29485  * thread_p (in) : Thread entry.
29486  * btid (in) : B-tree ID.
29487  * buffered_key (in) : Key value, still packed in a buffer.
29488  * oid (in) : Object OID.
29489  * class_oid (in) : Object class OID.
29490  * insert_mvccid (in) : Insert MVCCID. Used for matching unless it is MVCCID_ALL_VISIBLE.
29491  * undo_nxlsa (in) : UNDO next lsa for logical compensate.
 *
 * Thin wrapper over btree_delete_internal with purpose BTREE_OP_DELETE_UNDO_INSERT and op_type
 * SINGLE_ROW_MODIFY. undo_nxlsa is handed over as the reference LSA.
 *
 * NOTE(review): the declarations of mvcc_info and match_mvccinfo are not visible in this listing.
29492  */
29493 static int
29494 btree_undo_insert_object (THREAD_ENTRY * thread_p, BTID * btid, OR_BUF * buffered_key, OID * oid, OID * class_oid,
29495  MVCCID insert_mvccid, LOG_LSA * undo_nxlsa)
29496 {
29499 
29501  {
29503  "BTREE_DELETE: Start undo insert object %d|%d|%d, class_oid %d|%d|%d and insert MVCCID %lld "
29504  "in index (%d, %d|%d).\n", oid->volid, oid->pageid, oid->slotid, class_oid->volid,
29505  class_oid->pageid, class_oid->slotid, (long long int) insert_mvccid, btid->root_pageid,
29506  btid->vfid.volid, btid->vfid.fileid);
29507  }
29508 
 /* Only a specific insert MVCCID is recorded in the match info; MVCCID_ALL_VISIBLE leaves it untouched. */
29509  if (insert_mvccid != MVCCID_ALL_VISIBLE)
29510  {
29511  BTREE_MVCC_INFO_SET_INSID (&match_mvccinfo, insert_mvccid);
29512  }
29513  return btree_delete_internal (thread_p, btid, oid, class_oid, &mvcc_info, NULL, buffered_key, NULL, SINGLE_ROW_MODIFY,
29514  NULL, &match_mvccinfo, undo_nxlsa, NULL, BTREE_OP_DELETE_UNDO_INSERT);
29515 }
29516 
29517 /*
29518  * btree_undo_insert_object_unique_multiupd () - Delete object from unique index as part of an undo of insert object
29519  * operation - the insert moved older visible object during multi-update
29520  * and now it must be returned to first position.
29521  *
29522  * return : Error code.
29523  * thread_p (in) : Thread entry.
29524  * btid (in) : B-tree ID.
29525  * buffered_key (in) : Key value, still packed in a buffer.
29526  * inserted_object (in) : Inserted object info. Its oid and class_oid identify the object to remove.
29527  * second_object (in) : Second visible object info. Forwarded to btree_delete_internal as second_object_info.
29528  * insert_mvccid (in) : Insert MVCCID. Used for matching unless it is MVCCID_ALL_VISIBLE.
29529  * undo_nxlsa (in) : UNDO next lsa for logical compensate.
 *
 * NOTE(review): the first declarator line (function name and leading parameters), the declarations of mvcc_info
 *               and match_mvccinfo, and the last argument line of the btree_delete_internal call (the purpose)
 *               are not visible in this listing.
29530  */
29531 static int
29533  BTREE_OBJECT_INFO * inserted_object, BTREE_OBJECT_INFO * second_object,
29534  MVCCID insert_mvccid, LOG_LSA * undo_nxlsa)
29535 {
29538 
29540  {
29542  "BTREE_DELETE: Start undo insert object %d|%d|%d, class_oid %d|%d|%d and insert MVCCID %lld "
29543  "in index (%d, %d|%d). Special case of undo from unique "
29544  "index when previous visible object must be returned to first position.\n",
29545  inserted_object->oid.volid, inserted_object->oid.pageid, inserted_object->oid.slotid,
29546  inserted_object->class_oid.volid, inserted_object->class_oid.pageid,
29547  inserted_object->class_oid.slotid, (long long int) insert_mvccid, btid->root_pageid,
29548  btid->vfid.volid, btid->vfid.fileid);
29549  }
29550 
 /* Only a specific insert MVCCID is recorded in the match info; MVCCID_ALL_VISIBLE leaves it untouched. */
29551  if (insert_mvccid != MVCCID_ALL_VISIBLE)
29552  {
29553  BTREE_MVCC_INFO_SET_INSID (&match_mvccinfo, insert_mvccid);
29554  }
29555  return btree_delete_internal (thread_p, btid, &inserted_object->oid, &inserted_object->class_oid, &mvcc_info, NULL,
29556  buffered_key, NULL, SINGLE_ROW_MODIFY, NULL, &match_mvccinfo, undo_nxlsa, second_object,
29558 }
29559 
29560 /*
29561  * btree_delete_postponed () - Delete b-tree object on postpone.
29562  *
29563  * return : Error code.
29564  * thread_p (in) : Thread entry.
29565  * btid (in) : B-tree ID.
29566  * buffered_key (in) : Key value (buffered).
29567  * btree_obj (in) : B-tree object info. Its oid, class_oid and mvcc_info are all forwarded.
29568  * tran_mvccid (in) : Transaction MVCCID. Must be valid (asserted); matched as the object's delete MVCCID.
29569  * reference_lsa (in) : Postpone reference LSA.
 *
 * Unlike the vacuum/undo wrappers, the object's own MVCC info (btree_obj->mvcc_info) is passed to
 * btree_delete_internal rather than a local one.
 *
 * NOTE(review): the first declarator line (function name and leading parameters), the declaration of
 *               match_mvccinfo, and the last argument line of the btree_delete_internal call (the purpose) are
 *               not visible in this listing.
29570  */
29571 static int
29573  MVCCID tran_mvccid, LOG_LSA * reference_lsa)
29574 {
29576 
29577  assert (MVCCID_IS_VALID (tran_mvccid));
29578 
29580  {
29582  "BTREE_DELETE: Execute postponed delete: object %d|%d|%d, class_oid %d|%d|%d, "
29583  "mvcc_info=%llu|%llu, in index (%d, %d|%d).\n", btree_obj->oid.volid, btree_obj->oid.pageid,
29584  btree_obj->oid.slotid, btree_obj->class_oid.volid, btree_obj->class_oid.pageid,
29585  btree_obj->class_oid.slotid, (unsigned long long int) btree_obj->mvcc_info.insert_mvccid,
29586  (unsigned long long int) btree_obj->mvcc_info.delete_mvccid, btid->root_pageid, btid->vfid.volid,
29587  btid->vfid.fileid);
29588  }
29589 
 /* The transaction MVCCID is the delete MVCCID to look for on the object. */
29590  BTREE_MVCC_INFO_SET_DELID (&match_mvccinfo, tran_mvccid);
29591  return btree_delete_internal (thread_p, btid, &btree_obj->oid, &btree_obj->class_oid, &btree_obj->mvcc_info, NULL,
29592  buffered_key, NULL, SINGLE_ROW_MODIFY, NULL, &match_mvccinfo, reference_lsa, NULL,
29594 }
29595 
29596 /*
29597  * btree_delete_internal () - Index internal function to delete data from a b-tree key.
29598  *
29599  * return : Error code. ER_FAILED when purpose is not handled by the switch below.
29600  * thread_p (in) : Thread entry.
29601  * btid (in) : B-tree ID. Must be a valid index ID (asserted).
29602  * oid (in) : Object OID. Must not be NULL (asserted).
29603  * class_oid (in) : Object class OID.
29604  * mvcc_info (in) : Object MVCC info. Copied into the delete helper, so it must not be NULL.
29605  * key (in) : Object key value. Exactly one of key and buffered_key must be given (asserted).
29606  * buffered_key (in) : Buffered value of key. It is unpacked later, after the root is fixed and key type is known.
29607  * unique (out) : Output if index is unique. May be NULL; written only when the operation succeeds.
29608  * op_type (in) : Operation type. One of SINGLE_ROW_DELETE, MULTI_ROW_DELETE, SINGLE_ROW_UPDATE,
 *                MULTI_ROW_UPDATE or SINGLE_ROW_MODIFY (asserted).
29609  * unique_stat_info (in/out) : Unique statistics collector. Can be corrected before returning (see end of function).
29610  * match_mvccinfo (in) : B-tree MVCC info to be matched when searching object. May be NULL.
29611  * ref_lsa (in) : UNDO/Postpone reference LSA. Asserted non-NULL in the switch branches that copy it.
29612  * second_object_info (in) : B-tree object info for new version after an MVCC update same key. May be NULL.
29613  * purpose (in) : Purpose/context for function call. Selects the key function applied on the leaf.
 *
 * The function fills a BTREE_DELETE_HELPER from its arguments and runs btree_search_key_and_apply_functions with
 * btree_fix_root_for_delete as root function, btree_merge_node_and_advance as advance function and the key function
 * chosen from purpose. Interrupt checking is turned off around that call and restored afterwards.
 *
 * NOTE(review): the last declarator line (trailing parameters, including second_object_info and purpose) and the
 *               case labels of the switch are not visible in this listing.
29614  */
29615 static int
29616 btree_delete_internal (THREAD_ENTRY * thread_p, BTID * btid, OID * oid, OID * class_oid, BTREE_MVCC_INFO * mvcc_info,
29617  DB_VALUE * key, OR_BUF * buffered_key, int *unique, int op_type,
29618  btree_unique_stats * unique_stat_info, BTREE_MVCC_INFO * match_mvccinfo, LOG_LSA * ref_lsa,
29620 {
29621  /* Structure used by internal functions. */
29622  BTREE_DELETE_HELPER delete_helper;
29623  BTID_INT btree_info; /* B-tree info. */
29624  int error_code = NO_ERROR; /* Error code. */
29625  bool old_check_interrupt = false; /* Save check interrupt before setting it to false. */
29626  BTREE_PROCESS_KEY_FUNCTION *key_func = NULL; /* Internal function called to manipulate key. */
29627  DB_VALUE local_key; /* Local storage for DB_VALUE if key is buffered. */
29628 
29629  /* Assert expected arguments. */
29630  assert (btid != NULL && !BTREE_INVALID_INDEX_ID (btid));
29631  assert ((key == NULL && buffered_key != NULL) || (key != NULL && buffered_key == NULL));
29632  assert (oid != NULL);
29633  assert (op_type == SINGLE_ROW_DELETE || op_type == MULTI_ROW_DELETE || op_type == SINGLE_ROW_UPDATE
29634  || op_type == MULTI_ROW_UPDATE || op_type == SINGLE_ROW_MODIFY);
29635 
29636  PERF_UTIME_TRACKER_START (thread_p, &delete_helper.time_track);
29637 
29638  /* Choose internal function based on purpose. Branches that receive a reference LSA also copy it into the helper. */
29639  switch (purpose)
29640  {
29643  /* Set ref_lsa. */
29644  assert (ref_lsa != NULL);
29645  LSA_COPY (&delete_helper.reference_lsa, ref_lsa);
29646  /* Fall through. */
29649  key_func = btree_key_delete_remove_object;
29650  break;
29653  /* Set ref_lsa. */
29654  assert (ref_lsa != NULL);
29655  LSA_COPY (&delete_helper.reference_lsa, ref_lsa);
29656  break;
29658  key_func = btree_key_remove_insert_mvccid;
29659  break;
29661  /* Set ref_lsa. */
29662  assert (ref_lsa != NULL);
29663  LSA_COPY (&delete_helper.reference_lsa, ref_lsa);
29664  key_func = btree_key_remove_delete_mvccid;
29665  break;
29666  default:
29667  /* Unhandled or unexpected. */
29668  assert_release (false);
29669  return ER_FAILED;
29670  }
29671 
29672  /* Initialize delete helper. */
29673  COPY_OID (BTREE_DELETE_OID (&delete_helper), oid);
29674  COPY_OID (BTREE_DELETE_CLASS_OID (&delete_helper), class_oid);
29675  *BTREE_DELETE_MVCC_INFO (&delete_helper) = *mvcc_info;
29676  delete_helper.purpose = purpose;
29677 
29678  /* Set operation type. */
29679  delete_helper.op_type = op_type;
29680 
29681  /* Set unique stats information. */
29682  delete_helper.unique_stats_info = unique_stat_info;
29683 
29684  /* Set MVCCID to be matched. */
29685  if (match_mvccinfo != NULL)
29686  {
29687  delete_helper.match_mvccinfo = *match_mvccinfo;
29688  }
29689 
29690  /* Is key buffered? */
29691  if (buffered_key != NULL)
29692  {
29693  /* Key will be unpacked after fixing root and getting key type. */
29694  delete_helper.buffered_key = buffered_key;
 /* From here on "key" points to local storage, initialized to a NULL value; it is cleared before returning. */
29695  key = &local_key;
29696  db_make_null (key);
29697  }
29698 
29699  /* Log b-tree operations? For debugging. */
29701 
29702  if (second_object_info != NULL)
29703  {
29704  delete_helper.second_object_info = *second_object_info;
29705  }
29706 
29707  /* Add more btree_delete_helper initialization here. */
29708 
 /* Disable interrupt checks for the duration of the b-tree traversal; the previous setting is restored below. */
29709  old_check_interrupt = logtb_set_check_interrupt (thread_p, false);
29711 
29712  error_code =
29713  btree_search_key_and_apply_functions (thread_p, btid, &btree_info, key, btree_fix_root_for_delete, &delete_helper,
29714  btree_merge_node_and_advance, &delete_helper, key_func, &delete_helper, NULL,
29715  NULL);
29716 
29717  (void) logtb_set_check_interrupt (thread_p, old_check_interrupt);
29719 
 /* Release resources on both the success and the error path: the printed key and the locally unpacked key. */
29720  if (delete_helper.printed_key != NULL)
29721  {
29722  db_private_free (thread_p, delete_helper.printed_key);
29723  }
29724 
29725  if (buffered_key != NULL)
29726  {
29727  assert (key != NULL);
29728  pr_clear_value (key);
29729  }
29730 
29731  if (error_code != NO_ERROR)
29732  {
29733  ASSERT_ERROR ();
29734 
29735  btree_delete_log (&delete_helper, "failed operation, error_code = %d \n" BTREE_DELETE_HELPER_MSG ("\t")
29736  "\t" BTREE_ID_MSG,
29737  error_code, BTREE_DELETE_HELPER_AS_ARGS (&delete_helper), BTID_AS_ARGS (btid));
 /* *unique is left untouched on failure. */
29738  return error_code;
29739  }
29740 
29742 
29743  if (unique != NULL)
29744  {
29745  *unique = BTREE_IS_UNIQUE (btree_info.unique_pk);
29746  }
29747 
29748  if (delete_helper.check_key_deleted && !delete_helper.is_key_deleted)
29749  {
29750  /* Correct unique stats info (key is not actually deleted). */
29751  assert (delete_helper.unique_stats_info != NULL);
29752  // todo - just remove row, not key from the beginning
29753  // revert the key-and-row delete that was counted when the root was fixed
29754  delete_helper.unique_stats_info->insert_key_and_row ();
29755  // delete row
29756  delete_helper.unique_stats_info->delete_row ();
29757  }
29758 
29759  return NO_ERROR;
29760 }
29761 
29762 /*
29763  * btree_fix_root_for_delete () - BTREE_ROOT_WITH_KEY_FUNCTION - fix root page before deleting data from a key.
29764  *
29765  * return : Error code.
29766  * thread_p (in) : Thread entry.
29767  * btid (in) : B-tree ID.
29768  * btid_int (in/out) : Can output b-tree info. Filled on the first search only.
29769  * key (in/out) : Key value. On the first search, a buffered key carried by the helper is unpacked into it.
29770  * root_page (out) : Fixed root node page. Must point to NULL on entry (asserted).
29771  * is_leaf (in) : Not used.
29772  * search_key (in) : Not used.
29773  * stop (out) : Outputs to stop when deleting a NULL key.
29774  * restart (out) : Not used.
29775  * other_args (in/out) : BTREE_DELETE_HELPER *
 *
 * On the first search the function also unpacks the key if needed, optionally prints it for logging, and, for
 * BTREE_OP_DELETE_OBJECT_PHYSICAL on unique indexes, accounts the delete in the unique statistics. Later calls
 * (helper's is_first_search already reset) only fix the root page.
 *
 * NOTE(review): several lines of the undo/postpone purpose handling (extra conditions and asserts) are not visible
 *               in this listing.
29776  */
29777 static int
29778 btree_fix_root_for_delete (THREAD_ENTRY * thread_p, BTID * btid, BTID_INT * btid_int, DB_VALUE * key,
29779  PAGE_PTR * root_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key, bool * stop,
29780  bool * restart, void *other_args)
29781 {
29782  /* Structure used for internal functions used in btree_delete_internal. */
29783  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
29784  int error_code = NO_ERROR; /* Error code. */
29785  bool is_null = false; /* Is key null. */
29786 
29787  /* Assert expected arguments. */
29788  assert (btid != NULL);
29789  assert (btid_int != NULL);
29790  assert (root_page != NULL && *root_page == NULL);
29791  assert (delete_helper != NULL);
29792  assert (btree_is_delete_data_purpose (delete_helper->purpose));
29793 
29794  /* Root node is being fixed. */
29795  delete_helper->is_root = true;
29796  if (delete_helper->is_first_search)
29797  {
29798  /* First search: read b-tree info. */
29799  *root_page = btree_fix_root_with_info (thread_p, btid, delete_helper->nonleaf_latch_mode, NULL, NULL, btid_int);
29800  if (*root_page == NULL)
29801  {
29802  ASSERT_ERROR_AND_SET (error_code);
29803  return error_code;
29804  }
29805  }
29806  else
29807  {
29808  /* Just fix root page. B-tree info, key and statistics were handled by the first search. */
29809  *root_page = btree_fix_root_with_info (thread_p, btid, delete_helper->nonleaf_latch_mode, NULL, NULL, NULL);
29810  if (*root_page == NULL)
29811  {
29812  ASSERT_ERROR_AND_SET (error_code);
29813  return error_code;
29814  }
29815  return NO_ERROR;
29816  }
29817  /* Root page fixed. */
29818  /* This is first search. */
29819  /* Reset first search flag. We don't want to repeat next operations. */
29820  delete_helper->is_first_search = false;
29821 
29822  /* If buffered key is not NULL, key value must be unpacked. */
29823  if (delete_helper->buffered_key != NULL)
29824  {
29825  /* Unpack key from buffer. */
29826  PR_TYPE *pr_type;
29827  int key_size = -1;
29828 
29829  /* Assert key is initialized. */
29830  assert (DB_IS_NULL (key));
29831 
29832  pr_type = btid_int->key_type->type;
29833 
29834  /* Do not copy the string--just use the pointer. The pr_ routines for strings and sets have different semantics
29835  * for length. For multi-column keys the size is the number of bytes left in the buffer; otherwise it stays -1. */
29836  if (pr_type->id == DB_TYPE_MIDXKEY)
29837  {
29838  key_size = CAST_BUFLEN (delete_helper->buffered_key->endptr - delete_helper->buffered_key->ptr);
29839  }
29840 
29841  /* Read key. */
29842  error_code = pr_type->index_readval (delete_helper->buffered_key, key, btid_int->key_type, key_size,
29843  false /* not copy */ , NULL, 0);
29844  if (error_code != NO_ERROR)
29845  {
29846  ASSERT_ERROR ();
 /* The root page is released here before reporting the failure. */
29847  pgbuf_unfix_and_init (thread_p, *root_page);
29848  return error_code;
29849  }
29850  }
29851 
29852  if (key != NULL && DB_VALUE_DOMAIN_TYPE (key) == DB_TYPE_MIDXKEY)
29853  {
29854  /* Set complete set domain. */
29855  key->data.midxkey.domain = btid_int->key_type;
29856  }
29857 
29858  /* Is key NULL? A missing key, a NULL value and a multi-column key reported NULL by btree_multicol_key_is_null all
 * count. */
29859  is_null = key == NULL || DB_IS_NULL (key) || btree_multicol_key_is_null (key);
29860 
29861  /* Safe guard: key type matches. */
29862  assert (is_null || TP_ARE_COMPARABLE_KEY_TYPES (DB_VALUE_DOMAIN_TYPE (key), btid_int->key_type->type->id));
29863 
29864  if (delete_helper->log_operations)
29865  {
29866  /* Key must be printed. The printed form is also hashed (SHA1) into the helper.
 * NOTE(review): the result of pr_valstring is passed to strlen unchecked - confirm it cannot be NULL here. */
29867  delete_helper->printed_key = pr_valstring (key);
29868  (void) SHA1Compute ((unsigned char *) delete_helper->printed_key, strlen (delete_helper->printed_key),
29869  &delete_helper->printed_key_sha1);
29870  }
29871 
29872  /* Safe guard: key cannot always be NULL. */
29873  assert (!is_null || delete_helper->purpose == BTREE_OP_DELETE_OBJECT_PHYSICAL
29874  || delete_helper->purpose == BTREE_OP_ONLINE_INDEX_TRAN_DELETE
29875  || delete_helper->purpose == BTREE_OP_ONLINE_INDEX_UNDO_TRAN_INSERT);
29876 
29877  if (delete_helper->purpose == BTREE_OP_DELETE_VACUUM_INSID || delete_helper->purpose == BTREE_OP_DELETE_VACUUM_OBJECT)
29878  {
29879  /* Vacuum operations don't need to go further. */
29880  return NO_ERROR;
29881  }
29882  if (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT
29884  || delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT_DELID
29886  || delete_helper->purpose == BTREE_OP_ONLINE_INDEX_UNDO_TRAN_INSERT)
29887  {
29888  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
29889  {
29890  /* Class OID is not packed for undo when it matches topclass_oid. If it is NULL here, then it must be
29891  * replaced with topclass_oid. */
29892  if (OID_ISNULL (BTREE_DELETE_CLASS_OID (delete_helper)))
29893  {
29894  COPY_OID (BTREE_DELETE_CLASS_OID (delete_helper), &btid_int->topclass_oid);
29895 
29896  /* BTREE_OP_DELETE_OBJECT_PHYSICAL_POSTPONED is used only for non-MVCC classes. */
29899  }
 /* The same replacement is applied to the class OID of the second object. */
29901  && OID_ISNULL (&delete_helper->second_object_info.class_oid))
29902  {
29903  COPY_OID (&delete_helper->second_object_info.class_oid, &btid_int->topclass_oid);
29904  }
29905  }
29906  else
29907  {
29908  /* BTREE_OP_DELETE_OBJECT_PHYSICAL_POSTPONED is used only for unique indexes. */
29910  }
29911  /* Undo operations don't need to go further. */
29912  return NO_ERROR;
29913  }
29914 
29915  assert (btree_is_delete_object_purpose (delete_helper->purpose));
29916 
29917  /* Update unique statistics. */
29918  if (BTREE_IS_UNIQUE (btid_int->unique_pk) && delete_helper->purpose == BTREE_OP_DELETE_OBJECT_PHYSICAL)
29919  {
29920  /* Do not update statistics when vacuuming or during undo recovery. */
29921  btree_unique_stats incr;
29922 
 /* A NULL key is counted as a deleted null plus row, any other key as a deleted key plus row. */
29923  if (is_null)
29924  {
29925  incr.delete_null_and_row ();
29926  }
29927  else
29928  {
29929  incr.delete_key_and_row ();
29930  }
29931 
29932  if (BTREE_IS_MULTI_ROW_OP (delete_helper->op_type))
29933  {
29934  /* Collect statistics in the caller-provided collector; nothing is logged here. */
29935  assert (delete_helper->unique_stats_info != NULL);
29936  (*delete_helper->unique_stats_info) += incr;
29937 
29938  if (delete_helper->op_type == MULTI_ROW_UPDATE)
29939  {
29940  /* It is possible that after deleting key, it isn't actually deleted. We must count visible objects and
29941  * correct above statistics. The correction itself is applied by btree_delete_internal after the search. */
29942  delete_helper->check_key_deleted = true;
29943  delete_helper->is_key_deleted = true;
29944  }
29945  }
29946  else
29947  {
29948  /* Save and log statistics changes. */
29949  if (!btree_is_online_index_loading (delete_helper->purpose))
29950  {
29951  error_code = logtb_tran_update_unique_stats (thread_p, *btid, incr, true);
29952  if (error_code != NO_ERROR)
29953  {
29954  ASSERT_ERROR ();
 /* NOTE(review): unlike the index_readval failure above, *root_page is left fixed on this return -
 * presumably the caller releases it; confirm. */
29955  return error_code;
29956  }
29957  }
29958  }
29959  }
29960 
29961  if (is_null)
29962  {
29963  /* Nothing to do anymore: a NULL key ends the search here (statistics, if any, were handled above). */
29964  *stop = true;
29965  }
29966  return NO_ERROR;
29967 }
29968 
29969 /*
29970  * btree_merge_node_and_advance () - BTREE_ADVANCE_WITH_KEY_FUNCTION used by btree_delete_internal to merge b-tree
29971  * nodes while advancing to delete data from a key.
29972  *
29973  * return : Error code.
29974  * thread_p (in) : Thread entry.
29975  * btid_int (in) : B-tree info.
29976  * key (in) : Key to follow while advancing.
29977  * crt_page (in) : Pointer to current node's page.
29978  * advance_to_page (out) : Outputs next node page to advance to.
29979  * is_leaf (out) : Outputs whether current node is leaf.
29980  * search_key (out) : Outputs search key result when current node is leaf.
29981  * stop (out) : Outputs to end advancing (not used).
29982  * restart (out) : Outputs to restart from root.
29983  * other_args (in/out) : BTREE_DELETE_HELPER *
29984  */
29985 static int
29986 btree_merge_node_and_advance (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * crt_page,
29987  PAGE_PTR * advance_to_page, bool * is_leaf, BTREE_SEARCH_KEY_HELPER * search_key,
29988  bool * stop, bool * restart, void *other_args)
29989 {
29990  /* Delete helper used by internal functions of btree_delete_internal. */
29991  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
29992  PAGE_PTR left_page = NULL; /* Left page to merge. */
29993  PAGE_PTR right_page = NULL; /* Right page to merge. */
29994  VPID left_vpid = VPID_INITIALIZER; /* VPID of left page to merge. */
29995  VPID right_vpid = VPID_INITIALIZER; /* VPID of right page to merge. */
29996  int left_used = 0; /* Space used in left page. */
29997  int right_used = 0; /* Space used in right page. */
29998  int error_code = NO_ERROR; /* Error code. */
29999  int key_count = 0; /* Node key count. */
30000  BTREE_NODE_HEADER *node_header = NULL; /* B-tree node header. */
30001  RECDES left_recdes, right_recdes; /* Record descriptors to read links to left/right pages. */
30002  NON_LEAF_REC non_leaf_rec_info; /* Non-leaf record info used to read links to left/right pages. */
30003  PGBUF_LATCH_MODE child_latch = PGBUF_LATCH_READ; /* Latch mode for children of current node. */
30004  PAGE_FETCH_MODE neighbor_fetch_mode = OLD_PAGE; /* fetch mode for neighbors checked on merge. */
30005  PGBUF_PROMOTE_CONDITION promote_cond; /* Promote condition when write latch is required on nodes. */
30006  VPID child_vpid; /* VPID of next child by following key argument. */
30007  VPID child_vpid_after_merge; /* VPID of next by following key argument after merge is done. */
30008  PAGE_PTR child_page = NULL; /* Next page by following key argument. */
30009  BTREE_MERGE_STATUS merge_status; /* Status that tells when nodes can be merged. */
30010  bool need_root_merge = false; /* Set to true when root can be merged. */
30011  bool force_root_merge = false; /* Set to true when root must be merged. */
30012  bool is_system_op_started = false; /* Set to true when a system operation is started to be properly aborted in
30013  * case of errors. */
30014 
30015  LOG_LSA save_lsa = LSA_INITIALIZER;
30016  LOG_LSA save_child_lsa = LSA_INITIALIZER;
30017 
30018  /* Assert expected arguments. */
30019  assert (btid_int != NULL);
30020  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
30021  assert (crt_page != NULL && *crt_page != NULL);
30022  assert (advance_to_page != NULL && *advance_to_page == NULL);
30023  assert (is_leaf != NULL);
30024  assert (search_key != NULL);
30025  assert (restart != NULL);
30026  assert (delete_helper != NULL);
30027 
30028  /* Merge algorithm: There are two types of merges: root merge and normal merge. 1. Root merge: If root has only two
30029  * keys and a level more than 2, it could be merged if all keys stored in leaf page pass the size check. All three
30030  * nodes are merged into current root. After merging, function will continue to check the merged node similarly with
30031  * any non-leaf nodes. NOTE: I don't really know why the root_level has to be greater than 2. This means the b-tree
30032  * will never be reduced back to one node. It may not be a real issue, since the index is too small to worry about
30033  * performance. 2. Normal merge: Two nodes are merged if they pass the size check or if any of them is empty. Both
30034  * are merged to "left" node and their parent is updated. During delete, the child node found by following key
30035  * argument is tested against its neighbors. A node is merged only once (if it was merged to the right node, it will
30036  * not be merged to left node too). To optimize b-tree access, the algorithm assumes that no change is required
30037  * (nodes are not merged) and uses READ latch on non-leaf nodes (leaf nodes still require WRITE latch since they are
30038  * very likely to be changed). Instead, if merge is required, latches are then promoted to WRITE. If promotions fail,
30039  * the algorithm has two choices: 1. Skip merging and just advance to child page. 2. Restart b-tree traversal using
30040  * exclusive access and force the merge. Second choice is decided when the two nodes use together less than one
30041  * third of a page or when one of them is completely empty. Normally, promotions use shared reader condition
30042  * (multiple readers are allowed to promote, but no other promoters). On level 2 of b-tree, single reader condition
30043  * is used for promotion. The restriction is explained in btree_split_node_and_advance. */
30044 
30045  /* Get current node header. */
30046  node_header = btree_get_node_header (thread_p, *crt_page);
30047  if (node_header == NULL)
30048  {
30049  assert_release (false);
30050  return ER_FAILED;
30051  }
30052  if (node_header->node_level == 1)
30053  {
30054  /* This is a leaf node. Advancing can stop. Search key, promote latch and return. */
30055  error_code = btree_search_leaf_page (thread_p, btid_int, *crt_page, key, search_key);
30056  if (error_code != NO_ERROR)
30057  {
30058  ASSERT_ERROR ();
30059  return error_code;
30060  }
30061  if (delete_helper->is_root && delete_helper->nonleaf_latch_mode != PGBUF_LATCH_WRITE)
30062  {
30063  /* Promote latch. */
30064  error_code = pgbuf_promote_read_latch (thread_p, crt_page, PGBUF_PROMOTE_SHARED_READER);
30065  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
30066  {
30067  /* Promotion failed. Restart using write latch directly. */
30068  *restart = true;
30069  delete_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
30070  return NO_ERROR;
30071  }
30072  else if (error_code != NO_ERROR)
30073  {
30074  /* Error promoting. */
30075  ASSERT_ERROR ();
30076  return error_code;
30077  }
30078  else if (*crt_page == NULL)
30079  {
30080  /* Promoting has failed. Make sure an error has been set. */
30081  ASSERT_ERROR_AND_SET (error_code);
30082  return error_code;
30083  }
30084  }
30085  else
30086  {
30087  /* Leaf node should already have exclusive latch. */
30089  }
30090  /* Successful. */
30091  *is_leaf = true;
30092  return NO_ERROR;
30093  }
30094  /* Not a leaf page. */
30095 
30096  /* Get key count. */
30097  key_count = btree_node_number_of_keys (thread_p, *crt_page);
30098 
30099  /* Check if current node is root and must be merged. */
30100  if (delete_helper->is_root /* Current node is root. */
30101  && node_header->node_level > 2 /* Its level is more than two. */
30102  && btree_node_number_of_keys (thread_p, *crt_page) == 2 /* Has only two keys */ )
30103  {
30104  /* Save root max key length. */
30105  int root_max_key_length = node_header->max_key_len;
30106 
30107  /* Since the root has at least level 2, its children are non-leaf. */
30108 
30109  /* Read the first record. */
30110  if (spage_get_record (thread_p, *crt_page, 1, &left_recdes, PEEK) != S_SUCCESS)
30111  {
30112  assert_release (false);
30113  error_code = ER_FAILED;
30114  goto error;
30115  }
30116  btree_read_fixed_portion_of_non_leaf_record (&left_recdes, &non_leaf_rec_info);
30117  /* Fix left child. */
30118  VPID_COPY (&left_vpid, &non_leaf_rec_info.pnt);
30119  assert (!VPID_ISNULL (&left_vpid));
30120  left_page =
30121  pgbuf_fix (thread_p, &left_vpid, OLD_PAGE, delete_helper->nonleaf_latch_mode, PGBUF_UNCONDITIONAL_LATCH);
30122  if (left_page == NULL)
30123  {
30124  ASSERT_ERROR_AND_SET (error_code);
30125  goto error;
30126  }
30127 #if !defined (NDEBUG)
30128  (void) pgbuf_check_page_ptype (thread_p, left_page, PAGE_BTREE);
30129 #endif /* !NDEBUG */
30130  left_used = DB_PAGESIZE - spage_get_free_space (thread_p, left_page);
30131 
30132  /* Read second record. */
30133  if (spage_get_record (thread_p, *crt_page, 2, &right_recdes, PEEK) != S_SUCCESS)
30134  {
30135  assert_release (false);
30136  error_code = ER_FAILED;
30137  goto error;
30138  }
30139  btree_read_fixed_portion_of_non_leaf_record (&right_recdes, &non_leaf_rec_info);
30140  /* Fix right child. */
30141  VPID_COPY (&right_vpid, &non_leaf_rec_info.pnt);
30142  assert (!VPID_ISNULL (&right_vpid));
30143  right_page =
30144  pgbuf_fix (thread_p, &right_vpid, OLD_PAGE, delete_helper->nonleaf_latch_mode, PGBUF_UNCONDITIONAL_LATCH);
30145  if (right_page == NULL)
30146  {
30147  ASSERT_ERROR_AND_SET (error_code);
30148  goto error;
30149  }
30150 #if !defined (NDEBUG)
30151  (void) pgbuf_check_page_ptype (thread_p, right_page, PAGE_BTREE);
30152 #endif /* !NDEBUG */
30153  right_used = DB_PAGESIZE - spage_get_free_space (thread_p, right_page);
30154 
30155  /* Is merge needed? Should be forced if promotion fails? */
30156  /* We need to consider the largest key size since merge will use a key from root page as the middle key. It may
30157  * be larger than any key in its children. This may be overly defensive, but it doesn't seem so bad for root
30158  * merges. TODO: The above comment may be part of a legacy code. Currently, Merging root algorithm doesn't show
30159  * any extra key being required. Consider removing root_max_key_length. */
30160  need_root_merge = (left_used + right_used + CAN_MERGE_WHEN_EMPTY + root_max_key_length) < DB_PAGESIZE;
30161  /* If latch promotions fail, merge might be skipped. However, it is not desirable to skip all merges in a highly
30162  * accessed index. So we have two levels of how much space can be wasted before merge is executed: - one level
30163  * when merge is attempted with latch promotion. - one level, when more space was wasted and we need to force
30164  * restarting b-tree traversal using exclusive latches and making sure that merge is executed. */
30165  force_root_merge = (left_used + right_used + FORCE_MERGE_WHEN_EMPTY + root_max_key_length) < DB_PAGESIZE;
30166 
30167  /* If merge is required, promote latches. */
30168  if (need_root_merge)
30169  {
30170  /* Need merge. */
30171  /* All pages must be write latched. */
30172 
30173  if (delete_helper->nonleaf_latch_mode == PGBUF_LATCH_READ)
30174  {
30175  /* Promote latches. */
30176 
30177  /* First promote root. */
30178  error_code = pgbuf_promote_read_latch (thread_p, crt_page, PGBUF_PROMOTE_ONLY_READER);
30179 
30180  if (error_code == NO_ERROR && *crt_page != NULL)
30181  {
30182  /* Root successfully promoted. Promote left page. */
30183  error_code = pgbuf_promote_read_latch (thread_p, &left_page, PGBUF_PROMOTE_SHARED_READER);
30184  if (error_code == NO_ERROR && left_page != NULL)
30185  {
30186  /* Left page successfully promoted. Promote right page. */
30187  error_code = pgbuf_promote_read_latch (thread_p, &right_page, PGBUF_PROMOTE_SHARED_READER);
30188  }
30189  }
30190  }
30191  else
30192  {
30193  /* Pages already latched exclusively. Fall through to continue merging. */
30194  }
30195  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
30196  {
30197  /* Not all nodes could be promoted. */
30198  if (force_root_merge)
30199  {
30200  /* Restart b-tree traversal with exclusive access. */
30201  delete_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
30202  *restart = true;
30203  if (left_page != NULL)
30204  {
30205  pgbuf_unfix_and_init (thread_p, left_page);
30206  }
30207  if (right_page != NULL)
30208  {
30209  pgbuf_unfix_and_init (thread_p, right_page);
30210  }
30211  return NO_ERROR;
30212  }
30213  /* Skip merging. */
30214  /* Fall through to advance to child. */
30215  }
30216  else if (error_code != NO_ERROR)
30217  {
30218  ASSERT_ERROR ();
30219  goto error;
30220  }
30221  else if (*crt_page == NULL || left_page == NULL || right_page == NULL)
30222  {
30223  ASSERT_ERROR_AND_SET (error_code);
30224  goto error;
30225  }
30226  else
30227  {
30228  /* All pages are write latched. Merge the nodes. */
30230  && (pgbuf_get_latch_mode (left_page) >= PGBUF_LATCH_WRITE)
30231  && (pgbuf_get_latch_mode (right_page) >= PGBUF_LATCH_WRITE));
30232 
30233  /* Start system operation. */
30234  log_sysop_start (thread_p);
30235  is_system_op_started = true;
30236 
30237  /* Merge the three nodes into root node. */
30238  error_code = btree_merge_root (thread_p, btid_int, *crt_page, left_page, right_page);
30239  if (error_code != NO_ERROR)
30240  {
30241  ASSERT_ERROR ();
30242  goto error;
30243  }
30244 #if !defined (NDEBUG)
30245  (void) spage_check_num_slots (thread_p, *crt_page);
30246 #endif /* !NDEBUG */
30247 
30248  pgbuf_unfix_and_init (thread_p, left_page);
30249  error_code = file_dealloc (thread_p, &btid_int->sys_btid->vfid, &left_vpid, FILE_BTREE);
30250  if (error_code != NO_ERROR)
30251  {
30252  ASSERT_ERROR ();
30253  goto error;
30254  }
30255  pgbuf_unfix_and_init (thread_p, right_page);
30256  error_code = file_dealloc (thread_p, &btid_int->sys_btid->vfid, &right_vpid, FILE_BTREE);
30257  if (error_code != NO_ERROR)
30258  {
30259  ASSERT_ERROR ();
30260  goto error;
30261  }
30262 
30263  /* Merge successfully finished. */
30264  log_sysop_commit (thread_p);
30265  is_system_op_started = false;
30266 
30267  /* Nodes have been merged into root. Repeat loop in case we can merge root again. */
30268  *advance_to_page = *crt_page;
30269  *crt_page = NULL;
30270  return NO_ERROR;
30271  }
30272  }
30273  /* Root was not merged. */
30274  /* Advance to one of the children. */
30275  error_code = btree_search_nonleaf_page (thread_p, btid_int, *crt_page, key, &search_key->slotid, &child_vpid,
30276  NULL);
30277  if (error_code != NO_ERROR)
30278  {
30279  ASSERT_ERROR ();
30280  goto error;
30281  }
30282  assert (search_key->slotid == 1 || search_key->slotid == 2);
30283  pgbuf_unfix_and_init (thread_p, *crt_page);
30284  if (search_key->slotid == 1)
30285  {
30286  *crt_page = left_page;
30287  left_page = NULL;
30288  pgbuf_unfix_and_init (thread_p, right_page);
30289  }
30290  else
30291  {
30292  *crt_page = right_page;
30293  right_page = NULL;
30294  pgbuf_unfix_and_init (thread_p, left_page);
30295  }
30296  /* We advanced to one of the children. Proceed to check non-leaf node. */
30297 
30298  /* Get node header. */
30299  node_header = btree_get_node_header (thread_p, *crt_page);
30300  if (node_header == NULL)
30301  {
30302  assert_release (false);
30303  return ER_FAILED;
30304  }
30305  /* This cannot be a leaf node. */
30306  assert (node_header->node_level > 1);
30307  /* Get key count. */
30308  key_count = btree_node_number_of_keys (thread_p, *crt_page);
30309  }
30310  assert (left_page == NULL);
30311  assert (right_page == NULL);
30312 
30313  /* Choose the node to advance to. Then check whether it can be merged with any of its neighbors. */
30314  error_code = btree_search_nonleaf_page (thread_p, btid_int, *crt_page, key, &search_key->slotid, &child_vpid, NULL);
30315  if (error_code != NO_ERROR)
30316  {
30317  ASSERT_ERROR ();
30318  goto error;
30319  }
30320  if (node_header->node_level == 2)
30321  {
30322  /* Next child is leaf and must be latched using write latch mode directly. Promote condition is single reader to
30323  * avoid dead-latch (see the comment explaining promote condition in btree_split_node_and_advance). */
30324  child_latch = PGBUF_LATCH_WRITE;
30325  promote_cond = PGBUF_PROMOTE_ONLY_READER;
30326  }
30327  else
30328  {
30329  /* Use non-leaf latch mode. */
30330  /* Promote when non-leaf latch mode is PGBUF_LATCH_READ can be shared reader. If latch mode is PGBUF_LATCH_WRITE,
30331  * no promotion is required. */
30332  child_latch = delete_helper->nonleaf_latch_mode;
30333  promote_cond = PGBUF_PROMOTE_SHARED_READER;
30334  }
30335  /* Fix child node. */
30336  child_page = pgbuf_fix (thread_p, &child_vpid, OLD_PAGE, child_latch, PGBUF_UNCONDITIONAL_LATCH);
30337  if (child_page == NULL)
30338  {
30339  ASSERT_ERROR_AND_SET (error_code);
30340  goto error;
30341  }
30342 #if !defined (NDEBUG)
30343  (void) pgbuf_check_page_ptype (thread_p, child_page, PAGE_BTREE);
30344 #endif /* !NDEBUG */
30345 
30346  /* Get header of child. */
30347  node_header = btree_get_node_header (thread_p, child_page);
30348  if (node_header == NULL)
30349  {
30350  assert_release (false);
30351  error_code = ER_FAILED;
30352  goto error;
30353  }
30354 
30355  /* todo: do another cleanup of this code!! */
30356 
30357  /* let's try merge with right node..
30358  * note: we will try to avoid checking merges uselessly. The whole check can be very expensive if we also have to load
30359  * the node from disk. I am talking from (bad) experience here. The merges are usually done by vacuum, following
30360  * the path of active workers. In huge indexes, that can be several levels in height, the lower levels
30361  * neighboring nodes can be cold. The problem is not really reading the page from disk (which is relatively
30362  * fast), but finding a viable bcb to victimize and replace with this cold page. Usually writes are slower, and
30363  * if the IO write does not keep up, bcb allocation becomes the main bottleneck. And this expensive check makes
30364  * vacuum fall behind, which is usually followed by constantly degrading overall performance.
30365  * So, in such systems, it might be better to rather give up the merge. */
30366  if (search_key->slotid < key_count && spage_get_free_space (thread_p, child_page) > DB_PAGESIZE / 2)
30367  {
30368  /* this can cause a lot of problems. but we need it to know that page was not fixed because not in buffer and not
30369  * because some error occurred. */
30370  er_clear ();
30371 
30372  /* Check merges. */
30373  /* Check right merge. */
30374 
30375  /* Get link to right page. */
30376  if (spage_get_record (thread_p, *crt_page, search_key->slotid + 1, &right_recdes, PEEK) != S_SUCCESS)
30377  {
30378  assert_release (false);
30379  error_code = ER_FAILED;
30380  goto error;
30381  }
30382  btree_read_fixed_portion_of_non_leaf_record (&right_recdes, &non_leaf_rec_info);
30383  /* Fix right page. */
30384  VPID_COPY (&right_vpid, &non_leaf_rec_info.pnt);
30385 #if defined (SERVER_MODE)
30386  if (pgbuf_is_io_stressful () && spage_get_free_space (thread_p, child_page) < (int) (DB_PAGESIZE * 0.75f)
30387  && spage_number_of_slots (child_page) > 2)
30388  {
30389  /* avoid fetching "cold" neighbor pages, that are not in page buffer. at the same time, we should avoid doing
30390  * zero merges. so if we have a strong indicator that merge is possible (e.g. current page is almost empty)
30391  * force fixing the page.
30392  * I set an experimental value of 75% free space to try the merge. I don't know what a good value is (or if
30393  * there is any). */
30394  neighbor_fetch_mode = OLD_PAGE_IF_IN_BUFFER;
30395  }
30396 #endif /* SERVER_MODE */
30397  right_page = pgbuf_fix (thread_p, &right_vpid, neighbor_fetch_mode, child_latch, PGBUF_UNCONDITIONAL_LATCH);
30398  if (right_page == NULL)
30399  {
30400  error_code = er_errid ();
30401 
30402  if (error_code != NO_ERROR)
30403  {
30404  goto error;
30405  }
30406  /* page not in buffer */
30407  /* fall through */
30408  }
30409  else
30410  {
30411 #if !defined (NDEBUG)
30412  (void) pgbuf_check_page_ptype (thread_p, right_page, PAGE_BTREE);
30413 #endif /* !NDEBUG */
30414 
30415  /* Can child page be merged with its right page? */
30416  merge_status = btree_node_mergeable (thread_p, btid_int, child_page, right_page);
30417  if (merge_status != BTREE_MERGE_NO)
30418  {
30419  /* Try to merge. */
30420 
30421  /* Exclusive latch is required on all nodes. */
30422  if (delete_helper->nonleaf_latch_mode != PGBUF_LATCH_WRITE)
30423  {
30424  /* Promote latch on current node. */
30425  error_code = pgbuf_promote_read_latch (thread_p, crt_page, PGBUF_PROMOTE_ONLY_READER);
30426  if (error_code == NO_ERROR && *crt_page != NULL && child_latch != PGBUF_LATCH_WRITE)
30427  {
30428  /* Promote latches on children. */
30429 
30430  /* Promote latch on child. */
30431  error_code = pgbuf_promote_read_latch (thread_p, &child_page, promote_cond);
30432  if (error_code == NO_ERROR && child_page != NULL)
30433  {
30434  /* Promote latch on right page. */
30435  error_code = pgbuf_promote_read_latch (thread_p, &right_page, promote_cond);
30436  }
30437  }
30438  }
30439  /* Are all pages successfully write latched? */
30440  if (error_code == ER_PAGE_LATCH_PROMOTE_FAIL)
30441  {
30442  /* Failed to promote all latches to exclusive. */
30443  if (merge_status != BTREE_MERGE_TRY)
30444  {
30445  /* Merge must be executed. Restart using exclusive access. */
30446  delete_helper->nonleaf_latch_mode = PGBUF_LATCH_WRITE;
30447  *restart = true;
30448  if (right_page != NULL)
30449  {
30450  pgbuf_unfix_and_init (thread_p, right_page);
30451  }
30452  if (child_page != NULL)
30453  {
30454  pgbuf_unfix_and_init (thread_p, child_page);
30455  }
30456  return NO_ERROR;
30457  }
30458  /* Merge can be skipped. Fall through. */
30459  }
30460  else if (error_code != NO_ERROR)
30461  {
30462  ASSERT_ERROR ();
30463  goto error;
30464  }
30465  else if (*crt_page == NULL || child_page == NULL || right_page == NULL)
30466  {
30467  ASSERT_ERROR_AND_SET (error_code);
30468  goto error;
30469  }
30470  else
30471  {
30472  /* All pages are write latched. */
30474  && (pgbuf_get_latch_mode (child_page) >= PGBUF_LATCH_WRITE)
30475  && (pgbuf_get_latch_mode (right_page) >= PGBUF_LATCH_WRITE));
30476 
30477  /* Start system operation. */
30478  log_sysop_start (thread_p);
30479  is_system_op_started = true;
30480 
30481  save_lsa = *pgbuf_get_lsa (*crt_page);
30482  save_child_lsa = *pgbuf_get_lsa (child_page);
30483 
30484  /* Merge children and update parent. */
30485  error_code =
30486  btree_merge_node (thread_p, btid_int, *crt_page, child_page, right_page, search_key->slotid + 1,
30487  &child_vpid_after_merge, merge_status);
30488  if (error_code != NO_ERROR)
30489  {
30490  ASSERT_ERROR ();
30491  goto error;
30492  }
30493 
30494  btree_delete_log (delete_helper, "Merged nodes into left. \n"
30495  "\t" PGBUF_PAGE_MODIFY_MSG ("parent node page") "\n"
30496  "\t" PGBUF_PAGE_MODIFY_MSG ("left node page") "\n"
30497  "\t" "right node vpid = %d|%d",
30498  PGBUF_PAGE_MODIFY_ARGS (*crt_page, &save_lsa),
30499  PGBUF_PAGE_MODIFY_ARGS (child_page, &save_child_lsa), VPID_AS_ARGS (&right_vpid));
30500 
30501  /* Children are merged to the "left" node which is our case is the child page. */
30502  assert (!VPID_ISNULL (&child_vpid_after_merge));
30503  assert (VPID_EQ (&child_vpid_after_merge, &child_vpid));
30504 
30505 #if !defined(NDEBUG)
30506  (void) spage_check_num_slots (thread_p, *crt_page);
30507  (void) spage_check_num_slots (thread_p, child_page);
30508 #endif
30509 
30510  /* Deallocate right page. */
30511  pgbuf_unfix_and_init (thread_p, right_page);
30512  error_code = file_dealloc (thread_p, &btid_int->sys_btid->vfid, &right_vpid, FILE_BTREE);
30513  if (error_code != NO_ERROR)
30514  {
30515  ASSERT_ERROR ();
30516  goto error;
30517  }
30518 
30519  log_sysop_commit (thread_p);
30520  is_system_op_started = false;
30521 
30522  /* Advance to child page. */
30523  *advance_to_page = child_page;
30524  pgbuf_unfix_and_init (thread_p, *crt_page);
30525  delete_helper->is_root = false;
30526  return NO_ERROR;
30527  }
30528  }
30529  /* Merge not executed. Unfix right page. */
30530  pgbuf_unfix_and_init (thread_p, right_page);
30531  }
30532  }
30533  /* No merge has been executed. */
30534  assert (left_page == NULL);
30535  assert (right_page == NULL);
30536 
30537  /* Advance to current child. */
30538  *advance_to_page = child_page;
30539  pgbuf_unfix_and_init (thread_p, *crt_page);
30540  delete_helper->is_root = false;
30541 
30542  return NO_ERROR;
30543 
30544 error:
30545  assert_release (error_code != NO_ERROR);
30546 
30547  if (is_system_op_started)
30548  {
30549  /* Abort system operation, before unfixing the pages to be sure that no other transaction modify the pages. */
30550  log_sysop_abort (thread_p);
30551  }
30552 
30553  /* Unfix used pages. */
30554  assert (*advance_to_page == NULL);
30555  if (left_page != NULL)
30556  {
30557  pgbuf_unfix_and_init (thread_p, left_page);
30558  }
30559  if (right_page != NULL)
30560  {
30561  pgbuf_unfix_and_init (thread_p, right_page);
30562  }
30563  if (child_page != NULL)
30564  {
30565  pgbuf_unfix_and_init (thread_p, child_page);
30566  }
30567 
30568  return error_code;
30569 }
30570 
30571 /*
30572  * btree_key_delete_remove_object () - Remove one object and all its info from b-tree key.
30573  *
       * If the key search found the key, the leaf record is copied and btree_find_oid_and_its_page is used to locate
       * the object (in the leaf record or in one of its overflow pages). Recovery buffers are then prepared in the
       * delete helper and btree_key_remove_object is called to remove the object. When
       * delete_helper->check_key_deleted is set, the leaf record is read again afterwards and
       * delete_helper->is_key_deleted is cleared if the key still has visible objects.
       *
30574  * return : NO_ERROR on success, and also when purpose is BTREE_OP_DELETE_VACUUM_OBJECT and the object is not
       *          found. ER_FAILED if the leaf record cannot be read. ER_BTREE_UNKNOWN_KEY if the key/object is not
       *          found for any other purpose. Otherwise, the error code returned by the failed callee.
30575  * thread_p (in) : Thread entry.
30576  * btid_int (in) : B-tree info.
30577  * key (in) : Key value. Asserted to be non-NULL and not a null value (DB_IS_NULL/btree_multicol_key_is_null).
30578  * leaf_page (in) : Leaf page. Asserted to be fixed with at least PGBUF_LATCH_WRITE.
30579  * search_key (in) : Search key result. Its result and slotid fields are read.
30580  * restart (in) : Not used.
30581  * other_args (in/out) : BTREE_DELETE_HELPER *. Its leaf_addr, rv_keyval_data, rv_keyval_data_length, rv_redo_data
       *                       and rv_redo_data_ptr fields are set here; rv_keyval_data is reset to NULL at exit.
30582  */
30583 static int
30584 btree_key_delete_remove_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
30585  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
30586 {
30587  /* btree_delete_internal helper, received through the generic other_args pointer. */
30588  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
30589  int error_code = NO_ERROR; /* Error code returned at exit. */
30590  RECDES leaf_record; /* Leaf record. First copied into record_data_buffer, later re-read with PEEK. */
30591  /* Buffer used to copy leaf record. */
30592  char record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
30593  LEAF_REC leaf_rec_info; /* Leaf record info. */
30594  int offset_after_key = 0; /* Offset after key in leaf record. */
30595  bool dummy_clear_value = false; /* Dummy. */
30596  PAGE_PTR found_page = NULL; /* Page where object being removed is found. */
30597  PAGE_PTR prev_found_page = NULL; /* Previous page to the page where object being removed is found. Saved in case
30598  * that object is last in an overflow page and page must be deallocated. */
30599  int offset_to_object = NOT_FOUND; /* Offset in record where object to be removed is found. */
30600  BTREE_NODE_TYPE node_type; /* Leaf if object was found in *leaf_page, overflow otherwise. */
30601 
30602  /* Recovery structures. */
30603  /* Undo recovery structures. */
30604  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
30605  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
30606  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
30607  /* Redo recovery structures. */
30608  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
30609 
30610  /* Assert expected arguments. */
30611  assert (btid_int != NULL);
30612  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
30613  assert (leaf_page != NULL && *leaf_page != NULL && pgbuf_get_latch_mode (*leaf_page) >= PGBUF_LATCH_WRITE);
30614  assert (search_key != NULL);
30615  assert (delete_helper != NULL);
       /* NOTE(review): listing line 30616 is absent from this text; presumably it opens an assert on
        * delete_helper->purpose that the next line completes. Confirm against the real source. */
30617  && delete_helper->purpose != BTREE_OP_DELETE_UNDO_INSERT_UNQ_MULTIUPD);
30618 
30619  btree_perf_track_traverse_time (thread_p, delete_helper);
30620 
30621  if (search_key->result == BTREE_KEY_FOUND)
30622  {
30623  /* Key was found. We need to find OID. */
30624  /* Read key record (copied into the local aligned buffer). */
30625  leaf_record.data = PTR_ALIGN (record_data_buffer, BTREE_MAX_ALIGN);
30626  leaf_record.area_size = DB_PAGESIZE;
30627  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &leaf_record, COPY) != S_SUCCESS)
30628  {
       /* Returns directly instead of going through exit: this function has not fixed or allocated anything yet,
        * but btree_perf_track_time is also skipped on this path. */
30629  assert_release (false);
30630  return ER_FAILED;
30631  }
30632  error_code =
30633  btree_read_record (thread_p, btid_int, *leaf_page, &leaf_record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
30634  &dummy_clear_value, &offset_after_key, PEEK_KEY_VALUE, NULL);
30635  if (error_code != NO_ERROR)
30636  {
30637  ASSERT_ERROR ();
30638  goto exit;
30639  }
30640  /* Find OID and output its location/MVCC info. */
30641  error_code =
30642  btree_find_oid_and_its_page (thread_p, btid_int, BTREE_DELETE_OID (delete_helper), *leaf_page,
30643  delete_helper->purpose, &delete_helper->match_mvccinfo, &leaf_record,
30644  &leaf_rec_info, offset_after_key, &found_page, &prev_found_page, &offset_to_object,
30645  BTREE_DELETE_MVCC_INFO (delete_helper));
30646  if (error_code != NO_ERROR)
30647  {
30648  ASSERT_ERROR ();
30649  goto exit;
30650  }
30651  }
30652  else
30653  {
30654  /* Key was not found. offset_to_object is still NOT_FOUND; fall through to handle the case. */
30655  }
30656  if (offset_to_object == NOT_FOUND)
30657  {
30658  /* Key/object was not found. */
30659  assert (found_page == NULL && prev_found_page == NULL);
30660 
30661  if (delete_helper->purpose == BTREE_OP_DELETE_VACUUM_OBJECT)
30662  {
30663  /* Most probably, object was already vacuumed. One uncommon, but yet possible case when this can happen: 1.
30664  * OID1, reusable object in a b-tree overflow page. 2. OID1 is marked as deleted to be vacuumed later. 3.
30665  * OID1 is vacuumed from heap and its slot can be reused. 4. OID1 is reused and is inserted in the same
30666  * overflow page. The algorithm recognizes that the old version is just not vacuumed yet and replaces it.
30667  * OID's cannot be repeated in overflow pages. 5. Vacuum will not find the old OID1 to remove. */
       /* NOTE(review): listing line 30668 is absent from this text; presumably it opens the logging call whose
        * arguments follow. Confirm against the real source. */
30669  "Could not find object %d|%d|%d in key=%s to vacuum it.",
30670  delete_helper->object_info.oid.volid, delete_helper->object_info.oid.pageid,
30671  delete_helper->object_info.oid.slotid,
30672  delete_helper->printed_key != NULL ? delete_helper->printed_key : "(unknown)");
30673  btree_delete_log (delete_helper, "could not find object to vacuum \n"
30674  BTREE_DELETE_HELPER_MSG ("\t"), BTREE_DELETE_HELPER_AS_ARGS (delete_helper));
       /* Not an error for vacuum: error_code is still NO_ERROR here. */
30675  goto exit;
30676  }
30677  else
30678  {
30679  /* Key/oid should be found. */
30680  assert_release (false);
30681  btree_set_unknown_key_error (thread_p, btid_int->sys_btid, key,
30682  "btree_key_delete_remove_object: key was not found.");
30683  error_code = ER_BTREE_UNKNOWN_KEY;
30684  goto exit;
30685  }
30686  }
30687  /* Object was found. */
30688 
30689 #if defined (SERVER_MODE)
30690  /* When do we need to have a lock on object to protect it? 1. If this is not vacuum. 2. If this is not crash
30691  * recovery. 3. If object is not a rollback of insert into non-unique index. 4. If object is not inserted by current
30692  * transaction into non-unique index. All other cases must have lock on object. */
       /* NOTE(review): listing line 30693 is absent from this text; presumably it opens the assert that the following
        * conditions belong to. Confirm against the real source. */
30694  || (!BTREE_IS_UNIQUE (btid_int->unique_pk)
30695  && (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT
30696  || BTREE_MVCC_INFO_INSID (BTREE_DELETE_MVCC_INFO (delete_helper)) ==
30697  logtb_find_current_mvccid (thread_p)))
30698  /* Cannot check if class OID is NULL. Get it in debug mode. */
30699  || OID_ISNULL (BTREE_DELETE_CLASS_OID (delete_helper))
30700  || btree_check_locking_for_delete_unique (thread_p, delete_helper));
30701 #endif /* SERVER_MODE */
30702 
30703  /* Safe guard: if the index is unique and we want to physically delete the object and if operation type is not
30704  * MULTI_ROW_UPDATE, then object must be first in record. */
30705  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || delete_helper->purpose != BTREE_OP_DELETE_OBJECT_PHYSICAL
30706  || delete_helper->op_type == MULTI_ROW_UPDATE || (found_page == *leaf_page && offset_to_object == 0));
30707 
30708  /* Prepare logging: the log address is the leaf page, the slot of the key and the index file. */
30709  delete_helper->leaf_addr.pgptr = *leaf_page;
30710  delete_helper->leaf_addr.offset = search_key->slotid;
30711  delete_helper->leaf_addr.vfid = &btid_int->sys_btid->vfid;
30712 
30713  /* Undo logging. */
30714  /* Vacuum doesn't require undo logging - vacuum may sometimes open a system operation to handle complex actions, like
30715  * deallocating overflow pages. However, the object removal is last and if successful, then the entire operation
30716  * is considered successful. Object removal does not require undo logging. BTREE_OP_DELETE_UNDO_INSERT will append a
30717  * compensate log record and also requires only redo recovery data. */
30718  if (delete_helper->purpose == BTREE_OP_DELETE_OBJECT_PHYSICAL)
30719  {
       /* Start with the local aligned buffer. rv_keyval_data is also passed by address, so it may point elsewhere
        * after the call (see the check at exit). */
30720  delete_helper->rv_keyval_data = rv_undo_data_bufalign;
30721  error_code =
30722  btree_rv_save_keyval_for_undo (btid_int, key, BTREE_DELETE_CLASS_OID (delete_helper),
30723  BTREE_DELETE_OID (delete_helper), BTREE_DELETE_MVCC_INFO (delete_helper),
30724  delete_helper->purpose, rv_undo_data_bufalign, &delete_helper->rv_keyval_data,
30725  &rv_undo_data_capacity, &delete_helper->rv_keyval_data_length);
30726  if (error_code != NO_ERROR)
30727  {
30728  ASSERT_ERROR ();
30729  goto exit;
30730  }
30731  }
30732  else
30733  {
30734  /* No need for undo data. */
30735  delete_helper->rv_keyval_data = NULL;
30736  delete_helper->rv_keyval_data_length = 0;
30737  }
30738 
30739  /* Redo logging. Both redo pointers start at the beginning of the local aligned buffer. */
30740  delete_helper->rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
30741  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
30742 
30743  /* Where was object found? In the leaf page itself, or in another (overflow) page. */
30744  node_type = *leaf_page == found_page ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
30745  error_code =
30746  btree_key_remove_object (thread_p, key, btid_int, delete_helper, *leaf_page, &leaf_record, &leaf_rec_info,
30747  offset_after_key, search_key, &found_page, prev_found_page, node_type, offset_to_object);
30748  if (error_code != NO_ERROR)
30749  {
30750  ASSERT_ERROR ();
30751  goto exit;
30752  }
       /* Release the pages other than the leaf before the optional key check below. The same checks are repeated at
        * exit for the error paths. */
30753  if (found_page != NULL && found_page != *leaf_page)
30754  {
30755  pgbuf_unfix_and_init (thread_p, found_page);
30756  }
30757  if (prev_found_page != NULL && prev_found_page != *leaf_page)
30758  {
30759  pgbuf_unfix_and_init (thread_p, prev_found_page);
30760  }
30761 
30762  if (delete_helper->check_key_deleted)
30763  {
30764  /* Check key is really deleted (it may still have visible objects). */
30765  int num_visible_oids;
30766  int max_visible_oids = 1; /* Presumably caps the count: one visible object is enough to decide - verify. */
30767  MVCC_SNAPSHOT mvcc_snapshot_dirty; /* Only snapshot_fnc is initialized below. */
30768 
30769  assert (delete_helper->purpose == BTREE_OP_DELETE_OBJECT_PHYSICAL);
30770 
30771  mvcc_snapshot_dirty.snapshot_fnc = mvcc_satisfies_dirty;
30772 
30773  /* Re-read leaf record. This time it is peeked from the page, not copied. */
30774  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &leaf_record, PEEK) != S_SUCCESS)
30775  {
30776  assert_release (false);
30777  error_code = ER_FAILED;
30778  goto exit;
30779  }
30780  error_code =
30781  btree_read_record (thread_p, btid_int, *leaf_page, &leaf_record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
30782  &dummy_clear_value, &offset_after_key, PEEK_KEY_VALUE, NULL);
30783  if (error_code != NO_ERROR)
30784  {
30785  ASSERT_ERROR ();
30786  goto exit;
30787  }
30788  error_code =
30789  btree_get_num_visible_from_leaf_and_ovf (thread_p, btid_int, &leaf_record, offset_after_key, &leaf_rec_info,
30790  &max_visible_oids, &mvcc_snapshot_dirty, &num_visible_oids);
30791  if (error_code != NO_ERROR)
30792  {
30793  ASSERT_ERROR ();
30794  goto exit;
30795  }
30796  else if (num_visible_oids > 0)
30797  {
30798  /* Key still has visible objects and is not deleted. */
30799  delete_helper->is_key_deleted = false;
30800  }
30801  else
30802  {
30803  /* Key is deleted. is_key_deleted is expected to be already set; it is only asserted here. */
30804  assert (delete_helper->is_key_deleted);
30805  }
30806  }
30807  /* Success. */
30808 
30809 exit:
30810 
       /* Common cleanup for success and error paths. The leaf page itself is never unfixed here. */
30811  if (found_page != NULL && found_page != *leaf_page)
30812  {
30813  pgbuf_unfix_and_init (thread_p, found_page);
30814  }
30815  if (prev_found_page != NULL && prev_found_page != *leaf_page)
30816  {
30817  pgbuf_unfix_and_init (thread_p, prev_found_page);
30818  }
       /* Free undo data only if it does not point to the local stack buffer (presumably the callee allocated a
        * larger buffer in that case - verify against btree_rv_save_keyval_for_undo). */
30819  if (delete_helper->rv_keyval_data != NULL && delete_helper->rv_keyval_data != rv_undo_data_bufalign)
30820  {
30821  db_private_free_and_init (thread_p, delete_helper->rv_keyval_data);
30822  }
       /* Never leave the helper pointing to this function's stack buffer. */
30823  delete_helper->rv_keyval_data = NULL;
30824 
30825  btree_perf_track_time (thread_p, delete_helper);
30826  return error_code;
30827 }
30828 
30829 /*
30830  * btree_key_remove_object_and_keep_visible_first () - Remove one object and all its info from b-tree key. Then find
30831  * other visible version and move it first in leaf record.
30832  * Special case of unique index.
30833  *
30834  * return : Error code.
30835  * thread_p (in) : Thread entry.
30836  * btid_int (in) : B-tree info.
30837  * key (in) : Key value.
30838  * leaf_page (in) : Leaf page.
30839  * search_key (in) : Search key result.
30840  * restart (in) : Not used.
30841  * other_args (in) : BTREE_DELETE_HELPER *
30842  */
30843 static int
30845  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key,
30846  bool * restart, void *other_args)
30847 {
30848  /* btree_delete_internal helper. */
30849  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
30850  int error_code = NO_ERROR; /* Error code. */
30851  RECDES leaf_record; /* Copy leaf record. */
30852  /* Buffer used to copy leaf record. */
30853  char record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
30854  LEAF_REC leaf_rec_info; /* Leaf record info. */
30855  int offset_after_key = 0; /* Offset after key in leaf record. */
30856  bool dummy_clear_value = false; /* Dummy. */
30857  PAGE_PTR found_page = NULL; /* Page where object being removed is found. */
30858  PAGE_PTR prev_found_page = NULL; /* Previous page to the page where object being removed is found. Saved in case
30859  * that object is last in an overflow page and page must be deallocated. */
30860  int offset_to_object = NOT_FOUND; /* Offset in record where object to be removed is found. */
30861  int offset_to_second_object = NOT_FOUND; /* Offset to second visible object. */
30862  BTREE_OP_PURPOSE second_object_search_purpose; /* Purpose used for searching second object. */
30863  BTREE_MVCC_INFO match_2nd_obj_mvccinfo; /* MVCC info of second object to be matched. */
30864 
30865  /* Recovery structures. */
30866  /* Undo recovery structures. */
30867  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
30868  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
30869  char *rv_undo_data_ptr = NULL;
30870  int rv_undo_data_length = 0;
30871  /* Redo recovery structures. */
30872  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
30873  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
30874  char *rv_redo_data_ptr = NULL;
30875  int rv_redo_data_length = 0;
30876  char helper_rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
30877 
30878  LOG_LSA prev_lsa;
30879 
30880  /* Assert expected arguments. */
30881  assert (btid_int != NULL);
30882  assert (BTREE_IS_UNIQUE (btid_int->unique_pk));
30883  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
30884  assert (leaf_page != NULL && *leaf_page != NULL && pgbuf_get_latch_mode (*leaf_page) >= PGBUF_LATCH_WRITE);
30885  assert (search_key != NULL);
30886  assert (delete_helper != NULL);
30888 
30889  if (search_key->result == BTREE_KEY_FOUND)
30890  {
30891  /* Key was found. We need to find OID. */
30892  /* Read key record. */
30893  leaf_record.data = PTR_ALIGN (record_data_buffer, BTREE_MAX_ALIGN);
30894  leaf_record.area_size = DB_PAGESIZE;
30895  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &leaf_record, COPY) != S_SUCCESS)
30896  {
30897  assert_release (false);
30898  return ER_FAILED;
30899  }
30900  error_code =
30901  btree_read_record (thread_p, btid_int, *leaf_page, &leaf_record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
30902  &dummy_clear_value, &offset_after_key, PEEK_KEY_VALUE, NULL);
30903  if (error_code != NO_ERROR)
30904  {
30905  ASSERT_ERROR ();
30906  goto exit;
30907  }
30908  /* Find OID and output its location/MVCC info. */
30909  error_code =
30910  btree_find_oid_and_its_page (thread_p, btid_int, BTREE_DELETE_OID (delete_helper), *leaf_page,
30911  delete_helper->purpose, &delete_helper->match_mvccinfo, &leaf_record,
30912  &leaf_rec_info, offset_after_key, &found_page, &prev_found_page, &offset_to_object,
30913  BTREE_DELETE_MVCC_INFO (delete_helper));
30914  if (error_code != NO_ERROR)
30915  {
30916  ASSERT_ERROR ();
30917  goto exit;
30918  }
30919  }
30920  else
30921  {
30922  /* Key was not found. Fall through to handle the case. */
30923  }
30924  if (offset_to_object == NOT_FOUND)
30925  {
30926  /* Key/object was not found. */
30927  assert (found_page == NULL && prev_found_page == NULL);
30928 
30929  /* Key/oid should be found. */
30930  assert_release (false);
30931  btree_set_unknown_key_error (thread_p, btid_int->sys_btid, key,
30932  "btree_key_remove_object_and_keep_visible_first: key wasn't found.");
30933  error_code = ER_BTREE_UNKNOWN_KEY;
30934  goto exit;
30935  }
30936  /* Object was found. */
30937 
30938  /* Prepare logging. */
30939  delete_helper->leaf_addr.pgptr = *leaf_page;
30940  delete_helper->leaf_addr.offset = search_key->slotid;
30941  delete_helper->leaf_addr.vfid = &btid_int->sys_btid->vfid;
30942  delete_helper->rv_redo_data = PTR_ALIGN (helper_rv_redo_data_buffer, BTREE_MAX_ALIGN);
30943  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
30944 
30945  if (offset_to_object != 0 || found_page != *leaf_page)
30946  {
30947  /* Object is normally expected to be first. */
30948  /* But there is this case: create table t (a int unique); insert into t values (1), (2); update t set a=a+1 where
30949  * a > 0; rollback; First a new version of object from key 1 is inserted in key 2. Then the other object in key
30950  * 2 is updated to key 3. The version in key 2 is deleted (marked as deleted). On undo/rollback, the second
30951  * object old version delete MVCCID is removed first. The algorithm for undo MVCC delete in unique index makes
30952  * sure it is brought back to the first position in key. The inserted version is relocated to another position. */
30953  BTREE_NODE_TYPE node_type;
30954 
30955 #if !defined (NDEBUG)
30956  {
30957  /* Let's confirm the above theory. The first object in leaf record must be same with the object we wanted to
30958  * bring first. */
30959  BTREE_OBJECT_INFO first_object;
30960  btree_leaf_get_first_object (btid_int, &leaf_record, &first_object.oid, &first_object.class_oid,
30961  &first_object.mvcc_info);
30962  assert (OID_EQ (&first_object.oid, &delete_helper->second_object_info.oid));
30963  }
30964 #endif /* !NDEBUG */
30965 
30966  /* Just remove inserted object. */
30967  node_type = (*leaf_page == found_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
30968  error_code =
30969  btree_key_remove_object (thread_p, key, btid_int, delete_helper, *leaf_page, &leaf_record, &leaf_rec_info,
30970  offset_after_key, search_key, &found_page, prev_found_page, node_type,
30971  offset_to_object);
30972  if (error_code != NO_ERROR)
30973  {
30974  ASSERT_ERROR ();
30975  }
30976  /* Skip the rest of the function. The code handles the case when inserted version was first. */
30977  goto exit;
30978  }
30979 
30980  /* Find second visible version (which must be moved first). */
30981  assert (prev_found_page == NULL);
30982  found_page = NULL;
30983 
30985  {
30986  /* This must be an object deleted by current transaction. */
30988  == delete_helper->match_mvccinfo.insert_mvccid);
30989  /* Search with matching delete MVCCID. */
30990  second_object_search_purpose = BTREE_OP_DELETE_UNDO_INSERT_DELID;
30991 
30992  /* We should not have insert MVCCID set. */
30994  }
30995  else
30996  {
30997  /* Previous object was not deleted. Search as if we'd want to delete it. */
30998  second_object_search_purpose = BTREE_OP_DELETE_OBJECT_PHYSICAL;
30999  }
31000  /* Copy second object MVCC info we want to match (so it is not overwritten). */
31001  match_2nd_obj_mvccinfo = delete_helper->second_object_info.mvcc_info;
31002  error_code =
31003  btree_find_oid_and_its_page (thread_p, btid_int, &delete_helper->second_object_info.oid, *leaf_page,
31004  second_object_search_purpose, &match_2nd_obj_mvccinfo, &leaf_record, &leaf_rec_info,
31005  offset_after_key, &found_page, &prev_found_page, &offset_to_second_object,
31006  &delete_helper->second_object_info.mvcc_info);
31007  if (error_code != NO_ERROR)
31008  {
31009  assert_release (false);
31010  error_code = ER_FAILED;
31011  goto exit;
31012  }
31013  if (offset_to_second_object == NOT_FOUND)
31014  {
31015  assert (false);
31016  error_code = ER_FAILED;
31017  goto exit;
31018  }
31019 
31020  /* Prepare leaf page logging. */
31021  rv_redo_data_ptr = rv_redo_data;
31022 
31023  if (found_page == *leaf_page)
31024  {
31025 #if !defined (NDEBUG)
31026  BTREE_RV_REDO_SET_DEBUG_INFO (&delete_helper->leaf_addr, rv_redo_data_ptr, btid_int,
31028 #endif /* !NDEBUG */
31030 
31031  /* Remove record from leaf. */
31032  btree_record_remove_object_internal (thread_p, btid_int, &leaf_record, BTREE_LEAF_NODE, offset_to_second_object,
31033  NULL, &rv_redo_data_ptr, NULL);
31034  }
31035  else
31036  {
31037  /* Leaf and overflow OID's page are going to be changed. A system operation and undo logging is required. */
31038  log_sysop_start (thread_p);
31039  delete_helper->is_system_op_started = true;
31040 
31041  error_code =
31042  btree_overflow_remove_object (thread_p, key, btid_int, delete_helper, &found_page, prev_found_page, *leaf_page,
31043  &leaf_record, search_key, offset_to_second_object);
31044  if (error_code != NO_ERROR)
31045  {
31046  assert_release (false);
31047  goto exit;
31048  }
31049 
31050  rv_undo_data_ptr = rv_undo_data;
31051  rv_redo_data_ptr = rv_redo_data;
31052 
31053 #if !defined (NDEBUG)
31054  /* Leaf may have been logged if object was removed from overflow page. Reset logging structures for new logging.
31055  */
31056  delete_helper->leaf_addr.offset = search_key->slotid;
31057  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&delete_helper->leaf_addr, rv_redo_data_ptr, rv_undo_data_ptr, btid_int,
31059 #endif /* !NDEBUG */
31061  }
31062 
31063  /* Replace inserted object with second visible object. */
31064  btree_leaf_change_first_object (thread_p, &leaf_record, btid_int, &delete_helper->second_object_info.oid,
31065  &delete_helper->second_object_info.class_oid,
31066  &delete_helper->second_object_info.mvcc_info, NULL, &rv_undo_data_ptr,
31067  &rv_redo_data_ptr);
31068 
31069  /* Update record in page. */
31070  if (spage_update (thread_p, *leaf_page, search_key->slotid, &leaf_record) != SP_SUCCESS)
31071  {
31072  assert_release (false);
31073  error_code = ER_FAILED;
31074  goto exit;
31075  }
31076 
31077  /* Add logging for leaf page. */
31078  LSA_COPY (&prev_lsa, pgbuf_get_lsa (*leaf_page));
31079  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
31080  if (delete_helper->is_system_op_started)
31081  {
31082  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
31083  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &delete_helper->leaf_addr, rv_undo_data_length,
31084  rv_redo_data_length, rv_undo_data, rv_redo_data);
31085  }
31086  else
31087  {
31089  delete_helper->leaf_addr.offset, *leaf_page, rv_redo_data_length,
31090  rv_redo_data, LOG_FIND_CURRENT_TDES (thread_p),
31091  &delete_helper->reference_lsa);
31092  }
31093 
31094  /* Success. */
31095  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("unique undo insert, brought back previous first object")
31096  "\t" BTREE_OBJINFO_MSG ("first object"),
31097  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, *leaf_page, &prev_lsa, true, search_key->slotid,
31098  leaf_record.length, btid_int->sys_btid),
31099  BTREE_OBJINFO_AS_ARGS (&delete_helper->second_object_info));
31100 
31101 exit:
31102 
31103  if (delete_helper->is_system_op_started)
31104  {
31105  assert_release (error_code == NO_ERROR);
31106  btree_delete_sysop_end (thread_p, delete_helper);
31107  }
31108  if (found_page != NULL && found_page != *leaf_page)
31109  {
31110  pgbuf_unfix_and_init (thread_p, found_page);
31111  }
31112  if (prev_found_page != NULL && prev_found_page != *leaf_page)
31113  {
31114  pgbuf_unfix_and_init (thread_p, prev_found_page);
31115  }
31116 
31117  return error_code;
31118 }
31119 
31120 /*
31121  * btree_leaf_record_replace_first_with_last () - Remove first object by replacing it with last.
31122  *
 * The last object is cut from the end of the leaf record and then written over the first object, so the record
 * shrinks by one object. The modified record is written back to the page, the change is logged through
 * btree_rv_log_delete_object and the page is marked dirty.
 *
31123  * return : NO_ERROR on success, ER_FAILED if the slotted page update fails.
31124  * thread_p (in) : Thread entry.
31125  * btid_int (in) : B-tree identifier.
31126  * key (in) : Key value. NOTE(review): the call in btree_leaf_remove_object passes no key argument, so this entry
 *            looks stale - confirm against the prototype.
31127  * delete_helper (in/out) : B-tree delete helper. Its redo buffer (rv_redo_data and rv_redo_data_ptr) must already
 *                          be set; redo changes are packed through rv_redo_data_ptr and leaf_addr is used as
 *                          the log address.
31128  * leaf_page (in) : Leaf page. Asserted to be latched with at least PGBUF_LATCH_WRITE.
31129  * leaf_record (in/out) : Key leaf record. It is modified in memory and then written back to the page.
31130  * search_key (in) : Search key result. Its slotid selects the record to update.
31131  * last_oid (in) : Last object OID.
31132  * last_class_oid (in) : Last object class OID.
31133  * last_mvcc_info (in) : Last object MVCC info.
31134  * offset_to_last_object (in) : Offset to last object. Asserted to be > 0 and smaller than the record length.
31135  */
31136 static int
31138  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page,
31139  RECDES * leaf_record, BTREE_SEARCH_KEY_HELPER * search_key, OID * last_oid,
31140  OID * last_class_oid, BTREE_MVCC_INFO * last_mvcc_info,
31141  int offset_to_last_object)
31142 {
 /* Undo recovery buffer and cursor. */
 /* NOTE(review): in the visible code rv_undo_data_ptr is never pointed at rv_undo_data and rv_undo_data_length is
  * never recomputed, so no undo data appears to be packed or logged here - confirm this is intended. */
31143  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
31144  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
31145  char *rv_undo_data_ptr = NULL;
31146  int rv_undo_data_length = 0;
31147  int rv_redo_data_length = 0;
31148 
31149  LOG_LSA prev_lsa;
31150 
31151  /* Assert expected arguments. */
31152  assert (btid_int != NULL);
31153  assert (delete_helper != NULL);
31154  assert (leaf_page != NULL && pgbuf_get_latch_mode (leaf_page) >= PGBUF_LATCH_WRITE);
31155  assert (leaf_record != NULL);
31156  assert (search_key != NULL);
31157  assert (last_oid != NULL);
31158  assert (last_class_oid != NULL);
31159  assert (last_mvcc_info != NULL);
31160  assert (offset_to_last_object > 0 && offset_to_last_object < leaf_record->length);
31162  && delete_helper->purpose != BTREE_OP_DELETE_UNDO_INSERT_UNQ_MULTIUPD);
31163  assert (delete_helper->rv_redo_data != NULL && delete_helper->rv_redo_data_ptr != NULL);
31164 
31165 #if !defined (NDEBUG)
31166  /* For debugging recovery. */
31167  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&delete_helper->leaf_addr, delete_helper->rv_redo_data_ptr, rv_undo_data_ptr,
31168  btid_int, BTREE_RV_DEBUG_ID_LAST_OID);
31169 #endif /* !NDEBUG */
31171 
31172  /* Replace first object with last object. */
31173  /* First remove last object (so its offset doesn't change). */
31174  btree_record_remove_last_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_to_last_object,
31175  &rv_undo_data_ptr, &delete_helper->rv_redo_data_ptr);
31176  /* Replace first: overwrite the first object with the values saved from the last one. */
31177  btree_leaf_change_first_object (thread_p, leaf_record, btid_int, last_oid, last_class_oid, last_mvcc_info, NULL,
31178  &rv_undo_data_ptr, &delete_helper->rv_redo_data_ptr);
31179 
31181 
31182  /* Update record in the leaf page slot. */
31183  if (spage_update (thread_p, leaf_page, search_key->slotid, leaf_record) != SP_SUCCESS)
31184  {
31185  /* Should not fail. */
31186  assert_release (false);
31187  return ER_FAILED;
31188  }
31189 
31190  /* Save the page LSA before the log record is appended; it is passed to btree_delete_log below. */
31191  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
31192 
31193  /* Log changes. Redo length is the distance between the redo cursor and the start of the redo buffer. */
31194  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
31195  assert (!delete_helper->is_system_op_started || delete_helper->purpose != BTREE_OP_DELETE_OBJECT_PHYSICAL);
31196  btree_rv_log_delete_object (thread_p, *delete_helper, delete_helper->leaf_addr, rv_undo_data_length,
31197  rv_redo_data_length, rv_undo_data_ptr, delete_helper->rv_redo_data);
31198 
31200 
31201  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
31202 
31203  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("delete object in leaf record by replacing with last")
31204  "\t" BTREE_OBJINFO_MSG ("replacement object"),
31205  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, leaf_page, &prev_lsa, true, search_key->slotid,
31206  leaf_record->length, btid_int->sys_btid),
31207  OID_AS_ARGS (last_oid), OID_AS_ARGS (last_class_oid), BTREE_MVCC_INFO_AS_ARGS (last_mvcc_info));
31208 
31209  /* Success */
31210  return NO_ERROR;
31211 }
31212 
31213 /*
31214  * btree_record_remove_object () - Remove object from b-tree leaf or overflow record.
31215  *
 * The object is cut out of the in-memory record, the record is written back to its slot, the change is logged
 * through btree_rv_log_delete_object and the page is marked dirty. The first object of a leaf record cannot be
 * removed here (asserted).
 *
31216  * return : NO_ERROR on success, ER_FAILED if the slotted page update fails.
31217  * thread_p (in) : Thread entry.
31218  * btid_int (in) : B-tree info.
31219  * delete_helper (in/out) : B-tree delete helper. Its redo buffer must already be set; redo changes are packed
 *                          through rv_redo_data_ptr.
31220  * page (in) : Leaf or overflow page.
31221  * record (in/out) : Leaf or overflow record. It is modified in memory and then written back to the page.
31222  * search_key (in) : Search key result. Asserted to be BTREE_KEY_FOUND with slotid > 0; the slotid is used only
 *                   for leaf records.
31223  * node_type (in) : Leaf or overflow node type.
31224  * offset_to_object (in) : Offset to object being removed.
31225  * addr (in) : Leaf or overflow log address. Asserted to have a non-zero offset and pgptr equal to page.
31226  */
31227 static int
31228 btree_record_remove_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, BTREE_DELETE_HELPER * delete_helper,
31229  PAGE_PTR page, RECDES * record, BTREE_SEARCH_KEY_HELPER * search_key,
31230  BTREE_NODE_TYPE node_type, int offset_to_object, LOG_DATA_ADDR * addr)
31231 {
 /* Undo recovery buffer and cursor. The cursor stays NULL unless a system operation is in progress. */
31232  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
31233  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
31234  char *rv_undo_data_ptr = NULL;
31235  int rv_undo_data_length = 0;
31236  int rv_redo_data_length = 0;
31237 
31238  LOG_LSA prev_lsa;
31239 
31240  /* Assert expected arguments. */
31241  assert (btid_int != NULL);
31242  assert (delete_helper != NULL);
31243  assert (page != NULL);
31244  assert (record != NULL);
31245  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND && search_key->slotid > 0);
31246  assert (addr != NULL && addr->offset != 0 && addr->pgptr == page);
31247  assert (btree_is_delete_object_purpose (delete_helper->purpose));
31248  assert (delete_helper->rv_redo_data != NULL && delete_helper->rv_redo_data_ptr != NULL);
31249 
31250  /* Safe guard: first object in leaf record cannot be handled here. */
31251  assert (offset_to_object > 0 || node_type == BTREE_OVERFLOW_NODE);
31252 
31253  if (delete_helper->is_system_op_started)
31254  {
31255  /* Undoredo logging is required. Pointing the cursor at the buffer enables undo packing below. */
31256  rv_undo_data_ptr = rv_undo_data;
31257  }
31258 
31259 #if !defined (NDEBUG)
31260  /* For debugging recovery. */
31261  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (addr, delete_helper->rv_redo_data_ptr, rv_undo_data_ptr, btid_int,
31263 #endif /* !NDEBUG */
31265  if (node_type == BTREE_OVERFLOW_NODE)
31266  {
31268  }
31269 
 /* Cut the object out of the in-memory record and pack undo (if enabled) and redo changes. */
31270  btree_record_remove_object_internal (thread_p, btid_int, record, node_type, offset_to_object, &rv_undo_data_ptr,
31271  &delete_helper->rv_redo_data_ptr, NULL);
31272 
31274 
31275  /* Update page. */
31276  /* NOTE: Overflow record slotid is always 1. */
31277  if (spage_update (thread_p, page, node_type == BTREE_LEAF_NODE ? search_key->slotid : 1, record) != SP_SUCCESS)
31278  {
31279  /* No error is expected. */
31280  assert_release (false);
31281  return ER_FAILED;
31282  }
31283 
31284  /* Save the page LSA before the log record is appended; it is passed to btree_delete_log below. */
31285  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page));
31286 
31287  /* Add logging. */
 /* NOTE(review): rv_undo_data_length is still 0 at this point in the visible code, even when undo data was packed
  * above - confirm that the undo length is not expected to be computed here. */
31288  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
31289  assert (rv_redo_data_length > 0);
31290  assert (!delete_helper->is_system_op_started || delete_helper->purpose != BTREE_OP_DELETE_OBJECT_PHYSICAL);
31291  btree_rv_log_delete_object (thread_p, *delete_helper, *addr, rv_undo_data_length, rv_redo_data_length,
31292  rv_undo_data, delete_helper->rv_redo_data);
31294 
31295  /* Set page dirty. */
31296  pgbuf_set_dirty (thread_p, page, DONT_FREE);
31297 
31298  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("remove object from record"),
31299  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, page, &prev_lsa, node_type == BTREE_LEAF_NODE,
31300  node_type == BTREE_LEAF_NODE ? search_key->slotid : 1, record->length,
31301  btid_int->sys_btid));
31302 
31303  /* Success. */
31304  return NO_ERROR;
31305 }
31306 
31307 /*
31308  * btree_record_remove_object_internal () - Remove object and all its info from b-tree record.
31309  *
 * Only the in-memory record is changed; nothing is written to a page or to the log here. The first object of a
 * leaf record cannot be removed by this function (asserted).
 *
31310  * return : Void.
31311  * thread_p (in) : Thread entry.
31312  * btid_int (in) : B-tree info.
31313  * record (in/out) : Record. The object bytes are cut out and the record is shortened.
31314  * node_type (in) : Node type (leaf or overflow).
31315  * offset_to_object (in) : Offset to object being removed.
31316  * rv_undo_data (in/out) : Undo recovery data cursor. Skipped when it is NULL or points to NULL; otherwise the removed
 *                         bytes are packed and the cursor is advanced.
31317  * rv_redo_data (in/out) : Redo recovery data cursor. Skipped when it is NULL or points to NULL; otherwise the removal
 *                         is packed and the cursor is advanced.
31318  * displacement (out) : Output displacement of the rest of the record (minus the removed size). May be NULL.
31319  */
31320 static void
31322  BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data,
31323  char **rv_redo_data, int *displacement)
31324 {
 /* Number of bytes to remove; starts with the size of the object OID. */
31325  int object_info_size = OR_OID_SIZE;
31326 
31327  /* Assert expected arguments. */
31328  assert (btid_int != NULL);
31329  assert (record != NULL);
31330  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
31331  assert (offset_to_object >= 0 && offset_to_object < record->length);
31332  /* We cannot remove here the first object of leaf record. */
31333  assert (node_type == BTREE_OVERFLOW_NODE || offset_to_object > 0);
31334 
31335  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
31336  {
 /* Unique indexes account for one more OID per object. */
31337  object_info_size += OR_OID_SIZE;
31338  }
 /* NOTE(review): the right-hand side of the next statement is not visible in this listing; presumably it adds
  * the size of the object's MVCC info - confirm. */
31339  object_info_size +=
31341 
31342  /* Undo logging: save the bytes about to be removed. This must happen before the record is changed. */
31343  if (rv_undo_data != NULL && *rv_undo_data != NULL)
31344  {
31345  *rv_undo_data =
31346  log_rv_pack_undo_record_changes (*rv_undo_data, offset_to_object, object_info_size, 0,
31347  record->data + offset_to_object);
31348  }
31349 
31350  /* Update record: move the data that follows the object over it. */
31351  RECORD_MOVE_DATA (record, offset_to_object, offset_to_object + object_info_size);
31352 
31353  /* Redo logging: object_info_size old bytes at offset_to_object are replaced by 0 new bytes. */
31354  if (rv_redo_data != NULL && *rv_redo_data != NULL)
31355  {
31356  *rv_redo_data = log_rv_pack_redo_record_changes (*rv_redo_data, offset_to_object, object_info_size, 0, NULL);
31357  }
31358 
31359  if (displacement != NULL)
31360  {
 /* Negative value: the rest of the record moved towards its start. */
31361  *displacement = -object_info_size;
31362  }
31363 
31364 #if !defined (NDEBUG)
31365  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
31366 #endif /* !NDEBUG */
31367 }
31368 
31369 /*
31370  * btree_key_remove_object () - Remove object from key. Function is interface for btree_leaf_remove_object and
31371  * btree_overflow_remove_object.
31372  *
 * Dispatches on node_type: BTREE_LEAF_NODE goes to btree_leaf_remove_object, anything else goes to
 * btree_overflow_remove_object.
 *
31373  * return : NO_ERROR on success, otherwise the error code returned by the called function.
31374  * thread_p (in) : Thread entry.
31375  * key (in) : Key value.
31376  * btid_int (in) : B-tree info.
31377  * delete_helper (in) : B-tree delete helper.
31378  * leaf_page (in) : Leaf page.
31379  * leaf_record (in) : Leaf record.
31380  * leaf_info (in) : Leaf record info. Used only on the leaf path.
31381  * offset_after_key (in) : Offset to where packed key is ended in leaf record. Used only on the leaf path.
31382  * search_key (in) : Search key result.
31383  * overflow_page (in/out) : Pointer to overflow page. Used only on the overflow path, where the callee may unfix it.
31384  * prev_page (in) : Previous page to overflow page. Used only on the overflow path.
31385  * node_type (in) : Node type where object is found.
31386  * offset_to_object (in) : Offset in record where object is found.
31387  */
31388 static int
31390  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page, RECDES * leaf_record,
31391  LEAF_REC * leaf_info, int offset_after_key, BTREE_SEARCH_KEY_HELPER * search_key,
31392  PAGE_PTR * overflow_page, PAGE_PTR prev_page, BTREE_NODE_TYPE node_type, int offset_to_object)
31393 {
31394  int error_code = NO_ERROR;
31395 
31396  if (node_type == BTREE_LEAF_NODE)
31397  {
 /* Object is stored in the leaf record. */
31398  error_code =
31399  btree_leaf_remove_object (thread_p, key, btid_int, delete_helper, leaf_page, leaf_record, leaf_info,
31400  offset_after_key, search_key, offset_to_object);
31401  if (error_code != NO_ERROR)
31402  {
31403  ASSERT_ERROR ();
31404  }
31405  }
31406  else
31407  {
 /* Object is stored in an overflow page. */
31408  error_code =
31409  btree_overflow_remove_object (thread_p, key, btid_int, delete_helper, overflow_page, prev_page, leaf_page,
31410  leaf_record, search_key, offset_to_object);
31411  if (error_code != NO_ERROR)
31412  {
31413  ASSERT_ERROR ();
31414  }
31415  }
 /* Errors are only asserted above; the code is handed back to the caller unchanged. */
31416  return error_code;
31417 }
31418 
31419 /*
31420  * btree_overflow_remove_object () - Remove an object from overflow page.
31421  *
 * Two cases, decided by the length of the overflow record:
 *   - The record holds exactly one object: the overflow page is unfixed and deallocated, and the link in the
 *     previous page (leaf record or previous overflow page) is changed to the next overflow VPID. A system
 *     operation is started here if the caller has not started one, and ended here in that case.
 *   - The record holds more objects: the object is removed from the record by btree_record_remove_object.
 *
31422  * return : NO_ERROR on success, ER_FAILED if the overflow record cannot be read, otherwise the error code of the
 *          failing call.
31423  * thread_p (in) : Thread entry.
31424  * key (in) : Key value. Used for the deleted overflow page notification.
31425  * btid_int (in) : B-tree info.
31426  * delete_helper (in/out) : B-tree delete helper. is_system_op_started may be set while the function runs.
31427  * overflow_page (in/out) : Pointer to overflow page. When the page is deallocated it is first unfixed with
 *                          pgbuf_unfix_and_init (presumably leaving the pointed page NULL - confirm).
31428  * prev_page (in) : Page previous to overflow page (can be leaf page or another overflow page).
31429  * leaf_page (in) : Leaf page.
31430  * leaf_record (in) : Leaf record.
31431  * search_key (in) : Search key result. Asserted to be BTREE_KEY_FOUND with slotid > 0.
31432  * offset_to_object (in) : Offset to object being removed.
31433  */
31434 static int
31436  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR * overflow_page, PAGE_PTR prev_page,
31437  PAGE_PTR leaf_page, RECDES * leaf_record, BTREE_SEARCH_KEY_HELPER * search_key,
31438  int offset_to_object)
31439 {
31440  int error_code = NO_ERROR; /* Error code. */
31441  OID *notification_class_oid;
31442  RECDES overflow_record; /* Overflow record. */
31443  /* Buffer to copy overflow record data. */
31444  char overflow_record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
31445  bool save_system_op_started = false; /* Save previous is_system_op_started. */
31446  VPID overflow_vpid = VPID_INITIALIZER; /* VPID of overflow page. */
31447  VPID next_overflow_vpid = VPID_INITIALIZER; /* VPID of next overflow page. */
31448  LOG_DATA_ADDR ovf_addr; /* Address for logging. */
31449 
31450  /* Assert expected arguments. */
31451  assert (btid_int != NULL);
31452  assert (delete_helper != NULL);
31453  assert (overflow_page != NULL && *overflow_page != NULL);
31454  assert (prev_page != NULL);
31455  assert (leaf_page != NULL);
31456  assert (leaf_record != NULL);
31457  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND && search_key->slotid > 0);
31458  assert (btree_is_delete_object_purpose (delete_helper->purpose));
31459 
31460  /* Read overflow record. A copy is taken (COPY) into the local buffer; overflow records are in slot 1. */
31461  overflow_record.area_size = DB_PAGESIZE;
31462  overflow_record.data = PTR_ALIGN (overflow_record_data_buffer, BTREE_MAX_ALIGN);
31463  if (spage_get_record (thread_p, *overflow_page, 1, &overflow_record, COPY) != S_SUCCESS)
31464  {
31465  /* Unexpected. */
31466  assert_release (false);
31467  return ER_FAILED;
31468  }
31469 
31470 #if !defined (NDEBUG)
31471  (void) btree_check_valid_record (thread_p, btid_int, &overflow_record, BTREE_OVERFLOW_NODE, NULL);
31472 #endif /* !NDEBUG */
31473 
 /* Remember whether the caller already started a system operation, so that only one started here is ended here. */
31474  save_system_op_started = delete_helper->is_system_op_started;
31475 
31476  if (overflow_record.length == BTREE_OBJECT_FIXED_SIZE (btid_int))
31477  {
31478  /* Only one object. */
31479  /* Remove page completely. */
31480 
31481  /* Safe guard: only first object can be deleted. */
31482  assert (offset_to_object == 0);
31483 
31484  /* Get VPID of next overflow page. It replaces the link to the page being removed. */
31485  error_code = btree_get_next_overflow_vpid (thread_p, *overflow_page, &next_overflow_vpid);
31486  if (error_code != NO_ERROR)
31487  {
 /* This function has not started a system operation yet, so a plain return is enough. */
31488  ASSERT_ERROR ();
31489  return error_code;
31490  }
31491  /* Unfix the page before deallocating. Its VPID is saved first because it is needed for deallocation. */
31492  pgbuf_get_vpid (*overflow_page, &overflow_vpid);
31493  pgbuf_unfix_and_init (thread_p, *overflow_page);
31494 
31495  /* we need system op to deallocate pages. */
31496  if (!delete_helper->is_system_op_started)
31497  {
31498  log_sysop_start (thread_p);
31499  delete_helper->is_system_op_started = true;
31500  }
31501 
31502  /* todo: we always need a system operation to deallocate page. otherwise the page may be "leaked" on rollback.
31503  * fixme when replacing the old system operation system */
31504  /* Deallocate page. */
31505  error_code = file_dealloc (thread_p, &btid_int->sys_btid->vfid, &overflow_vpid, FILE_BTREE);
31506  if (error_code != NO_ERROR)
31507  {
31508  ASSERT_ERROR ();
31509  goto error;
31510  }
31511  /* Notification. Use the class OID from the delete helper when it is set, the top class OID otherwise. */
31512  if (!OID_ISNULL (BTREE_DELETE_CLASS_OID (delete_helper)))
31513  {
31514  notification_class_oid = BTREE_DELETE_CLASS_OID (delete_helper);
31515  }
31516  else
31517  {
31518  notification_class_oid = &btid_int->topclass_oid;
31519  }
31520  BTREE_SET_DELETED_OVERFLOW_PAGE_NOTIFICATION (thread_p, key, BTREE_DELETE_OID (delete_helper),
31521  notification_class_oid, btid_int->sys_btid);
31522 
31524 
31525  /* Update previous page link so that it skips the deallocated page. */
31526  if (prev_page == leaf_page)
31527  {
31528  /* Update leaf record link. */
31529  error_code =
31530  btree_modify_leaf_ovfl_vpid (thread_p, btid_int, delete_helper, leaf_page, leaf_record, search_key,
31531  &next_overflow_vpid);
31532  if (error_code != NO_ERROR)
31533  {
31534  ASSERT_ERROR ();
31535  goto error;
31536  }
31537  }
31538  else /* prev_page != leaf_page. */
31539  {
31540  /* Update link in an overflow page. */
31541  error_code = btree_modify_overflow_link (thread_p, btid_int, delete_helper, prev_page, &next_overflow_vpid);
31542  if (error_code != NO_ERROR)
31543  {
31544  ASSERT_ERROR ();
31545  goto error;
31546  }
31547  }
31548 
31550 
31551  /* End system operation, but only if it was started by this call and not by the caller. */
31552  if (delete_helper->is_system_op_started && !save_system_op_started)
31553  {
31554  btree_delete_sysop_end (thread_p, delete_helper);
31555  }
31556  }
31557  else
31558  {
31559  /* More than one object. */
31560 
31561  /* Just remove object from record. The log address points to slot 1 of the overflow page. */
31562  ovf_addr.offset = 1;
31563  ovf_addr.pgptr = *overflow_page;
31564  ovf_addr.vfid = &btid_int->sys_btid->vfid;
31565 
31566  error_code =
31567  btree_record_remove_object (thread_p, btid_int, delete_helper, *overflow_page, &overflow_record, search_key,
31568  BTREE_OVERFLOW_NODE, offset_to_object, &ovf_addr);
31569  if (error_code != NO_ERROR)
31570  {
31571  ASSERT_ERROR ();
31572  goto error;
31573  }
31574  }
31575 
31576  /* Success. */
31577  return NO_ERROR;
31578 
31579 error:
 /* End the system operation only if it was started by this call; one started by the caller is left to the caller. */
31580  if (delete_helper->is_system_op_started && !save_system_op_started)
31581  {
31582  assert (delete_helper->purpose != BTREE_OP_DELETE_UNDO_INSERT
31584  && delete_helper->purpose != BTREE_OP_DELETE_OBJECT_PHYSICAL_POSTPONED);
31585  btree_delete_sysop_end (thread_p, delete_helper);
31586  }
31587  assert_release (error_code != NO_ERROR);
31588  return error_code;
31589 }
31590 
31591 /*
31592  * btree_leaf_remove_object () - Remove an object from leaf record.
31593  *
 * Cases handled:
 *   - Object is not first in the record: it is removed in place by btree_record_remove_object.
 *   - Object is first and the record has other objects: the first object is replaced with the last one.
 *   - Object is the only one in the record and the key has overflow pages: the first object is replaced with an
 *     object taken from the first overflow page.
 *   - Object is the only one in the record and there are no overflow pages: the key is deleted from the leaf.
 *
31594  * return : NO_ERROR on success, ER_FAILED on any failure (the error code of the failing call is not propagated).
31595  * thread_p (in) : Thread entry.
31596  * key (in) : Key value.
31597  * btid_int (in) : B-tree info.
31598  * delete_helper (in/out) : B-tree delete helper. check_key_deleted is reset when the key is deleted here.
31599  * leaf_page (in) : Leaf page.
31600  * leaf_record (in) : Leaf record.
31601  * leaf_rec_info (in) : Leaf record info. Its ovfl VPID tells whether the key has overflow pages.
31602  * offset_after_key (in) : Offset in leaf record where packed key ends.
31603  * search_key (in) : Search key result. Asserted to be BTREE_KEY_FOUND with slotid > 0.
31604  * offset_to_object (in) : Offset to object being removed. Zero means the first object in the record.
31605  *
 * NOTE: there is no leaf_addr parameter; the log address for the leaf record is taken from
 * delete_helper->leaf_addr.
31606  */
31607 static int
31609  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR leaf_page, RECDES * leaf_record,
31610  LEAF_REC * leaf_rec_info, int offset_after_key, BTREE_SEARCH_KEY_HELPER * search_key,
31611  int offset_to_object)
31612 {
31613  int error_code = NO_ERROR; /* Error code. */
31614  OID last_oid; /* Last object OID. */
31615  OID last_class_oid; /* Last object class OID. */
31616  BTREE_MVCC_INFO last_mvcc_info; /* Last object MVCC info. */
31617  int offset_to_last_object = 0; /* Offset to last object. */
31618 
31619  /* Assert expected arguments. */
31620  assert (btid_int != NULL);
31621  assert (delete_helper != NULL);
31622  assert (leaf_page != NULL);
31623  assert (leaf_record != NULL);
31624  assert (search_key != NULL && search_key->result == BTREE_KEY_FOUND && search_key->slotid > 0);
31625  assert (btree_is_delete_object_purpose (delete_helper->purpose));
31626 
31627 #if !defined (NDEBUG)
31628  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, NULL);
31629 #endif /* !NDEBUG */
31630 
31631  /* Remove object from leaf record. */
 /* NOTE(review): every failure below returns ER_FAILED instead of error_code, so the specific error code of the
  * failing call is lost - confirm callers do not need it. */
31632  if (offset_to_object == 0)
31633  {
31634  /* Object to remove is first. */
31635 
31636  /* Get last object in leaf. It is the candidate to take the place of the first object. */
31637  error_code =
31638  btree_record_get_last_object (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_after_key, &last_oid,
31639  &last_class_oid, &last_mvcc_info, &offset_to_last_object);
31640  if (error_code != NO_ERROR)
31641  {
31642  ASSERT_ERROR ();
31643  return ER_FAILED;
31644  }
31645 
31646  if (offset_to_last_object == 0)
31647  {
31648  /* This is the only object in leaf record. */
31649  /* Safe guard: first and last are the same object. */
31650  assert (OID_EQ (BTREE_DELETE_OID (delete_helper), &last_oid));
31651  if (VPID_ISNULL (&leaf_rec_info->ovfl))
31652  {
31653  /* This is the last key object! */
31654  /* Remove key. */
31655  error_code =
31656  btree_delete_key_from_leaf (thread_p, btid_int, leaf_page, leaf_rec_info, delete_helper, search_key);
31657  if (error_code != NO_ERROR)
31658  {
31659  ASSERT_ERROR ();
31660  return ER_FAILED;
31661  }
31662  /* Key was successfully removed. */
31663 
31664  /* MULTI_ROW_UPDATE will try to check key was deleted in btree_key_delete_remove_object. Don't allow it
31665  * since the key no longer exists. */
31666  assert (!delete_helper->check_key_deleted || delete_helper->is_key_deleted);
31667  delete_helper->check_key_deleted = false;
31668 
31669  /* Fall through. */
31670  }
31671  else /* !VPID_ISNULL (&leaf_rec_info->ovfl) */
31672  {
31673  /* Key has overflow objects. Swap one object from first overflow page. */
31674  error_code =
31675  btree_replace_first_oid_with_ovfl_oid (thread_p, btid_int, key, delete_helper, leaf_page, search_key,
31676  leaf_record, &leaf_rec_info->ovfl);
31677  if (error_code != NO_ERROR)
31678  {
31679  ASSERT_ERROR ();
31680  return ER_FAILED;
31681  }
31682  /* First object was successfully replaced. Fall through. */
31683  }
31684  }
31685  else /* offset_to_last_object != 0. */
31686  {
31687  /* Replace first object with last object. */
31688  error_code =
31689  btree_leaf_record_replace_first_with_last (thread_p, btid_int, delete_helper, leaf_page, leaf_record,
31690  search_key, &last_oid, &last_class_oid, &last_mvcc_info,
31691  offset_to_last_object);
31692  if (error_code != NO_ERROR)
31693  {
31694  ASSERT_ERROR ();
31695  return ER_FAILED;
31696  }
31697  }
31698  }
31699  else /* offset_to_object != 0 */
31700  {
31701  /* Not the first object. Just remove it, logging against the leaf address kept in the delete helper. */
31702  error_code =
31703  btree_record_remove_object (thread_p, btid_int, delete_helper, leaf_page, leaf_record, search_key,
31704  BTREE_LEAF_NODE, offset_to_object, &delete_helper->leaf_addr);
31705  if (error_code != NO_ERROR)
31706  {
31707  ASSERT_ERROR ();
31708  return ER_FAILED;
31709  }
31710  }
31711  /* Success. */
31712  return NO_ERROR;
31713 }
31714 
31715  /*
31716  * btree_key_remove_insert_mvccid () - Remove insert MVCCID from object info.
31717  *
31718  * return : Error code. NO_ERROR is also returned when the key or the object is not found.
31719  * thread_p (in) : Thread entry.
31720  * btid_int (in) : B-tree info.
31721  * key (in) : Key of object.
31722  * leaf_page (in) : Leaf page. Asserted to be non-NULL with latch mode >= PGBUF_LATCH_WRITE.
31723  * search_key (in) : Search key result.
31724  * restart (in) : Not used.
31725  * other_args (in) : BTREE_DELETE_HELPER *. Asserted to have purpose BTREE_OP_DELETE_VACUUM_INSID.
  *
  * NOTE: The object is looked up in the leaf record and, through btree_find_oid_and_its_page, possibly in an
  *       overflow page. The modified record is written back with spage_update and the change is logged as
  *       redo-only data (RVBT_RECORD_MODIFY_NO_UNDO). On the exit path, a found page that differs from the leaf
  *       page is unfixed.
31726  */
31727 static int
31728 btree_key_remove_insert_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
31729  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
31730 {
31731  /* btree_delete_internal helper. */
31732  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
31733  int offset_to_object = NOT_FOUND; /* Offset to found object. */
31734  int error_code = NO_ERROR; /* Error code. */
31735  PAGE_PTR found_page = NULL; /* Page of found object. */
31736  RECDES record; /* B-tree record. */
31737  /* Buffer to copy the b-tree record. */
31738  char record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
31739  LEAF_REC leaf_rec_info; /* Leaf record info. */
31740  int offset_after_key = 0; /* Offset after key in leaf record. */
31741  bool dummy_clear_key = false; /* Dummy. */
31742  PGSLOTID slotid; /* Slot ID of record being updated. It is either search_key->slotid if record is from
31743  * leaf or 1 if record is from overflow. */
31744  BTREE_NODE_TYPE node_type; /* Page of found object node type. */
31745  LOG_DATA_ADDR addr; /* Address for recovery. */
31746 
31747  LOG_LSA prev_lsa;
31748 
31749  /* Redo recovery structures. */
31750  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
31751  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
  /* NOTE(review): rv_redo_data_ptr is used below but its declaration is not visible in this listing; presumably it
   * is declared here and initially points to rv_redo_data - verify against the original source. */
31753  int rv_redo_data_length = 0;
31754  /* NOTE: No undo logging is required to vacuum insert MVCCID. */
31755 
31756  /* Assert expected arguments. */
31757  assert (btid_int != NULL);
31758  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
31759  assert (leaf_page != NULL && *leaf_page != NULL && pgbuf_get_latch_mode (*leaf_page) >= PGBUF_LATCH_WRITE);
31760  assert (search_key != NULL);
31761  assert (delete_helper != NULL);
31762  assert (delete_helper->purpose == BTREE_OP_DELETE_VACUUM_INSID);
31764 
31765  btree_perf_track_traverse_time (thread_p, delete_helper);
31766 
31767  if (search_key->result == BTREE_KEY_FOUND)
31768  {
31769  /* Key was found. Find the object. */
31770 
31771  /* Get leaf record. */
31772  record.area_size = DB_PAGESIZE;
31773  record.data = PTR_ALIGN (record_data_buffer, BTREE_MAX_ALIGN);
31774  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
31775  {
31776  assert_release (false);
  /* Direct return: found_page is still NULL here, so there is nothing to unfix. Note that this path does not
   * call btree_perf_track_time. */
31777  return ER_FAILED;
31778  }
31779 
31780 #if !defined (NDEBUG)
31781  (void) btree_check_valid_record (thread_p, btid_int, &record, BTREE_LEAF_NODE, NULL);
31782 #endif /* !NDEBUG */
31783 
31784  error_code =
31785  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
31786  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
31787  if (error_code != NO_ERROR)
31788  {
31789  ASSERT_ERROR ();
31790  goto exit;
31791  }
31792 
31793  /* Search object with insert MVCCID. */
31794  error_code =
31795  btree_find_oid_and_its_page (thread_p, btid_int, BTREE_DELETE_OID (delete_helper), *leaf_page,
31796  delete_helper->purpose, &delete_helper->match_mvccinfo, &record, &leaf_rec_info,
31797  offset_after_key, &found_page, NULL, &offset_to_object,
31798  BTREE_DELETE_MVCC_INFO (delete_helper));
31799  if (error_code != NO_ERROR)
31800  {
31801  ASSERT_ERROR ();
31802  goto exit;
31803  }
31804  }
  /* offset_to_object is still NOT_FOUND if the key was not found at all (the search above was skipped). */
31805  if (offset_to_object == NOT_FOUND)
31806  {
31807  /* Key or object not found. */
31808  /* Object must have been vacuumed/removed already. */
  /* NOTE(review): the first line of the warning call that takes the arguments below is not visible in this
   * listing. */
31810  "Could not find object %d|%d|%d in key=%s to vacuum it.",
31811  delete_helper->object_info.oid.volid, delete_helper->object_info.oid.pageid,
31812  delete_helper->object_info.oid.slotid,
31813  delete_helper->printed_key != NULL ? delete_helper->printed_key : "(unknown)");
31814  btree_delete_log (delete_helper, "could not find object to vacuum its insert MVCCID \n"
31815  BTREE_DELETE_HELPER_MSG ("\t"), BTREE_DELETE_HELPER_AS_ARGS (delete_helper));
  /* Not an error for the caller. NOTE(review): this returns without going through exit; presumably
   * btree_find_oid_and_its_page leaves no overflow page fixed when the object is not found - verify. */
31816  return NO_ERROR;
31817  }
31818  /* Object was found. */
31819  node_type = (found_page == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
31820 
31821  if (node_type == BTREE_OVERFLOW_NODE)
31822  {
31823  /* Get overflow record. */
  /* The record descriptor (and its buffer) used for the leaf record is reused for the overflow record copy. */
31824  slotid = 1;
31825  if (spage_get_record (thread_p, found_page, slotid, &record, COPY) != S_SUCCESS)
31826  {
31827  assert_release (false);
31828  error_code = ER_FAILED;
31829  goto exit;
31830  }
31831 #if !defined (NDEBUG)
31832  (void) btree_check_valid_record (thread_p, btid_int, &record, BTREE_OVERFLOW_NODE, NULL);
31833 #endif /* !NDEBUG */
31834  }
31835  else
31836  {
31837  /* Leaf record was already obtained. */
31838  slotid = search_key->slotid;
31839  }
31840 
31841  /* It should have delete MVCCID. */
31843 
31844  /* Prepare logging. */
31845  addr.offset = slotid;
31846  addr.pgptr = found_page;
31847  addr.vfid = &btid_int->sys_btid->vfid;
31848 
31849 #if !defined (NDEBUG)
31850  /* For debugging recovery. */
31851  BTREE_RV_REDO_SET_DEBUG_INFO (&addr, rv_redo_data_ptr, btid_int, BTREE_RV_DEBUG_ID_REM_INSID);
31852 #endif
31853  if (node_type == BTREE_OVERFLOW_NODE)
31854  {
  /* NOTE(review): the body of this branch is not visible in this listing; presumably it marks addr as referring
   * to an overflow node - verify. */
31856  }
31858 
  /* rv_undo_data and displacement are passed as NULL: only redo data is collected for this change. */
31859  btree_record_remove_insid (thread_p, btid_int, &record, node_type, offset_to_object, NULL, &rv_redo_data_ptr, NULL);
31860  /* Update in page. */
31861  if (spage_update (thread_p, found_page, slotid, &record) != SP_SUCCESS)
31862  {
31863  /* Unexpected. */
31864  assert_release (false);
31865  error_code = ER_FAILED;
31866  goto exit;
31867  }
31868 
31870 
31871  /* We need to log previous lsa. */
  /* prev_lsa is captured before the log record is appended and is only used by the btree_delete_log call below. */
31872  LSA_COPY (&prev_lsa, pgbuf_get_lsa (found_page));
31873 
31874  /* Logging. */
31875  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
31876  log_append_redo_data (thread_p, RVBT_RECORD_MODIFY_NO_UNDO, &addr, rv_redo_data_length, rv_redo_data);
31877 
31879 
31880  pgbuf_set_dirty (thread_p, found_page, DONT_FREE);
31881 
31882  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("removed insert MVCCID"),
31883  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, found_page, &prev_lsa,
31884  node_type == BTREE_LEAF_NODE, slotid, record.length, btid_int->sys_btid));
31885 
31886 exit:
  /* Unfix the found page only when it is not the leaf page; the leaf page was passed in by the caller and is left
   * fixed. */
31887  if (found_page != NULL && found_page != *leaf_page)
31888  {
31889  pgbuf_unfix_and_init (thread_p, found_page);
31890  }
31891 
31892  btree_perf_track_time (thread_p, delete_helper);
31893  return error_code;
31894 }
31895 
31896 /*
31897  * btree_key_remove_delete_mvccid () - Remove delete MVCCID from object info.
31898  *
31899  * return : Error code.
31900  * thread_p (in) : Thread entry.
31901  * btid_int (in) : B-tree info.
31902  * key (in) : Key of object.
31903  * leaf_page (in) : Leaf page.
31904  * search_key (in) : Search key result.
31905  * restart (in) : Not used.
31906  * other_args (in) : BTREE_DELETE_HELPER *
31907  */
31908 static int
31909 btree_key_remove_delete_mvccid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, DB_VALUE * key, PAGE_PTR * leaf_page,
31910  BTREE_SEARCH_KEY_HELPER * search_key, bool * restart, void *other_args)
31911 {
31912  /* btree_delete_internal helper. */
31913  BTREE_DELETE_HELPER *delete_helper = (BTREE_DELETE_HELPER *) other_args;
31914  int offset_to_object = NOT_FOUND; /* Offset to found object. */
31915  int error_code = NO_ERROR; /* Error code. */
31916  PAGE_PTR found_page = NULL; /* Page of found object. */
31917  RECDES leaf_record; /* B-tree leaf record. */
31918  RECDES overflow_record; /* B-tree overflow record. */
31919  /* Buffers to copy b-tree records. */
31920  char leaf_record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
31921  char ovf_record_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
31922  LEAF_REC leaf_rec_info; /* Leaf leaf_record info. */
31923  int offset_after_key = 0; /* Offset after key in leaf leaf_record. */
31924  bool dummy_clear_key = false; /* Dummy. */
31925  PGSLOTID slotid; /* Slot ID of leaf_record being updated. It is either search_key->slotid if leaf_record
31926  * is from leaf or 1 if leaf_record is from overflow. */
31927  BTREE_NODE_TYPE node_type; /* Page of found object node type. */
31928 
31929  /* Redo recovery structures. */
31930  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
31931 
31932  /* No undo logging is required during rollback. */
31933 
31934  /* Assert expected arguments. */
31935  assert (btid_int != NULL);
31936  assert (key != NULL && !DB_IS_NULL (key) && !btree_multicol_key_is_null (key));
31937  assert (leaf_page != NULL && *leaf_page != NULL && pgbuf_get_latch_mode (*leaf_page) >= PGBUF_LATCH_WRITE);
31938  assert (search_key != NULL);
31939  assert (delete_helper != NULL);
31940  assert (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT_DELID);
31941 
31942  btree_perf_track_traverse_time (thread_p, delete_helper);
31943 
31944  if (search_key->result == BTREE_KEY_FOUND)
31945  {
31946  /* Key was found. Try to find object. */
31947 
31948  /* Get leaf leaf record. */
31949  leaf_record.area_size = DB_PAGESIZE;
31950  leaf_record.data = PTR_ALIGN (leaf_record_data_buffer, BTREE_MAX_ALIGN);
31951  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &leaf_record, COPY) != S_SUCCESS)
31952  {
31953  assert_release (false);
31954  return ER_FAILED;
31955  }
31956 
31957 #if !defined (NDEBUG)
31958  (void) btree_check_valid_record (thread_p, btid_int, &leaf_record, BTREE_LEAF_NODE, NULL);
31959 #endif /* !NDEBUG */
31960 
31961  error_code =
31962  btree_read_record (thread_p, btid_int, *leaf_page, &leaf_record, NULL, &leaf_rec_info, BTREE_LEAF_NODE,
31963  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
31964  if (error_code != NO_ERROR)
31965  {
31966  ASSERT_ERROR ();
31967  goto exit;
31968  }
31969 
31970  /* Find object. */
31971  error_code =
31972  btree_find_oid_and_its_page (thread_p, btid_int, BTREE_DELETE_OID (delete_helper), *leaf_page,
31973  delete_helper->purpose, &delete_helper->match_mvccinfo, &leaf_record,
31974  &leaf_rec_info, offset_after_key, &found_page, NULL, &offset_to_object,
31975  BTREE_DELETE_MVCC_INFO (delete_helper));
31976  if (error_code != NO_ERROR)
31977  {
31978  ASSERT_ERROR ();
31979  goto exit;
31980  }
31981  }
31982  if (offset_to_object == NOT_FOUND)
31983  {
31984  /* Key or object not found, but it should have been found. */
31985  assert_release (false);
31986  btree_set_unknown_key_error (thread_p, btid_int->sys_btid, key,
31987  "btree_key_remove_delete_mvccid: key was not found.");
31988  error_code = ER_BTREE_UNKNOWN_KEY;
31989  goto exit;
31990  }
31991  /* Object was found. */
31992 
31993  /* Prepare logging. */
31994  delete_helper->rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
31995  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
31996 
31997  /* Where was object found? */
31998  node_type = (found_page == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
31999 
32000  if (node_type == BTREE_OVERFLOW_NODE)
32001  {
32002  /* Get overflow record. */
32003  slotid = 1;
32004  overflow_record.data = PTR_ALIGN (ovf_record_data_buffer, BTREE_MAX_ALIGN);
32005  overflow_record.area_size = DB_PAGESIZE;
32006  if (spage_get_record (thread_p, found_page, slotid, &overflow_record, COPY) != S_SUCCESS)
32007  {
32008  assert_release (false);
32009  error_code = ER_FAILED;
32010  goto exit;
32011  }
32012 #if !defined (NDEBUG)
32013  (void) btree_check_valid_record (thread_p, btid_int, &overflow_record, BTREE_OVERFLOW_NODE, NULL);
32014 #endif /* !NDEBUG */
32015  }
32016  else
32017  {
32018  /* Leaf leaf_record was already obtained. */
32019  slotid = search_key->slotid;
32020  }
32021 
32022  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
32023  {
32024  PAGE_PTR overflow_page = node_type == BTREE_OVERFLOW_NODE ? found_page : NULL;
32025  RECDES *ovf_record_p = node_type == BTREE_OVERFLOW_NODE ? &overflow_record : NULL;
32026 
32027  error_code =
32028  btree_key_remove_delete_mvccid_unique (thread_p, btid_int, delete_helper, search_key, *leaf_page, &leaf_record,
32029  overflow_page, ovf_record_p, node_type, offset_to_object);
32030  if (error_code != NO_ERROR)
32031  {
32032  ASSERT_ERROR ();
32033  goto exit;
32034  }
32035  }
32036  else
32037  {
32038  RECDES *recdes_p = node_type == BTREE_OVERFLOW_NODE ? &overflow_record : &leaf_record;
32039 
32040  error_code =
32041  btree_key_remove_delete_mvccid_non_unique (thread_p, btid_int, delete_helper, found_page, recdes_p, slotid,
32042  node_type, offset_to_object);
32043  if (error_code != NO_ERROR)
32044  {
32045  ASSERT_ERROR ();
32046  goto exit;
32047  }
32048  }
32049 
32050 exit:
32051  if (found_page != NULL && found_page != *leaf_page)
32052  {
32053  pgbuf_unfix_and_init (thread_p, found_page);
32054  }
32055 
32056  btree_perf_track_time (thread_p, delete_helper);
32057  return error_code;
32058 }
32059 
32060  /*
32061  * btree_key_remove_delete_mvccid_unique () - Remove delete MVCCID from an object in unique index as part of undoing a
32062  * MVCC delete operation.
32063  *
32064  * return : Error code. ER_FAILED if the leaf record cannot be updated in its page.
32065  * thread_p (in) : Thread entry.
32066  * btid_int (in) : B-tree info.
32067  * delete_helper (in) : B-tree delete helper.
32068  * search_key (in) : Search key result.
32069  * leaf_page (in) : Leaf node page.
32070  * leaf_record (in) : Key's leaf record.
32071  * overflow_page (in) : Overflow node page (if object was found in overflow page).
32072  * overflow_record (in) : Overflow record (if object was found in overflow page).
32073  * node_type (in) : Node type of page where object was found.
32074  * offset_to_object (in) : Offset to object in its record.
  *
  * NOTE: When the object is in an overflow node, a system operation is started and the leaf change is logged with
  *       undo-redo data (RVBT_RECORD_MODIFY_UNDOREDO). On failure after the system operation was started, it is
  *       aborted before returning.
32075  */
32076 static int
  /* NOTE(review): the first line of the signature (function name and leading parameters) is not visible in this
   * listing. */
32078  BTREE_DELETE_HELPER * delete_helper, BTREE_SEARCH_KEY_HELPER * search_key,
32079  PAGE_PTR leaf_page, RECDES * leaf_record, PAGE_PTR overflow_page,
32080  RECDES * overflow_record, BTREE_NODE_TYPE node_type, int offset_to_object)
32081 {
32082  int error_code = NO_ERROR; /* Error code. */
32083  LOG_DATA_ADDR leaf_addr; /* Leaf record address for logging. */
32084 
32085  LOG_LSA prev_lsa;
32086 
32087  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
32088  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
  /* Stays NULL unless a system operation is started below; undo data is only collected in that case. */
32089  char *rv_undo_data_ptr = NULL;
32090  int rv_undo_data_length = 0;
32091  int rv_redo_data_length = 0; /* Length of redo recovery data. */
32092 
32093  /* Assert expected arguments. */
32094  assert (btid_int != NULL && BTREE_IS_UNIQUE (btid_int->unique_pk));
32095  assert (delete_helper != NULL);
32096  assert (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT_DELID);
32097  assert (leaf_page != NULL);
32098  assert (leaf_record != NULL && leaf_record->data != NULL);
32099  assert (search_key != NULL && search_key->slotid > 0);
32100  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
32101  assert (node_type == BTREE_LEAF_NODE || (overflow_page != NULL && overflow_record != NULL));
32102  assert (offset_to_object >= 0
32103  && offset_to_object < (node_type == BTREE_LEAF_NODE ? leaf_record->length : overflow_record->length));
32105 
32106  /* Prepare logging for leaf record. */
32107  leaf_addr.offset = search_key->slotid;
32108  leaf_addr.pgptr = leaf_page;
32109  leaf_addr.vfid = &btid_int->sys_btid->vfid;
32110 
32111  if (node_type == BTREE_OVERFLOW_NODE)
32112  {
32113  /* Two pages will be modified, system operation is required and undoredo logging. */
32114  log_sysop_start (thread_p);
32115  delete_helper->is_system_op_started = true;
32116  rv_undo_data_ptr = rv_undo_data;
32117  }
32118 
32119 #if !defined (NDEBUG)
  /* NOTE(review): the last line of this macro call is not visible in this listing. */
32120  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&leaf_addr, delete_helper->rv_redo_data_ptr, rv_undo_data_ptr, btid_int,
32122 #endif /* !NDEBUG */
32124 
32125  /* Remove delete MVCCID & swap with first object in leaf record. */
32126  error_code =
32127  btree_remove_delete_mvccid_unique_internal (thread_p, btid_int, delete_helper, leaf_page, leaf_record, node_type,
32128  overflow_page, overflow_record, offset_to_object, &rv_undo_data_ptr,
32129  &delete_helper->rv_redo_data_ptr);
32130  if (error_code != NO_ERROR)
32131  {
32132  assert_release (false);
32133  if (delete_helper->is_system_op_started)
32134  {
  /* NOTE(review): is_system_op_started is not cleared here after the abort - confirm callers do not rely on
   * the flag afterwards. */
32135  log_sysop_abort (thread_p);
32136  }
32137  return error_code;
32138  }
32139 
32140  /* Update in page. */
32141  if (spage_update (thread_p, leaf_page, search_key->slotid, leaf_record) != SP_SUCCESS)
32142  {
32143  assert_release (false);
32144  if (delete_helper->is_system_op_started)
32145  {
32146  log_sysop_abort (thread_p);
32147  }
32148  return ER_FAILED;
32149  }
32150 
32152 
32153  /* We need to log previous lsa. */
  /* prev_lsa is captured before logging and is only used by the btree_delete_log call below. */
32154  LSA_COPY (&prev_lsa, pgbuf_get_lsa (leaf_page));
32155 
32156  /* Add compensate log. */
32157  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
32158  if (delete_helper->is_system_op_started)
32159  {
  /* Inside a system operation: log the leaf change with both undo and redo data, then end the operation. */
32160  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
32161  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &leaf_addr, rv_undo_data_length,
32162  rv_redo_data_length, rv_undo_data, delete_helper->rv_redo_data);
32163 
32164  btree_delete_sysop_end (thread_p, delete_helper);
32165  }
32166  else
32167  {
  /* NOTE(review): the first line of the logging call that takes the arguments below is not visible in this
   * listing; it receives the redo data and the helper's reference_lsa. */
32169  leaf_addr.offset, leaf_page, rv_redo_data_length,
32170  delete_helper->rv_redo_data, LOG_FIND_CURRENT_TDES (thread_p),
32171  &delete_helper->reference_lsa);
32172  }
32173  pgbuf_set_dirty (thread_p, leaf_page, DONT_FREE);
32174 
32175  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("unique remove delete MVCCID"),
32176  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, leaf_page, &prev_lsa, true, search_key->slotid,
32177  leaf_record->length, btid_int->sys_btid));
32178 
32180 
32181  /* Success. */
32182  return NO_ERROR;
32183 }
32184 
32185  /*
32186  * btree_remove_delete_mvccid_unique_internal () - Internal function that will remove delete MVCCID for an object in an
32187  * unique index. It will take care to also move the object to the
32188  * first position in leaf record.
32189  *
32190  * return : Error code.
32191  * thread_p (in) : Thread entry.
32192  * btid_int (in) : B-tree info.
32193  * helper (in) : B-tree delete helper.
32194  * leaf_page (in) : Leaf node (where object key is found).
32195  * leaf_record (in) : Leaf record.
32196  * node_type (in) : Node type where object is found (leaf or overflow).
32197  * overflow_page (in) : Page pointer to overflow node. Only used if object is in an overflow node.
32198  * overflow_record (in) : Overflow record. Only used if object is in an overflow node.
32199  * offset_to_object (in) : Offset to object in its record.
32200  * rv_undo_data (out) : If not NULL, outputs undo data recovery for leaf node changes.
32201  * rv_redo_data (out) : If not NULL, outputs redo data recovery for leaf node changes.
  *
  * NOTE: This function only modifies the in-memory leaf record and packs recovery data for it; according to its
  *       log messages, logging of the leaf change is postponed. When the object is in an overflow node, the
  *       overflow record change is delegated to btree_overflow_record_replace_object.
32202  */
32203 static int
  /* NOTE(review): the first line of the signature (function name and leading parameters) is not visible in this
   * listing. */
32205  PAGE_PTR leaf_page, RECDES * leaf_record, BTREE_NODE_TYPE node_type,
32206  PAGE_PTR overflow_page, RECDES * overflow_record, int offset_to_object,
32207  char **rv_undo_data, char **rv_redo_data)
32208 {
32209  int error_code = NO_ERROR;
  /* NOTE(review): first_object is used below but its declaration is not visible in this listing. */
32211 
32212  /* Assert expected arguments. */
32213  assert (btid_int != NULL);
32214  assert (helper != NULL);
32216  assert (leaf_page != NULL);
32217  assert (leaf_record != NULL);
32218  assert (node_type == BTREE_LEAF_NODE || (overflow_page != NULL && overflow_record != NULL));
32219  assert (offset_to_object >= 0);
32220 
32221  /* Undoing MVCC delete should consider one rule for unique index: the non-dirty visible object should always be
32222  * first. When this object was deleted, it may have been relocated from the first position in leaf record. If that's
32223  * the case, we now have to move it back. */
32224 
32225  /* Get the first object in leaf record. */
32226  error_code =
32227  btree_leaf_get_first_object (btid_int, leaf_record, &first_object.oid, &first_object.class_oid,
32228  &first_object.mvcc_info);
32229  if (error_code != NO_ERROR)
32230  {
32231  ASSERT_ERROR ();
32232  return error_code;
32233  }
32234 
32235  if (node_type == BTREE_LEAF_NODE && offset_to_object == 0)
32236  {
32237  /* Object is already first in leaf record. We need no swapping, just remove its delete MVCCID. */
32238  btree_record_remove_delid (thread_p, btid_int, leaf_record, BTREE_LEAF_NODE, offset_to_object, rv_undo_data,
32239  rv_redo_data);
32240  return NO_ERROR;
32241  }
32242  /* Object is not first and must be swapped with first. */
32243 
32244  /* Since first object is going to be relocated, it will have fixed size. */
32245  BTREE_MVCC_INFO_SET_FIXED_SIZE (&first_object.mvcc_info);
32246 
32247  /* Since our object becomes first, if there are no overflow OID's, having fixed size is no longer required. */
  /* NOTE(review): the condition guarding this block and the statement inside it are not visible in this listing. */
32249  {
32250  /* Clear unnecessary MVCC info. */
32252  }
32253  /* Remove delete MVCCID from object. */
32255 
32256  /* Object are ready to be swapped. */
32257 
32258  /* Where is object found (leaf or overflow node). */
32259  if (node_type == BTREE_LEAF_NODE)
32260  {
32261  /* Object belongs to leaf. */
32262  /* Replace our object with first object. */
32263  char *oid_ptr = leaf_record->data + offset_to_object;
32264  int object_fixed_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
32265 
32266  /* Objects are fixed size, therefore replacing data is the same size. Just pack first object info where our
32267  * object used to be. */
32268 
32269  /* Undo logging */
  /* Packed before btree_pack_object overwrites the bytes at oid_ptr, so it is given the old object data. */
32270  if (rv_undo_data != NULL && *rv_undo_data != NULL)
32271  {
32272  *rv_undo_data =
32273  log_rv_pack_undo_record_changes (*rv_undo_data, offset_to_object, object_fixed_size, object_fixed_size,
32274  oid_ptr);
32275  }
32276 
32277  (void) btree_pack_object (oid_ptr, btid_int, BTREE_LEAF_NODE, leaf_record, &first_object);
32278 #if !defined (NDEBUG)
32279  (void) btree_check_valid_record (thread_p, btid_int, leaf_record, node_type, NULL);
32280 #endif /* !NDEBUG */
32281 
32282  /* Redo logging. */
  /* Packed after the overwrite, so it is given the new object data at oid_ptr. */
32283  if (rv_redo_data != NULL && *rv_redo_data != NULL)
32284  {
32285  *rv_redo_data =
32286  log_rv_pack_redo_record_changes (*rv_redo_data, offset_to_object, object_fixed_size, object_fixed_size,
32287  oid_ptr);
32288  }
32289 
32290  btree_delete_log (helper, "swapped first object (logging is postponed) \n"
32291  "\t" BTREE_OBJINFO_MSG ("first object") "\n"
32292  "\t" PGBUF_PAGE_STATE_MSG ("leaf page") "\n\t" BTREE_ID_MSG,
32293  BTREE_OBJINFO_AS_ARGS (&first_object), PGBUF_PAGE_STATE_ARGS (leaf_page),
32294  BTID_AS_ARGS (btid_int->sys_btid));
32295  }
32296  else
32297  {
32298  /* Object belongs to overflow. */
  /* The overflow change is logged by the callee, which asserts that a system operation is in progress. */
32299  assert (helper->is_system_op_started);
32300 
32301  /* Swap first object to overflow page and replace it with our object. */
32302  error_code =
32303  btree_overflow_record_replace_object (thread_p, btid_int, helper, overflow_page, overflow_record,
32304  &offset_to_object, &first_object);
32305  if (error_code != NO_ERROR)
32306  {
32307  assert_release (false);
32308  return error_code;
32309  }
32310  }
32311 
32313 
32314  /* Replace first object. */
  /* The object described by the helper (OID, class OID and MVCC info) becomes the first object of the leaf record. */
32315  btree_leaf_change_first_object (thread_p, leaf_record, btid_int, BTREE_DELETE_OID (helper),
32316  BTREE_DELETE_CLASS_OID (helper), BTREE_DELETE_MVCC_INFO (helper), NULL, rv_undo_data,
32317  rv_redo_data);
32318 
32319  btree_delete_log (helper, "successfully moved object and removed its delete MVCCID %llu (logging is postponed) \n"
32320  BTREE_DELETE_HELPER_MSG ("\t") "\t" PGBUF_PAGE_STATE_MSG ("leaf page") "\n\t" BTREE_ID_MSG,
32321  (unsigned long long int) helper->match_mvccinfo.delete_mvccid,
32322  BTREE_DELETE_HELPER_AS_ARGS (helper), PGBUF_PAGE_STATE_ARGS (leaf_page),
32323  BTID_AS_ARGS (btid_int->sys_btid));
32324 
32325  /* Success */
32326  return NO_ERROR;
32327 }
32328 
32329  /*
32330  * btree_key_remove_delete_mvccid_non_unique () - Remove delete MVCCID from an index object as part of undoing a MVCC
32331  * delete operation.
32332  *
32333  * return : Error code. ER_FAILED if the record cannot be updated in its page, NO_ERROR otherwise.
32334  * thread_p (in) : Thread entry.
32335  * btid_int (in) : B-tree info.
32336  * delete_helper (in) : B-tree delete helper.
32337  * page (in) : Leaf or overflow page.
32338  * record (in) : Leaf or overflow record.
32339  * slotid (in) : Slot ID of record.
32340  * node_type (in) : BTREE_LEAF_NODE or BTREE_OVERFLOW_NODE.
32341  * offset_to_object (in) : Offset to object in its record.
32342  *
32343  * NOTE: Even though this function is targeted for non-unique indexes, it can be used in one case for unique indexes:
32344  * when the object being undone is already first in leaf record and does not require relocation.
32345  */
32346 static int
  /* NOTE(review): the first line of the signature (function name and leading parameters) is not visible in this
   * listing. */
32348  BTREE_DELETE_HELPER * delete_helper, PAGE_PTR page, RECDES * record,
32349  PGSLOTID slotid, BTREE_NODE_TYPE node_type, int offset_to_object)
32350 {
32351  LOG_DATA_ADDR addr; /* Log address for record. */
32352  int rv_redo_data_length = 0; /* Redo recovery data length. */
  /* NOTE(review): tde_algo is not referenced in the lines visible in this listing. */
32353  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
32354 
32355  LOG_LSA prev_lsa;
32356 
32357  /* Assert expected arguments. */
32358  assert (btid_int != NULL);
32359  assert (delete_helper != NULL);
32360  assert (delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT_DELID);
32361  assert (page != NULL);
32362  assert (record != NULL && record->data != NULL);
32363  assert (slotid > 0);
32364  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
32365  assert (offset_to_object >= 0 && offset_to_object < record->length);
  /* For a unique index, only the first object of a leaf record is accepted here (see NOTE in the header). */
32366  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || (node_type == BTREE_LEAF_NODE && offset_to_object == 0));
32367 
32368  /* Prepare logging before starting changes. */
32369  addr.offset = slotid;
32370  addr.pgptr = page;
32371  addr.vfid = &btid_int->sys_btid->vfid;
32372 
32373 #if !defined (NDEBUG)
32374  /* For debugging recovery. */
  /* NOTE(review): the last line of this macro call is not visible in this listing. */
32375  BTREE_RV_REDO_SET_DEBUG_INFO (&addr, delete_helper->rv_redo_data_ptr, btid_int,
32377 #endif /* !NDEBUG */
32378  if (node_type == BTREE_OVERFLOW_NODE)
32379  {
  /* NOTE(review): the body of this branch is not visible in this listing; presumably it marks addr as referring
   * to an overflow node - verify. */
32381  }
32383 
  /* rv_undo_data is passed as NULL: only redo data is collected, into the helper's redo buffer. */
32384  btree_record_remove_delid (thread_p, btid_int, record, node_type, offset_to_object, NULL,
32385  &delete_helper->rv_redo_data_ptr);
32386 
32387  /* Update in page. */
32388  if (spage_update (thread_p, page, slotid, record) != SP_SUCCESS)
32389  {
32390  /* Unexpected. */
32391  assert_release (false);
32392  return ER_FAILED;
32393  }
32394 
32396 
32397  /* Add logging. */
  /* prev_lsa is captured before logging and is only used by the btree_delete_log call below. */
32398  prev_lsa = *pgbuf_get_lsa (page);
32399  BTREE_RV_GET_DATA_LENGTH (delete_helper->rv_redo_data_ptr, delete_helper->rv_redo_data, rv_redo_data_length);
  /* NOTE(review): the first line of the logging call that takes the arguments below is not visible in this
   * listing; it receives the redo data and the helper's reference_lsa. */
32401  addr.offset, page, rv_redo_data_length, delete_helper->rv_redo_data,
32402  LOG_FIND_CURRENT_TDES (thread_p), &delete_helper->reference_lsa);
32403 
32405 
32406  pgbuf_set_dirty (thread_p, page, DONT_FREE);
32407 
32408  btree_delete_log (delete_helper, BTREE_DELETE_MODIFY_MSG ("removed delete MVCCID %llu"),
32409  (unsigned long long int) delete_helper->object_info.mvcc_info.delete_mvccid,
32410  BTREE_DELETE_MODIFY_ARGS (thread_p, delete_helper, page, &prev_lsa, node_type == BTREE_LEAF_NODE,
32411  slotid, record->length, btid_int->sys_btid));
32412 
32413  return NO_ERROR;
32414 }
32415 
32416  /*
32417  * btree_overflow_record_replace_object () - Replace an object from an overflow record with another object. Part of
32418  * remove MVCCID algorithm for unique indexes.
32419  *
32420  * return : Error code. ER_FAILED if the overflow record cannot be updated in its page, NO_ERROR otherwise.
32421  * thread_p (in) : Thread entry.
32422  * btid_int (in) : B-tree info.
32423  * delete_helper (in) : B-tree delete helper. Asserted to have a system operation started.
32424  * overflow_page (in) : Overflow page.
32425  * overflow_record (in) : Overflow record.
32426  * offset_to_replaced_object (in) : Pointer to the offset of the object being replaced. It is passed by address to
  *                                  btree_record_replace_object. NOTE(review): it may be updated there - verify.
32427  * replacing_object (in) : Object info for replacement.
  *
  * NOTE: The overflow record is always written to slot 1 of the overflow page, and the change is logged with both
  *       undo and redo data (RVBT_RECORD_MODIFY_UNDOREDO).
32428  */
32429 static int
  /* NOTE(review): the first line of the signature (function name and leading parameters) is not visible in this
   * listing. */
32431  PAGE_PTR overflow_page, RECDES * overflow_record, int *offset_to_replaced_object,
32432  BTREE_OBJECT_INFO * replacing_object)
32433 {
32434  /* Redo recovery data. */
32435  LOG_DATA_ADDR overflow_addr;
32436  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
32437  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
  /* NOTE(review): rv_redo_data_ptr is used below but its declaration is not visible in this listing; presumably it
   * mirrors rv_undo_data_ptr and initially points to rv_redo_data - verify. */
32439  int rv_redo_data_length = 0;
32440 
  /* Undo recovery data. */
32441  char rv_undo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
32442  char *rv_undo_data = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
32443  char *rv_undo_data_ptr = rv_undo_data;
32444  int rv_undo_data_length = 0;
32445 
32446  /* Assert expected arguments. */
32447  assert (btid_int != NULL);
32448  assert (delete_helper != NULL);
  /* NOTE(review): the first line of the following assert on delete_helper->purpose is not visible in this listing. */
32450  || delete_helper->purpose == BTREE_OP_DELETE_UNDO_INSERT_DELID);
32451  assert (overflow_page != NULL);
32452  assert (overflow_record != NULL);
32453  assert (offset_to_replaced_object != NULL);
32454  assert ((*offset_to_replaced_object) >= 0 && (*offset_to_replaced_object) < overflow_record->length);
32455  assert (replacing_object != NULL);
32456 
32457  assert (delete_helper->is_system_op_started);
32458 
32459  /* Prepare logging. */
  /* The overflow record is addressed at slot 1, the same slot used by spage_update below. */
32460  overflow_addr.offset = 1;
32461  overflow_addr.pgptr = overflow_page;
32462  overflow_addr.vfid = &btid_int->sys_btid->vfid;
32463 
32464 #if !defined (NDEBUG)
  /* NOTE(review): the last line of this macro call is not visible in this listing. */
32465  BTREE_RV_UNDOREDO_SET_DEBUG_INFO (&overflow_addr, rv_redo_data_ptr, rv_undo_data_ptr, btid_int,
32467 #endif /* !NDEBUG */
32468  BTREE_RV_SET_OVERFLOW_NODE (&overflow_addr);
32469 
  /* Replace the object in the in-memory record and collect undo/redo data for the change. */
32470  btree_record_replace_object (thread_p, btid_int, overflow_record, BTREE_OVERFLOW_NODE, offset_to_replaced_object,
32471  replacing_object, &rv_undo_data_ptr, &rv_redo_data_ptr);
32472 
32473  /* Update page. */
32474  if (spage_update (thread_p, overflow_page, 1, overflow_record) != SP_SUCCESS)
32475  {
32476  assert_release (false);
32477  return ER_FAILED;
32478  }
32479 
32481 
32482  /* Add undoredo logging. */
32483  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
32484  BTREE_RV_GET_DATA_LENGTH (rv_undo_data_ptr, rv_undo_data, rv_undo_data_length);
32485  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &overflow_addr, rv_undo_data_length,
32486  rv_redo_data_length, rv_undo_data, rv_redo_data);
32487  pgbuf_set_dirty (thread_p, overflow_page, DONT_FREE);
32488 
32490 
32491  return NO_ERROR;
32492 }
32493 
32494 /*
32495  * btree_record_remove_insid () - Remove object insert MVCCID from b-tree record.
32496  *
32497  * return : Void.
32498  * thread_p (in) : Thread entry.
32499  * btid_int (in) : B-tree info.
32500  * record (in/out) : B-tree record.
32501  * node_type (in) : Leaf or overflow node type.
32502  * offset_to_object (in) : Offset to object in record data.
32503  * rv_undo_data (out) : If not NULL, output redo recovery data for the change.
32504  * rv_redo_data (out) : If not NULL, output redo recovery data for the change.
32505  * displacement (out) : Output the displacement of the rest of the record.
32506  */
32507 static void
32508 btree_record_remove_insid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type,
32509  int offset_to_object, char **rv_undo_data, char **rv_redo_data, int *displacement)
32510 {
32511  int insert_mvccid_offset;
32512  bool has_fixed_size = false;
32513  MVCCID all_visible_mvccid = MVCCID_ALL_VISIBLE;
32514 
32515  /* Assert expected arguments. */
32516  assert (btid_int != NULL);
32517  assert (record != NULL);
32518  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
32519  assert (offset_to_object >= 0 && offset_to_object < record->length);
32520 
32521  has_fixed_size = ((node_type == BTREE_OVERFLOW_NODE)
32522  || (offset_to_object > 0 && BTREE_IS_UNIQUE (btid_int->unique_pk))
32523  || (offset_to_object == 0 && btree_leaf_is_flaged (record, BTREE_LEAF_RECORD_OVERFLOW_OIDS)));
32524 
32525  /* Where is insert MVCCID. */
32526  /* Skip object OID. */
32527  insert_mvccid_offset = offset_to_object + OR_OID_SIZE;
32528 
32529  if (btree_is_class_oid_packed (btid_int, record, node_type, (offset_to_object == 0)))
32530  {
32531  /* Also class OID is stored. */
32532  insert_mvccid_offset += OR_OID_SIZE;
32533  }
32534 
32535  if (has_fixed_size)
32536  {
32537  btree_set_mvccid (record, insert_mvccid_offset, &all_visible_mvccid, rv_undo_data, rv_redo_data);
32538  }
32539  else
32540  {
32541  btree_remove_mvccid (record, offset_to_object, insert_mvccid_offset, BTREE_OID_HAS_MVCC_INSID, rv_undo_data,
32542  rv_redo_data);
32543 
32544  if (displacement != NULL)
32545  {
32546  *displacement = -OR_MVCCID_SIZE;
32547  }
32548  }
32549 
32550 #if !defined (NDEBUG)
32551  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
32552 #endif /* !NDEBUG */
32553 }
32554 
32555 /*
32556  * btree_record_remove_delid () - Remove object delete MVCCID from b-tree record.
32557  *
32558  * return : Void.
32559  * thread_p (in) : Thread entry.
32560  * btid_int (in) : B-tree info.
32561  * record (in/out) : B-tree record.
32562  * node_type (in) : Leaf or overflow node type.
32563  * offset_to_object (in) : Offset to object in record data.
32564  * rv_undo_data (out) : If not NULL, output undo recovery data for the change.
32565  * rv_redo_data (out) : If not NULL, output redo recovery data for the change.
32566  */
32567 static void
32568 btree_record_remove_delid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type,
32569  int offset_to_object, char **rv_undo_data, char **rv_redo_data)
32570 {
32571  int offset_to_delete_mvccid;
32572  bool has_fixed_size;
32573  MVCCID null_mvccid = MVCCID_NULL;
32574 
32575  /* Assert expected arguments. */
32576  assert (btid_int != NULL);
32577  assert (record != NULL);
32578  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
32579  assert (offset_to_object >= 0 && offset_to_object < record->length);
32580 
32581  /* Safe guard: unique indexes are not allowed to remove delete MVCCID unless it is the first object. Otherwise,
32582  * object should be relocated to first position. */
32583  assert (!BTREE_IS_UNIQUE (btid_int->unique_pk) || (node_type == BTREE_LEAF_NODE && offset_to_object == 0));
32584 
32585  has_fixed_size = (node_type == BTREE_OVERFLOW_NODE
32586  || (offset_to_object == 0 && btree_leaf_is_flaged (record, BTREE_LEAF_RECORD_OVERFLOW_OIDS)));
32587 
32588  /* Compute offset to delete MVCCID. */
32589  /* Start with offset_to_object. */
32590  /* OID is always saved. */
32591  offset_to_delete_mvccid = offset_to_object + OR_OID_SIZE;
32592 
32594  {
32595  /* Class OID is also saved. */
32596  offset_to_delete_mvccid += OR_OID_SIZE;
32597  }
32598  if (has_fixed_size || btree_record_object_is_flagged (record->data + offset_to_object, BTREE_OID_HAS_MVCC_INSID))
32599  {
32600  /* Insert MVCCID is also saved. */
32601  offset_to_delete_mvccid += OR_MVCCID_SIZE;
32602  }
32603 
32604  /* Remove or replace delete MVCCID. */
32605  if (has_fixed_size)
32606  {
32607  btree_set_mvccid (record, offset_to_delete_mvccid, &null_mvccid, rv_undo_data, rv_redo_data);
32608  }
32609  else
32610  {
32611  btree_remove_mvccid (record, offset_to_object, offset_to_delete_mvccid, BTREE_OID_HAS_MVCC_DELID, rv_undo_data,
32612  rv_redo_data);
32613  }
32614 
32615 #if !defined (NDEBUG)
32616  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
32617 #endif /* !NDEBUG */
32618 }
32619 
32620 /*
32621  * btree_record_add_delid () - Add object delete MVCCID to b-tree record.
32622  *
32623  * return : Void.
32624  * thread_p (in) : Thread entry.
32625  * btid_int (in) : B-tree info.
32626  * record (in/out) : B-tree record.
32627  * node_type (in) : Leaf or overflow node type.
32628  * offset_to_object (in) : Offset to object in record data.
32629  * delete_mvccid (in) : Delete MVCCID to add.
32630  * rv_undo_data (out) : If not NULL, output undo recovery data for the change.
32631  * rv_redo_data (out) : If not NULL, output redo recovery data for the change.
 *
 * The delete MVCCID is located after the instance OID, the class OID (when packed) and the insert MVCCID (when
 * present). It is either overwritten in place (btree_set_mvccid) or inserted (btree_add_mvccid).
 *
 * NOTE(review): two condition lines are not visible in this listing (see the notes in the body); confirm them
 * against the original source before relying on this text.
32632  */
32633 static void
32634 btree_record_add_delid (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type,
32635  int offset_to_object, MVCCID delete_mvccid, char **rv_undo_data, char **rv_redo_data)
32636 {
32637  int offset_to_delete_mvccid;
32638  char *oid_ptr = NULL;
32639  char *mvccid_ptr = NULL;
32640 
32641  /* Assert expected arguments. */
32642  assert (btid_int != NULL);
32643  assert (record != NULL);
32644  assert (node_type == BTREE_LEAF_NODE || node_type == BTREE_OVERFLOW_NODE);
32645  assert (offset_to_object >= 0 && offset_to_object < record->length);
32646 
32647  /* Set oid_ptr */
32648  oid_ptr = record->data + offset_to_object;
32649 
32650  /* Compute offset to delete MVCCID. */
32651  /* Instance OID is always packed. */
32652  offset_to_delete_mvccid = offset_to_object + OR_OID_SIZE;
32653  if (btree_is_class_oid_packed (btid_int, record, node_type, (offset_to_object == 0)))
32654  {
32655  /* Class OID is also packed. */
32656  offset_to_delete_mvccid += OR_OID_SIZE;
32657  }
 /* NOTE(review): the condition guarding the next block is missing from this listing. Presumably it checks whether
  * the object at oid_ptr is flagged with BTREE_OID_HAS_MVCC_INSID -- TODO confirm. */
32659  {
32660  /* Insert MVCCID is also packed. */
32661  offset_to_delete_mvccid += OR_MVCCID_SIZE;
32662  }
32663  /* Set mvccid_ptr. */
32664  mvccid_ptr = record->data + offset_to_delete_mvccid;
32665 
 /* NOTE(review): the condition selecting between "replace" and "insert" is missing from this listing. Presumably
  * the MVCCID is replaced when the object already has room for a delete MVCCID -- TODO confirm. */
32667  {
32668  /* Just replace the MVCCID. */
32669  btree_set_mvccid (record, offset_to_delete_mvccid, &delete_mvccid, rv_undo_data, rv_redo_data);
32670  }
32671  else
32672  {
32673  /* Insert delete MVCCID. */
32674  btree_add_mvccid (record, offset_to_object, offset_to_delete_mvccid, delete_mvccid, BTREE_OID_HAS_MVCC_DELID,
32675  rv_undo_data, rv_redo_data);
32676  }
32677 #if !defined (NDEBUG)
 /* Debug builds re-validate the record after the change. */
32678  btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
32679 #endif
32680 }
32681 
32682 /*
32683  * btree_record_replace_object () - Replace object in b-tree record.
32684  *
32685  * return : Void.
32686  * thread_p (in) : Thread entry.
32687  * btid_int (in) : B-tree info.
32688  * record (in/out) : B-tree record; its data is modified.
32689  * node_type (in) : Leaf or overflow node type.
32690  * offset_to_replaced_inout (in/out) : Offset in record to object being replaced. It will output offset to replacing
32691  * object (only updated when the object is re-inserted ordered by OID in an overflow record).
32692  * replacement (in/out) : B-tree object info for replacement. Its MVCC info may be forced to fixed size.
32693  * rv_undo_data (out) : Output undo recovery data for the change.
32694  * rv_redo_data (out) : Output redo recovery data for the change.
 *
 * Three cases are handled:
 *  - first object of a leaf record: delegated to btree_leaf_change_first_object;
 *  - other leaf objects: the old object is overwritten in place, shifting the rest of the record when sizes differ;
 *  - overflow records: a lone object is overwritten; otherwise the old object is removed and the new one inserted
 *    ordered by OID.
32695  */
32696 static void
32697 btree_record_replace_object (THREAD_ENTRY * thread_p, BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type,
32698  int *offset_to_replaced_inout, BTREE_OBJECT_INFO * replacement, char **rv_undo_data,
32699  char **rv_redo_data)
32700 {
32701  int old_object_size;
32702  int new_object_size;
32703  char *object_ptr;
32704  char *ptr = NULL;
32705 
 /* Logging is done only when both the pointer and the buffer it points to are provided. */
32706  bool undo_logging = rv_undo_data != NULL && *rv_undo_data != NULL;
32707  bool redo_logging = rv_redo_data != NULL && *rv_redo_data != NULL;
32708 
32709  int offset_to_replaced;
32710 
32711  /* Assert expected arguments. */
32712  assert (btid_int != NULL);
32713  assert (record != NULL);
32714  assert (offset_to_replaced_inout != NULL);
32715  assert (replacement != NULL);
32716 
32717  offset_to_replaced = *offset_to_replaced_inout;
32718  assert (offset_to_replaced >= 0 && offset_to_replaced < record->length);
32719 
32720  if (node_type == BTREE_LEAF_NODE)
32721  {
32722  if (offset_to_replaced == 0)
32723  {
32724  /* First in leaf record. */
32725  btree_leaf_change_first_object (thread_p, record, btid_int, &replacement->oid, &replacement->class_oid,
32726  &replacement->mvcc_info, NULL, rv_undo_data, rv_redo_data);
32727  return;
32728  }
32729  /* Not first in leaf record. */
32730  if (BTREE_IS_UNIQUE (btid_int->unique_pk))
32731  {
32732  /* Fixed size objects. */
32733  new_object_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
32734  old_object_size = new_object_size;
32735 
32736  /* Include all MVCC info. */
32737  BTREE_MVCC_INFO_SET_FIXED_SIZE (&replacement->mvcc_info);
32738  }
32739  else
32740  {
32741  /* Compute old and new object size. */
32742  /* Both have instance OID. */
32743  old_object_size = new_object_size = OR_OID_SIZE;
32744 
32745  /* Add old object MVCC info size. */
 /* NOTE(review): one line of the expression below is missing from this listing; presumably it computes the MVCC
  * info size from the flags of the object stored at offset_to_replaced -- TODO confirm. */
32746  old_object_size +=
32748  + offset_to_replaced));
32749 
32750  /* Add new object MVCC info size. */
32751  new_object_size += BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS (replacement->mvcc_info.flags);
32752  }
32753  /* Change the record. */
32754  object_ptr = record->data + offset_to_replaced;
32755 
32756  /* Undo logging. */
32757  if (undo_logging)
32758  {
32759  *rv_undo_data =
32760  log_rv_pack_undo_record_changes (*rv_undo_data, offset_to_replaced, old_object_size, new_object_size,
32761  object_ptr);
32762  }
32763 
 /* Shift the data following the old object so that it starts right after the new object, then pack the new
  * object in place. */
32764  RECORD_MOVE_DATA (record, offset_to_replaced + new_object_size, offset_to_replaced + old_object_size);
32765  ptr = btree_pack_object (object_ptr, btid_int, node_type, record, replacement);
32766  assert (CAST_BUFLEN (ptr - object_ptr) == new_object_size);
32767 
32768  /* Redo logging. */
32769  if (redo_logging)
32770  {
32771  *rv_redo_data =
32772  log_rv_pack_redo_record_changes (*rv_redo_data, offset_to_replaced, old_object_size, new_object_size,
32773  object_ptr);
32774  }
32775 
32776 #if !defined (NDEBUG)
32777  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
32778 #endif
32779  }
32780  else
32781  {
32782  /* Object must be fixed size. */
32783  int fixed_object_size = BTREE_OBJECT_FIXED_SIZE (btid_int);
32784  BTREE_MVCC_INFO_SET_FIXED_SIZE (&replacement->mvcc_info);
32785 
32786  if (record->length == fixed_object_size)
32787  {
32788  /* Only one object. Just replace it. */
32789  assert (offset_to_replaced == 0);
32790  if (undo_logging)
32791  {
32792  /* Undo logging. */
32793  *rv_undo_data =
32794  log_rv_pack_undo_record_changes (*rv_undo_data, 0, fixed_object_size, fixed_object_size, record->data);
32795  }
32796  ptr = btree_pack_object (record->data, btid_int, node_type, record, replacement);
32797  assert (ptr == record->data + fixed_object_size);
32798  if (redo_logging)
32799  {
32800  /* Redo logging. */
32801  *rv_redo_data =
32802  log_rv_pack_redo_record_changes (*rv_redo_data, 0, fixed_object_size, fixed_object_size, record->data);
32803  }
32804 #if !defined (NDEBUG)
32805  (void) btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
32806 #endif
32807  }
32808  else
32809  {
32810  /* Remove old object and insert new ordered by OID. */
32811  btree_record_remove_object_internal (thread_p, btid_int, record, node_type, offset_to_replaced, rv_undo_data,
32812  rv_redo_data, NULL);
32813  btree_insert_object_ordered_by_oid (thread_p, record, btid_int, replacement, rv_undo_data, rv_redo_data,
32814  offset_to_replaced_inout);
32815  }
32816  }
32817 }
32818 
32819 /*
32820  * btree_get_creator_mvccid () - Get MVCCID of creator from root header.
32821  *
32822  * return : MVCCID of creator.
32823  * thread_p (in) : Thread entry.
32824  * root_page (in) : Root page.
32825  */
32826 static MVCCID
32828 {
32829  BTREE_ROOT_HEADER *root_header = NULL;
32830 
32831  assert (root_page != NULL);
32832 
32833  root_header = btree_get_root_header (thread_p, root_page);
32834  assert (root_header != NULL);
32835 
32836  return root_header->creator_mvccid;
32837 }
32838 
32839 /*
32840  * btree_rv_undo_mark_dealloc_page () - Undo marking index page as deallocated by setting its level back.
32841  *
32842  * return : Error code. NO_ERROR on success, ER_FAILED when the node header cannot be obtained.
32843  * thread_p (in) : Thread entry.
32844  * rcv (in) : Recovery data. rcv->pgptr is the page to fix up; rcv->data holds the saved node level.
 *
 * NOTE(review): the line carrying the function name and parameter list is missing from this listing.
32845  */
32846 int
32848 {
32849  BTREE_NODE_HEADER *node_header = btree_get_node_header (thread_p, rcv->pgptr);
32850 
32851  if (node_header == NULL)
32852  {
32853  assert (false);
32854  return ER_FAILED;
32855  }
32856 
 /* The recovery data is expected to be exactly one node level value, stored as a short. */
32857  assert (rcv->length == sizeof (node_header->node_level));
32858  assert (sizeof (short) == sizeof (node_header->node_level));
32859 
 /* Restore the saved level and mark the page dirty. */
32860  node_header->node_level = *(short *) rcv->data;
32861  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
32862 
32863  return NO_ERROR;
32864 }
32865 
32866 /*
32867  * btree_hash_btid () - Create hash value from btid.
32868  *
32869  * return : Hash value
32870  * btid (in) : Pointer to b-tree ID.
32871  * hash_size (in) : Hash size.
32872  */
32873 unsigned int
32874 btree_hash_btid (void *btid, int hash_size)
32875 {
32876  return ((BTID *) btid)->vfid.fileid % hash_size;
32877 }
32878 
32879 /*
32880  * btree_create_file () - Create a b-tree file and allocate its root.
32881  *
32882  * return : Error code
32883  * thread_p (in) : Thread entry
32884  * class_oid (in) : Top class OID
32885  * attrid (in) : Attribute identifier
32886  * npages (in) : Number of pages
32887  * btid (out) : B-tree identifier
32888  *
32889  * todo: use table space.
32890  */
32891 int
32892 btree_create_file (THREAD_ENTRY * thread_p, const OID * class_oid, int attrid, BTID * btid)
32893 {
32894  FILE_DESCRIPTORS des;
32895  VPID vpid_root;
32896  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
32897 
32898  int error_code = NO_ERROR;
32899 
32900  memset (&des, 0, sizeof (des));
32901  des.btree.class_oid = *class_oid;
32902  des.btree.attr_id = attrid;
32903 
32904  error_code = file_create_with_npages (thread_p, FILE_BTREE, 1, &des, &btid->vfid);
32905  if (error_code != NO_ERROR)
32906  {
32907  ASSERT_ERROR ();
32908  return error_code;
32909  }
32910 
32911  error_code = heap_get_class_tde_algorithm (thread_p, class_oid, &tde_algo);
32912  if (error_code == NO_ERROR)
32913  {
32914  /*
32915  * It can happen to fail to get the class record.
32916  * For example, a class record that is assigned but not updated poperly yet.
32917  * In this case, Setting tde flag is just skipped and it is expected to be done later.
32918  * see file_apply_tde_to_class_files()
32919  */
32920  error_code = file_apply_tde_algorithm (thread_p, &btid->vfid, tde_algo);
32921  if (error_code != NO_ERROR)
32922  {
32923  ASSERT_ERROR ();
32924  return error_code;
32925  }
32926  }
32927  else
32928  {
32929  er_clear ();
32930  }
32931 
32932  /* index page allocations need to be committed. they are not individually deallocated on undo; all pages are
32933  * deallocated when the file is destroyed. */
32934  log_sysop_start (thread_p);
32935  error_code = file_alloc_sticky_first_page (thread_p, &btid->vfid, btree_initialize_new_page, NULL, &vpid_root, NULL);
32936  if (error_code != NO_ERROR)
32937  {
32938  ASSERT_ERROR ();
32939  log_sysop_abort (thread_p);
32940  return error_code;
32941  }
32942  if (vpid_root.volid != btid->vfid.volid)
32943  {
32944  /* should not happen */
32945  assert_release (false);
32946  log_sysop_abort (thread_p);
32947  return ER_FAILED;
32948  }
32949  btid->root_pageid = vpid_root.pageid;
32950 
32951  log_sysop_commit (thread_p);
32952  return NO_ERROR;
32953 }
32954 
32955 /*
32956  * btree_delete_sysop_end () - end system op used for b-tree delete based on purpose.
32957  *
32958  * return : void
32959  * thread_p (in) : thread entry
32960  * helper (in/out) : delete helper; is_system_op_started is reset to false when the system op is ended.
 *
 * If no system op was started, this is reported with assert_release and nothing is ended. An unknown purpose
 * aborts the system op.
 *
 * NOTE(review): the signature line, the case labels and the first line of several calls are missing from this
 * listing; the mapping from purpose to the way the system op is ended cannot be read from here.
32961  */
32962 STATIC_INLINE void
32964 {
32965  if (!helper->is_system_op_started)
32966  {
32967  assert_release (false);
32968  return;
32969  }
32970 
32971  switch (helper->purpose)
32972  {
32975  helper->rv_keyval_data_length, helper->rv_keyval_data);
32976  break;
32977 
32980  helper->rv_keyval_data_length, helper->rv_keyval_data);
32981  break;
32982 
32985  break;
32986 
 /* End with a compensate record that references helper->reference_lsa. */
32991  log_sysop_end_logical_compensate (thread_p, &helper->reference_lsa);
32992  break;
32993 
32995  /* system op to just vacuum insert MVCCID? not really expected. */
32996  assert (false);
32997  /* fall through to commit on release */
32998 
33001  log_sysop_commit (thread_p);
33002  break;
33003 
33004  default:
 /* Unknown purpose: report and abort the system op. */
33005  assert_release (false);
33006  log_sysop_abort (thread_p);
33007  break;
33008  }
33009 
33010  helper->is_system_op_started = false;
33011 }
33012 
33013 /*
33014  * btree_insert_sysop_end () - end system op used for b-tree insert based on purpose.
33015  *
33016  * return : void
33017  * thread_p (in) : thread entry
33018  * helper (in/out) : insert helper; is_system_op_started is reset to false when the system op is ended.
 *
 * If no system op was started, this is reported with assert_release and nothing is ended. Purposes that are not
 * expected to use system ops, and unknown purposes, abort the system op.
 *
 * NOTE(review): the signature line, the case labels and the first line of several calls are missing from this
 * listing; the mapping from purpose to the way the system op is ended cannot be read from here.
33019  */
33020 STATIC_INLINE void
33022 {
33023  if (!helper->is_system_op_started)
33024  {
33025  assert_release (false);
33026  return;
33027  }
33028 
33029  switch (helper->purpose)
33030  {
 /* End with a logical undo record built from the recovery index and key/value data saved in the helper. */
33032  assert (helper->rcvindex != RV_NOT_DEFINED);
33033  log_sysop_end_logical_undo (thread_p, helper->rcvindex, helper->leaf_addr.vfid, helper->rv_keyval_data_length,
33034  helper->rv_keyval_data);
33035  break;
33036 
33039  helper->rv_keyval_data_length, helper->rv_keyval_data);
33040  break;
33041 
33044  helper->rv_keyval_data_length, helper->rv_keyval_data);
33045  break;
33046 
33050  break;
33051 
33053  log_sysop_commit (thread_p);
33054  break;
33055 
33058  /* no system ops are expected! */
33059 
33060  default:
33061  assert_release (false);
33062  log_sysop_abort (thread_p);
33063  break;
33064  }
33065 
33066  helper->is_system_op_started = false;
33067 }
33068 
33069 /*
33070  * btree_purpose_to_string () - purpose to string
33071  *
33072  * return : string
33073  * purpose (in) : purpose
33074  */
33075 STATIC_INLINE const char *
33077 {
33078  switch (purpose)
33079  {
33081  return "BTREE_OP_INSERT_NEW_OBJECT";
33083  return "BTREE_OP_INSERT_UNDO_PHYSICAL_DELETE";
33085  return "BTREE_OP_INSERT_MVCC_DELID";
33087  return "BTREE_OP_INSERT_MARK_DELETED";
33089  return "BTREE_OP_DELETE_OBJECT_PHYSICAL";
33091  return "BTREE_OP_DELETE_OBJECT_PHYSICAL_POSTPONED";
33093  return "BTREE_OP_DELETE_UNDO_INSERT";
33095  return "BTREE_OP_DELETE_UNDO_INSERT_UNQ_MULTIUPD";
33097  return "BTREE_OP_DELETE_UNDO_INSERT_DELID";
33099  return "BTREE_OP_DELETE_VACUUM_INSID";
33101  return "BTREE_OP_DELETE_VACUUM_OBJECT";
33103  return "BTREE_OP_ONLINE_INDEX_TRAN_INSERT";
33105  return "BTREE_OP_ONLINE_INDEX_TRAN_INSERT_DF";
33107  return "BTREE_OP_ONLINE_INDEX_UNDO_TRAN_DELETE";
33109  return "case BTREE_OP_ONLINE_INDEX_TRAN_DELETE";
33111  return "BTREE_OP_ONLINE_INDEX_UNDO_TRAN_INSERT";
33113  return "BTREE_OP_ONLINE_INDEX_IB_INSERT";
33115  return "BTREE_OP_ONLINE_INDEX_IB_DELETE";
33116  default:
33117  assert (false);
33118  return "** UNKNOWN PURPOSE **";
33119  }
33120 }
33121 
33122 /*
33123  * btree_op_type_to_string () - operation type to string
33124  *
33125  * return : name of the operation type constant as a static string; "** UNKNOWN OP TYPE **" for an unknown value.
33126  * op_type (in) : operation type (one of the SINGLE_ROW_* or MULTI_ROW_* constants handled below)
 *
 * NOTE(review): the line carrying the function name and parameter list is missing from this listing.
33127  */
33128 STATIC_INLINE const char *
33130 {
33131  switch (op_type)
33132  {
33133  case SINGLE_ROW_INSERT:
33134  return "SINGLE_ROW_INSERT";
33135  case SINGLE_ROW_DELETE:
33136  return "SINGLE_ROW_DELETE";
33137  case SINGLE_ROW_UPDATE:
33138  return "SINGLE_ROW_UPDATE";
33139  case SINGLE_ROW_MODIFY:
33140  return "SINGLE_ROW_MODIFY";
33141  case MULTI_ROW_INSERT:
33142  return "MULTI_ROW_INSERT";
33143  case MULTI_ROW_DELETE:
33144  return "MULTI_ROW_DELETE";
33145  case MULTI_ROW_UPDATE:
33146  return "MULTI_ROW_UPDATE";
33147  default:
 /* Not expected: asserts in debug builds, returns a placeholder string otherwise. */
33148  assert (false);
33149  return "** UNKNOWN OP TYPE **";
33150  }
33151 }
33152 
33153 /*
33154  * btree_get_btree_node_type_from_page () - Classify a b-tree page for performance monitoring by looking at the
 *					     length of its header record.
33155  *
33156  * return: PERF_PAGE_BTREE_GENERIC when the page has no slots or the header record cannot be read,
 *	    PERF_PAGE_BTREE_OVF when the header has the size of BTREE_OVERFLOW_HEADER,
 *	    PERF_PAGE_BTREE_NONLEAF / PERF_PAGE_BTREE_LEAF when it has the size of BTREE_NODE_HEADER
 *	    (node_level > 1 means non-leaf), PERF_PAGE_BTREE_ROOT otherwise.
33157  * page_ptr(in): page to classify; must not be NULL.
33158  *
 * NOTE(review): the return type and signature lines are missing from this listing.
33159  */
33162 {
33163  RECDES header_record;
33164  SPAGE_HEADER *page_header_p;
 /* Size of the root header up to (not including) the packed key domain; used only by the assert below. */
33165  int root_header_fixed_size = (int) offsetof (BTREE_ROOT_HEADER, packed_key_domain);
33166 
33167  assert (page_ptr != NULL);
33168 
33169  page_header_p = (SPAGE_HEADER *) page_ptr;
33170 
33171  if (page_header_p->num_slots <= 0 || spage_get_record (thread_p, page_ptr, HEADER, &header_record, PEEK) != S_SUCCESS)
33172  {
33173  return PERF_PAGE_BTREE_GENERIC;
33174  }
33175 
 /* The kind of page is told apart by the size of its header record. */
33176  if (header_record.length == sizeof (BTREE_OVERFLOW_HEADER))
33177  {
33178  return PERF_PAGE_BTREE_OVF;
33179  }
33180  else if (header_record.length == sizeof (BTREE_NODE_HEADER))
33181  {
33182  BTREE_NODE_HEADER *header;
33183 
33184  header = (BTREE_NODE_HEADER *) header_record.data;
33185  if (header != NULL)
33186  {
33187  if (header->node_level > 1)
33188  {
33189  return PERF_PAGE_BTREE_NONLEAF;
33190  }
33191  else
33192  {
33193  return PERF_PAGE_BTREE_LEAF;
33194  }
33195  }
33196  else
33197  {
33198  return PERF_PAGE_UNKNOWN;
33199  }
33200  }
33201  else
33202  {
33203  assert (header_record.length >= root_header_fixed_size);
33204 
33205  return PERF_PAGE_BTREE_ROOT;
33206  }
 /* Not reached: every branch above returns. */
33207  return PERF_PAGE_BTREE_ROOT;
33208 }
33209 
33210 //
33211 // btree_online_index_check_state () - check online index state is valid
33212 //
33213 // state (in) : state
33214 //
// NOTE(review): the signature line and the body of this function are missing from this listing.
33215 static inline void
33217 {
33220 }
33221 
// The three predicates below compare a state against the insert-flag, delete-flag and normal-flag online index
// states respectively. NOTE(review): their signature lines are missing from this listing.
33222 static inline bool
33224 {
33225  return state == BTREE_ONLINE_INDEX_INSERT_FLAG_STATE;
33226 }
33227 
33228 static inline bool
33230 {
33231  return state == BTREE_ONLINE_INDEX_DELETE_FLAG_STATE;
33232 }
33233 
33234 static inline bool
33236 {
33237  return state == BTREE_ONLINE_INDEX_NORMAL_FLAG_STATE;
33238 }
33239 
// NOTE(review): the signatures and bodies of the three void helpers below are missing from this listing;
// presumably they are the setters matching the predicates above -- TODO confirm against the original source.
33240 static inline void
33242 {
33244 }
33245 
33246 static inline void
33248 {
33250 }
33251 
33252 static inline void
33254 {
33256 }
33257 
33258 //
33259 // btree_online_index_dispatcher () - dispatch online index operation: populate insert/delete helper and choose
33260 // appropriate root/traversal/leaf functions
33261 //
33262 // return : error code
33263 // thread_p (in) : thread entry
33264 // btid_int (in) : b-tree info
33265 // key (in) : key
33266 // class_oid (in) : class OID
33267 // oid (in) : instance OID
33268 // unique (in) : ... todo
33269 // purpose (in) : function purpose
33270 //
33271 int
33272 btree_online_index_dispatcher (THREAD_ENTRY * thread_p, BTID * btid, DB_VALUE * key, OID * cls_oid,
33273  OID * oid, int unique, BTREE_OP_PURPOSE purpose, LOG_LSA * undo_nxlsa)
33274 {
33275  btree_insert_list one_item_list (key, oid);
33276 
33277  return btree_online_index_list_dispatcher (thread_p, btid, cls_oid, &one_item_list, unique, purpose, undo_nxlsa);
33278 }
33279 
33280 //
33281 // btree_online_index_list_dispatcher () - dispatch online index operation with list mode
33282 //
33283 // return : error code
33284 // thread_p (in) : thread entry
33285 // btid (in) : b-tree identifier
33286 // class_oid (in) : class OID
33287 // insert_list (in) : list of pairs key, OID
33288 // unique (in) : unique flag of the index (tested with BTREE_IS_UNIQUE)
33289 // purpose (in) : function purpose
33290 // undo_nxlsa (in): if not NULL, copied into the insert helper (compensate_undo_nxlsa) and delete helper (reference_lsa)
33291 //
// NOTE(review): several lines of this function (some declarations, asserts, helper initialization, the case
// labels of the switch and some assignments) are missing from this listing.
33292 int
33293 btree_online_index_list_dispatcher (THREAD_ENTRY * thread_p, BTID * btid, OID * class_oid,
33294  btree_insert_list * insert_list, int unique, BTREE_OP_PURPOSE purpose,
33295  LOG_LSA * undo_nxlsa)
33296 {
33297  int error_code = NO_ERROR;
33298  /* Search key helper which will point to where data should inserted. */
33300  /* Processing key function: can insert an object or just a delete MVCCID. */
33301  BTREE_ROOT_WITH_KEY_FUNCTION *root_function = NULL;
33302  BTREE_ADVANCE_WITH_KEY_FUNCTION *advance_function = NULL;
33303  BTREE_PROCESS_KEY_FUNCTION *key_function = NULL;
33304  BTREE_HELPER helper;
33305  BTID_INT btid_int;
33306 
 /* Current (key, OID) pair of the list. */
33307  DB_VALUE *key = insert_list->get_key ();
33308  OID *oid = insert_list->get_oid ();
33309 
33310  helper.insert_helper.insert_list = insert_list;
33311 
33312  /* Safe guards */
33313  assert (oid != NULL);
33314  assert (class_oid != NULL);
33318 
33319  /* Check for null keys. */
33320  if (DB_IS_NULL (key) || btree_multicol_key_is_null (key))
33321  {
33322  /* We do not store NULL keys but we track them for unique indexes. */
33323  if (BTREE_IS_UNIQUE (unique))
33324  {
33325  /* In this scenario, we have to write log for the update of local statistics, since we do not
33326  * log the physical operation of a NULL key.
33327  */
33329  {
33330  /* DELETE operation, we decrement oids and nulls. */
33331  logtb_tran_update_unique_stats (thread_p, btid, 0, -1, -1, true);
33332  }
33333  else
33334  {
33335  /* Insert operation, we increment oids and nulls. */
33336  logtb_tran_update_unique_stats (thread_p, btid, 0, 1, 1, true);
33337  }
33338  }
33339 
33340  return NO_ERROR;
33341  }
33342 
33343  /* Save OID, class OID and MVCC info in insert helper. */
33344  COPY_OID (BTREE_INSERT_OID (&helper.insert_helper), oid);
33345  COPY_OID (BTREE_DELETE_OID (&helper.delete_helper), oid);
 /* class_oid is asserted non-NULL above; the check is still effective in builds without asserts. */
33346  if (class_oid != NULL)
33347  {
33348  COPY_OID (BTREE_INSERT_CLASS_OID (&helper.insert_helper), class_oid);
33349  COPY_OID (BTREE_DELETE_CLASS_OID (&helper.delete_helper), class_oid);
33350  }
33351  else
33352  {
33355  }
33356 
33357  if (undo_nxlsa != NULL)
33358  {
33359  LSA_COPY (&helper.insert_helper.compensate_undo_nxlsa, undo_nxlsa);
33360  LSA_COPY (&helper.delete_helper.reference_lsa, undo_nxlsa);
33361  }
33362 
33365 
 /* Choose the root/advance/key functions for the traversal depending on purpose. */
33366  switch (purpose)
33367  {
33369  /* This is an insert done by the index builder. */
33371  helper.insert_helper.purpose = purpose;
33372  root_function = btree_fix_root_for_insert;
33373  advance_function = btree_split_node_and_advance;
33375  break;
33376 
33380  helper.insert_helper.purpose = purpose;
33381  root_function = btree_fix_root_for_insert;
33382  advance_function = btree_split_node_and_advance;
33383  key_function = btree_key_online_index_tran_insert;
33384  break;
33385 
33389  helper.delete_helper.purpose = purpose;
33390  root_function = btree_fix_root_for_delete;
33391  advance_function = btree_merge_node_and_advance;
33392  key_function = btree_key_online_index_tran_delete;
33393 
 /* Delete cases run their own traversal here, using the delete helper. */
33394  error_code =
33395  btree_search_key_and_apply_functions (thread_p, btid, &btid_int, key, root_function, &helper.delete_helper,
33396  advance_function, &helper.delete_helper, key_function, &helper,
33397  &search_key, NULL);
33398 
33399  if (error_code == NO_ERROR && search_key.result == BTREE_KEY_NOTFOUND)
33400  {
33401  /* We failed to find the object in the index. We must traverse again the btree and treat the operation
33402  * as an insert with DELETE_FLAG set.
33403  */
33404  helper.insert_helper.purpose = purpose;
33407  {
33409  }
33410  root_function = btree_fix_root_for_insert;
33411  advance_function = btree_split_node_and_advance;
33413  break; // Leave the switch: the insert traversal after the switch is executed next.
33414  }
33415  else
33416  {
 /* Error, or the delete traversal handled the key: skip the insert traversal. */
33417  goto end;
33418  }
33419 
33420  default:
33421  /* This should never happen. */
33422  assert (false);
33423  return ER_FAILED;
33424  }
33425 
 /* Insert traversal, using the insert helper. */
33426  error_code =
33427  btree_search_key_and_apply_functions (thread_p, btid, &btid_int, key, root_function, &helper.insert_helper,
33428  advance_function, &helper.insert_helper, key_function, &helper, &search_key,
33429  NULL);
33430 
33431 end:
33432 
 /* Release the printed keys the helpers may hold. */
33433  if (helper.insert_helper.printed_key != NULL)
33434  {
33435  db_private_free (thread_p, helper.insert_helper.printed_key);
33436  }
33437 
33439  {
33440  db_private_free (thread_p, helper.delete_helper.printed_key);
33441  }
33442 
33443  return error_code;
33444 }
33445 
33446 /*
33447  * btree_key_online_index_IB_insert_list () - BTREE_PROCESS_KEY_FUNCTION used for inserting a new object in b-tree during
33448  * online index loading. After the first insert, it keeps inserting the next (key, OID)
 * pairs of the insert list into the same leaf page for as long as the checks in the loop allow.
33449  *
33450  * return : Error code.
33451  * thread_p (in) : Thread entry.
33452  * btid_int (in) : B-tree info.
33453  * key (in) : Key info
33454  * leaf_page (in) : Pointer to the leaf page.
33455  * search_key (in/out) : Search helper; updated for each following key.
33456  * restart (in/out): Restart
33457  * other_args (in/out) : BTREE_HELPER *; its insert helper carries the insert list.
 *
 * NOTE(review): the signature line and several statements of the loop (some conditions, declarations and the
 * statements executed just before several "break"s) are missing from this listing.
33458  */
33459 int
33461  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
33462  void *other_args)
33463 {
33464  BTREE_HELPER *helper = (BTREE_HELPER *) other_args;
33465  btree_insert_list *insert_list = helper->insert_helper.insert_list;
33466  DB_VALUE *curr_key;
33467  int error_code = NO_ERROR;
33468  bool first_insert = true;
33469 
33470  curr_key = key;
33471 
33472  assert (insert_list->m_key_type == btid_int->key_type);
33473 
 /* Reset the per-call counters of the list and start timing the work done on this leaf. */
33474  insert_list->m_keep_page_iterations = 0;
33475  insert_list->m_ovf_appends = 0;
33476  insert_list->m_ovf_appends_new_page = 0;
33477  PERF_UTIME_TRACKER time_insert_same_leaf = PERF_UTIME_TRACKER_INITIALIZER;
33478  PERF_UTIME_TRACKER_START (thread_p, &time_insert_same_leaf);
33479 
33480  while (1)
33481  {
 /* Insert the current key; the first iteration uses the key and search result received as arguments. */
33482  error_code = btree_key_online_index_IB_insert (thread_p, btid_int, curr_key, leaf_page, search_key, restart,
33483  other_args);
33484  if (error_code != NO_ERROR)
33485  {
33486  ASSERT_ERROR ();
33487  break;
33488  }
33489 
33491  {
33492  assert (insert_list->m_keys_oids.size () == 1);
33493  break;
33494  }
33495 
33497  if (!first_insert)
33498  {
33500  }
33501 
33502  if (insert_list->next_key () != btree_insert_list::KEY_AVAILABLE)
33503  {
33504  /* no more keys in list */
33505  break;
33506  }
33507 
33508  /* prepare next pair (key, oid) */
33509  COPY_OID (BTREE_INSERT_OID (&helper->insert_helper), insert_list->get_oid ());
33510  curr_key = insert_list->get_key ();
33511 
33512  int key_len = btree_get_disk_size_of_key (curr_key);
33513  BTREE_NODE_HEADER *node_header = btree_get_node_header (thread_p, *leaf_page);
33514 
33515  if (key_len > node_header->max_key_len)
33516  {
33517  /* cannot insert a key having len > max key len : abort and let advance/split algorithm to deal with this */
33519  break;
33520  }
33521 
33522  /* assuming the key does not exist in page (an existing key requires less space,
33523  * we may miss adding one more record; this is a less expensive check, we accept the 'loss' */
33524  bool key_already_in_page = false;
33525  int new_ent_size = btree_get_max_new_data_size (thread_p, btid_int, *leaf_page, BTREE_LEAF_NODE, key_len,
33526  &helper->insert_helper, key_already_in_page);
33527  if (new_ent_size > spage_get_free_space_without_saving (thread_p, *leaf_page, NULL))
33528  {
33529  /* no more space in page */
33531  break;
33532  }
33533 
33534  /* compare with boundary keys : NULL keys means INF bound, no check is required */
33535  if (!insert_list->m_boundaries.m_is_inf_left_key)
33536  {
 /* The key must not be below the left boundary: left boundary <= current key. */
33538  c = btree_compare_key (&insert_list->m_boundaries.m_left_key, curr_key, btid_int->key_type, 1, 1, NULL);
33539  if (c != DB_LT && c != DB_EQ)
33540  {
33542  break;
33543  }
33544  }
33545 
33546  if (!insert_list->m_boundaries.m_is_inf_right_key)
33547  {
 /* The key must be strictly below the right boundary: current key < right boundary. */
33549  c = btree_compare_key (curr_key, &insert_list->m_boundaries.m_right_key, btid_int->key_type, 1, 1, NULL);
33550  if (c != DB_LT)
33551  {
33553  break;
33554  }
33555  }
33556 
33557  /* early filter-out of out-page-range key : compare with min/max of page
33558  * it also has the purpose of silencing the debug assertion of btree_search_leaf_page;
33559  * after this, the 'search_key' structure is incomplete (slot id will be computed by btree_search_leaf_page) */
33561  {
33562  error_code = btree_leaf_is_key_between_min_max (thread_p, btid_int, *leaf_page, curr_key, search_key);
33563  if (error_code != NO_ERROR)
33564  {
33565  ASSERT_ERROR ();
33566  break;
33567  }
33568 
33569  if (search_key->result == BTREE_ERROR_OCCURRED || search_key->result == BTREE_KEY_SMALLER
33570  || search_key->result == BTREE_KEY_BIGGER)
33571  {
33572  if (search_key->result == BTREE_KEY_SMALLER && VPID_ISNULL (&node_header->prev_vpid))
33573  {
33574  /* key is out of range (smaller), but since there is no leaf page to the left, we may continue */
33575  ;
33576  }
33577  else if (search_key->result == BTREE_KEY_BIGGER && VPID_ISNULL (&node_header->next_vpid))
33578  {
33579  /* key is out of range (bigger), but since there is no leaf page to the right, we may continue */
33580  ;
33581  }
33582  else
33583  {
33584  /* key is out of range (smaller or bigger) and the current leaf page has neighbours :
33585  * abort and search from root */
33587  break;
33588  }
33589  }
33590  }
33591 
33592  /* resolution of where to insert : slot, position relative to this slot and if page has fence keys */
33593  error_code = btree_search_leaf_page (thread_p, btid_int, *leaf_page, curr_key, search_key);
33594  if (error_code != NO_ERROR)
33595  {
33596  ASSERT_ERROR ();
33597  break;
33598  }
33599 
33600  if ((search_key->result == BTREE_KEY_BIGGER || search_key->result == BTREE_KEY_SMALLER)
33602  {
33603  /* key is out of range and presence of fence key suggests that next/prev leaf page should be
33604  * a better place; no fence means current key is bigger/lesser than all index keys and we can insert here
33605  * (this is backed-up by key page boundaries checked before) */
33607  break;
33608  }
33609  else if (search_key->result != BTREE_KEY_BETWEEN && search_key->result != BTREE_KEY_FOUND
33610  && search_key->result != BTREE_KEY_BIGGER && search_key->result != BTREE_KEY_SMALLER)
33611  {
33612  /* unexpected, abort insert and retry from root page */
33613  assert (false);
33614  break;
33615  }
33616 
 /* The next key can go into this leaf: count the iteration and loop to insert it. */
33617  first_insert = false;
33618  insert_list->m_keep_page_iterations++;
33619 
33620  if (insert_list->check_release_latch (thread_p, &helper->insert_helper, *leaf_page) == true)
33621  {
33623  break;
33624  }
33625  }
33626 
 /* The boundary keys are reset on every exit path of the loop. */
33627  insert_list->reset_boundary_keys ();
33628 
33629  PERF_UTIME_TRACKER_TIME (thread_p, &time_insert_same_leaf, PSTAT_BT_ONLINE_INSERT_LEAF);
33630 
33631  return error_code;
33632 }
33633 
33634  /*
33635  * btree_key_online_index_IB_insert () - BTREE_PROCESS_KEY_FUNCTION used for inserting a new object in b-tree during
33636  * online index loading.
33637  *
       * Summary of the visible logic:
       *  - key found and object found: either the object's state is rewritten in place (branch commented as
       *    "INSERT_FLAG is set") and a redo record is appended, or the object is removed through
       *    helper->delete_helper;
       *  - key found but object not found: the object is appended to the existing key;
       *  - key not found: a new key is inserted.
       * On success, for unique indexes, the transaction unique statistics are updated with the n_keys/n_oids deltas.
       *
33638  * return : Error code.
33639  * thread_p (in) : Thread entry.
33640  * btid_int (in) : B-tree info.
33641  * key (in) : Key info
33642  * leaf_page (in) : Pointer to the leaf page.
33643  * search_key (in) : Search helper
33644  * restart (in/out): Restart (not referenced in the lines visible here).
33645  * other_args (in/out) : BTREE_HELPER * (both its insert_helper and delete_helper members are used).
       *
       * NOTE(review): several lines of this function are absent from this listing (among them the line carrying the
       * function name and leading parameters, the declaration of btree_mvcc_info and the condition that selects the
       * INSERT_FLAG branch); confirm against the original source.
33646  */
33647 int
33649  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
33650  void *other_args)
33651 {
33652  BTREE_HELPER *helper = (BTREE_HELPER *) other_args;
33653  int error_code = NO_ERROR; /* Error code. */
33654  RECDES record; /* Record descriptor for leaf key record. */
33655  LEAF_REC leaf_info; /* Leaf record info. */
33656  int offset_after_key; /* Offset in record data where packed key is ended. */
33657  bool dummy_clear_key; /* Dummy field used as argument for btree_read_record. */
33658  PAGE_PTR page_found = NULL; /* Page where the object was located; unfixed at 'end' if it is not the leaf. */
33659  int offset_to_object = 0; /* Offset of the object in its record, or NOT_FOUND. */
33661  PAGE_PTR prev_page = NULL; /* Filled by the OID lookup; unfixed at 'end' if it is not the leaf. */
33662  BTREE_NODE_TYPE node_type;
33663  /* Redo recovery structures. */
33664  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
33665  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
33667  int rv_redo_data_length = 0;
33668  LOG_DATA_ADDR addr;
33669  LOG_LSA prev_lsa;
33670  PGSLOTID slotid;
33671  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
33672  char new_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
33673  RECDES new_record;
33674  int n_keys = 0; /* Key-count delta reported to the unique statistics. */
33675  int n_oids = 0; /* Object-count delta reported to the unique statistics. */
33676 
       /* Both record descriptors use aligned stack buffers. */
33677  record.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
33678  record.area_size = IO_MAX_PAGE_SIZE;
33679 
33680  new_record.data = PTR_ALIGN (new_rec_buf, BTREE_MAX_ALIGN);
33681  new_record.area_size = IO_MAX_PAGE_SIZE;
33682 
33683  /* Redo logging. */
33686 
       /* Leaf address used by the insert helper: slot found by the search, in the current leaf page. */
33687  helper->insert_helper.leaf_addr.offset = search_key->slotid;
33688  helper->insert_helper.leaf_addr.pgptr = *leaf_page;
33689  helper->insert_helper.leaf_addr.vfid = &btid_int->sys_btid->vfid;
33690 
33691  /* We are in leaf level now, and we must inspect if we have found the OID inside the key. */
33692  if (search_key->result == BTREE_KEY_FOUND)
33693  {
33694  /* Get the record. */
33695  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
33696  {
       /* Returning directly is fine here: no page other than the leaf has been fixed yet. */
33697  assert_release (false);
33698  error_code = ER_FAILED;
33699  return error_code;
33700  }
33701 
33702  /* Read the record. */
33703  error_code =
33704  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE,
33705  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
33706  if (error_code != NO_ERROR)
33707  {
33708  ASSERT_ERROR ();
33709  return error_code;
33710  }
33711 
       /* Look for the object to insert among the objects of this key. */
33712  error_code =
33713  btree_find_oid_with_page_and_record (thread_p, btid_int, &helper->insert_helper.obj_info.oid, *leaf_page,
33714  helper->insert_helper.purpose, NULL, &record, &leaf_info,
33715  offset_after_key, &page_found, &prev_page, &offset_to_object,
33716  &btree_mvcc_info, &new_record);
33717  if (error_code != NO_ERROR)
33718  {
33719  ASSERT_ERROR ();
33720  goto end;
33721  }
33722 
       /* The object is in the leaf record when page_found is the leaf page; otherwise it is treated as being in an
        * overflow page. */
33723  node_type = (page_found == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
33724 
       /* Slot 1 is used for the overflow case, the searched slot for the leaf case. */
33725  if (node_type == BTREE_OVERFLOW_NODE)
33726  {
33727  slotid = 1;
33728  }
33729  else
33730  {
33731  slotid = search_key->slotid;
33732  }
33733 
33734  if (offset_to_object != NOT_FOUND)
33735  {
33736  /* Inspect the object and its MVCC_INFO. */
33737  /* This is the index builder, therefore if there is already an OID that matches the one that needs to be
33738  * inserted, then the already inserted one should have either DELETE_FLAG or INSERT_FLAG set.
33739  */
33741 
33743  {
33744  /* INSERT_FLAG is set. It means we have to remove the flag, according to the state machine. */
33746 
33747  /* Prepare logging data. */
33748  addr.offset = slotid;
33749  addr.pgptr = page_found;
33750  addr.vfid = &btid_int->sys_btid->vfid;
33751 
33752  if (node_type == BTREE_OVERFLOW_NODE)
33753  {
33755  }
33757 
       /* Rewrite the object state inside new_record; redo data is collected through the helper's redo pointer. */
33758  btree_online_index_change_state (thread_p, btid_int, &new_record, node_type, offset_to_object,
33759  btree_mvcc_info.insert_mvccid, NULL,
33760  &helper->insert_helper.rv_redo_data_ptr);
33761 
33762  /* Add the logged info. */
33763  /* Update in page. */
33764  if (spage_update (thread_p, page_found, slotid, &new_record) != SP_SUCCESS)
33765  {
33766  /* Unexpected. */
33767  assert_release (false);
33768  error_code = ER_FAILED;
33769  goto end;
33770  }
33771 
33773 
33774  /* We need to log previous lsa. */
33775  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page_found));
33776 
33777  /* Logging. */
33779  rv_redo_data_length);
       /* Redo-only record for the in-place modification. */
33780  log_append_redo_data (thread_p, RVBT_RECORD_MODIFY_NO_UNDO, &addr, rv_redo_data_length,
33781  helper->insert_helper.rv_redo_data);
33782 
33783  btree_insert_log (&helper->insert_helper,
33784  BTREE_INSERT_MODIFY_MSG ("IB insert change from INSERT_FLAG to NORMAL_STATE"),
33785  BTREE_INSERT_MODIFY_ARGS (thread_p, &helper->insert_helper, page_found, &prev_lsa,
33786  node_type == BTREE_LEAF_NODE, slotid, new_record.length,
33787  btid_int->sys_btid));
33788 
33790 
33791  pgbuf_set_dirty (thread_p, page_found, DONT_FREE);
33792 
       /* n_keys and n_oids are still 0 here, so the statistics update at 'end' adds nothing. */
33793  goto end;
33794  }
33795  else
33796  {
       /* Not the INSERT_FLAG case (presumably DELETE_FLAG, per the comment above - confirm): the existing object is
        * removed through the delete helper. */
33798 
33802  assert (helper->delete_helper.rv_keyval_data == NULL); // otherwise, it will be leaked.
33803 
33804  if (btree_is_single_object_key (thread_p, btid_int, node_type, &new_record, offset_after_key))
33805  {
33806  /* Only one OID in the key, we will remove the key as well. */
33807  n_keys = -1;
33808  }
33809  n_oids = -1;
33810 
33811  error_code =
33812  btree_key_remove_object (thread_p, key, btid_int, &helper->delete_helper, *leaf_page, &record,
33813  &leaf_info, offset_after_key, search_key, &page_found, prev_page,
33814  node_type, offset_to_object);
33815  goto end;
33816  }
33817  }
33818  else
33819  {
33820  /* Key was found but the object wasn't. We must append the object to the current key. */
33821 
33822  /* Safeguards. */
33823  assert (search_key->result == BTREE_KEY_FOUND && offset_to_object == NOT_FOUND);
33824 
33825  n_oids = 1;
33826 
33827  error_code =
33828  btree_key_append_object_non_unique (thread_p, btid_int, key, *leaf_page, search_key, &new_record,
33829  offset_after_key, &leaf_info, &helper->insert_helper.obj_info,
33830  &helper->insert_helper);
33831  }
33832  }
33833  else
33834  {
33835  /* Key was not found, we must insert it. */
33836  n_keys = 1;
33837  n_oids = 1;
33838 
33839  error_code = btree_key_insert_new_key (thread_p, btid_int, key, *leaf_page, &helper->insert_helper, search_key);
33840  }
33841 
      /* Common exit: update unique statistics on success, then unfix any page other than the leaf that was fixed while
       * locating the object. */
33842 end:
33843  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
33844  {
33845  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, n_keys, n_oids, 0, false);
33846  }
33847 
33848  if (page_found != NULL && page_found != *leaf_page)
33849  {
33850  pgbuf_unfix_and_init (thread_p, page_found);
33851  }
33852 
33853  if (prev_page != NULL && prev_page != *leaf_page)
33854  {
33855  pgbuf_unfix_and_init (thread_p, prev_page);
33856  }
33857 
33858  return error_code;
33859 }
33860 
33861  /*
33862  * btree_key_online_index_tran_insert () - BTREE_PROCESS_KEY_FUNCTION used for inserting a new object
33863  * in b-tree during online index loading.
33864  *
       * Summary of the visible logic:
       *  - key found and object found: the object's state is changed in place (the comments and the log message
       *    describe this as DELETE_FLAG -> INSERT_FLAG), the record is updated in its page and the change is logged
       *    with btree_rv_log_insert_object;
       *  - key found but object not found: the object is appended to the existing key;
       *  - key not found: a new key is inserted.
       * For unique indexes the transaction unique statistics are updated after a successful append or new-key insert.
       *
33865  * return : Error code.
33866  * thread_p (in) : Thread entry.
33867  * btid_int (in) : B-tree info.
33868  * key (in) : Key info
33869  * leaf_page (in) : Pointer to the leaf page.
33870  * search_key (in) : Search helper
33871  * restart (in/out): Restart (not referenced in the lines visible here).
33872  * other_args (in/out) : BTREE_HELPER * (its insert_helper member is used).
       *
       * NOTE(review): several lines of this function are absent from this listing (among them the line carrying the
       * function name and leading parameters, the declarations of btree_mvcc_info and rv_redo_data_ptr, and the call
       * that fills the undo key/value data); confirm against the original source.
33873  */
33874 static int
33876  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
33877  void *other_args)
33878 {
33879  BTREE_HELPER *helper = (BTREE_HELPER *) other_args;
33880  int error_code = NO_ERROR; /* Error code. */
33881  RECDES record; /* Record descriptor for leaf key record. */
33882  LEAF_REC leaf_info; /* Leaf record info. */
33883  int offset_after_key; /* Offset in record data where packed key is ended. */
33884  bool dummy_clear_key; /* Dummy field used as argument for btree_read_record. */
33885  PAGE_PTR page_found = NULL; /* Page where the object was located; unfixed at 'end' if it is not the leaf. */
33886  int offset_to_object = 0; /* Offset of the object in its record, or NOT_FOUND. */
33888  BTREE_NODE_TYPE node_type;
33889  RECDES new_record;
33890  PGSLOTID slotid;
33891  LOG_LSA prev_lsa;
33892 
33893  LOG_DATA_ADDR addr;
33894 
33895  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
33896  record.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
33897  record.area_size = IO_MAX_PAGE_SIZE;
33898 
33899  char new_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
33900  new_record.data = PTR_ALIGN (new_rec_buf, BTREE_MAX_ALIGN);
33901  new_record.area_size = IO_MAX_PAGE_SIZE;
33902 
33903  char *rv_undo_data = NULL;
33904  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
33905  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
33906  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
33907 
33908  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
33909  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
33911  int rv_redo_data_length = 0;
33912 
33915 
       /* Leaf address used by the insert helper: slot found by the search, in the current leaf page. */
33916  helper->insert_helper.leaf_addr.offset = search_key->slotid;
33917  helper->insert_helper.leaf_addr.pgptr = *leaf_page;
33918  helper->insert_helper.leaf_addr.vfid = &btid_int->sys_btid->vfid;
33919 
       /* Start with the aligned stack buffer; the cleanup at 'end' only acts when this pointer has been replaced. */
33920  helper->insert_helper.rv_keyval_data = rv_undo_data_bufalign;
33921 
33922  /* Undo logging. */
33925  {
33926  error_code =
33928  BTREE_INSERT_OID (&helper->insert_helper),
33930  rv_undo_data_bufalign, &helper->insert_helper.rv_keyval_data,
33931  &rv_undo_data_capacity, &helper->insert_helper.rv_keyval_data_length);
33932  if (error_code != NO_ERROR)
33933  {
33934  ASSERT_ERROR ();
33935  goto end;
33936  }
33937  }
33938 
33939  /* We are in leaf level now, and we must inspect if we have found the OID inside the key. */
33940  if (search_key->result == BTREE_KEY_FOUND)
33941  {
33942  /* We search the key for the OID. If we find it, we should find it with DELETE_FLAG set, therefore we must
33943  * change its state to INSERT_FLAG in place.
33944  */
33945 
33946  /* Get the record. */
33947  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
33948  {
33949  assert_release (false);
33950  error_code = ER_FAILED;
33951  goto end;
33952  }
33953 
33954  /* Read the record. */
33955  error_code =
33956  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE,
33957  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
33958  if (error_code != NO_ERROR)
33959  {
33960  ASSERT_ERROR ();
33961  goto end;
33962  }
33963 
       /* Look for the object among the objects of this key; no previous-page output is requested (NULL). */
33964  error_code =
33965  btree_find_oid_with_page_and_record (thread_p, btid_int, &helper->insert_helper.obj_info.oid, *leaf_page,
33966  helper->insert_helper.purpose, NULL, &record, &leaf_info, offset_after_key,
33967  &page_found, NULL, &offset_to_object, &btree_mvcc_info, &new_record);
33968 
33969  if (error_code != NO_ERROR)
33970  {
33971  ASSERT_ERROR ();
33972  goto end;
33973  }
33974 
       /* The object is in the leaf record when page_found is the leaf page; otherwise it is treated as being in an
        * overflow page. */
33975  node_type = (page_found == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
33976 
33977  if (offset_to_object != NOT_FOUND)
33978  {
33979  /* Inspect the key and its MVCC_INFO. This is the transactional insert, which means that if we can find the
33980  * object, then the object must have DELETE_FLAG set.
33981  */
33984 
33985  /* Here we must change the state to insert flag. */
       /* Slot 1 is used for the overflow case, the searched slot for the leaf case. */
33986  if (node_type == BTREE_LEAF_NODE)
33987  {
33988  slotid = search_key->slotid;
33989  }
33990  else
33991  {
33992  slotid = 1;
33993  }
33994 
33995  /* Prepare logging. */
33996  addr.offset = slotid;
33997  addr.pgptr = page_found;
33998  addr.vfid = &btid_int->sys_btid->vfid;
33999 
34000  /* Redo logging. */
34001  if (node_type == BTREE_OVERFLOW_NODE)
34002  {
34004  }
34006 
34007  /* Set the new state to INSERT_FLAG. */
34009 
34010  /* Change the state of the record. */
34011  btree_online_index_change_state (thread_p, btid_int, &new_record, node_type, offset_to_object,
34012  btree_mvcc_info.insert_mvccid, NULL, &rv_redo_data_ptr);
34013 
34014  if (spage_update (thread_p, page_found, slotid, &new_record) != SP_SUCCESS)
34015  {
34016  assert_release (false);
34017  error_code = ER_FAILED;
34018  goto end;
34019  }
34020 
34021  /* We need to log previous lsa. */
34022  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page_found));
34023 
34024  /* Logging. */
34025  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
34026 
34027  btree_insert_log (&helper->insert_helper,
34028  BTREE_INSERT_MODIFY_MSG ("Tran insert change from DELETE_FLAG to INSERT_FLAG"),
34029  BTREE_INSERT_MODIFY_ARGS (thread_p, &helper->insert_helper, page_found, &prev_lsa,
34030  node_type == BTREE_LEAF_NODE, slotid, new_record.length,
34031  btid_int->sys_btid));
34032 
34033  btree_rv_log_insert_object (thread_p, helper->insert_helper, addr, 0, rv_redo_data_length, NULL,
34034  rv_redo_data);
34035 
34036  pgbuf_set_dirty (thread_p, page_found, DONT_FREE);
34037 
       /* No unique statistics update on this path: only the state of an existing object changed. */
34038  goto end;
34039  }
34040  else
34041  {
34042  /* Key was found but the object wasn't. We must append the object to the current key. */
34043  /* Safeguards. */
34044  assert (search_key->result == BTREE_KEY_FOUND && offset_to_object == NOT_FOUND);
34045 
34046  error_code =
34047  btree_key_append_object_non_unique (thread_p, btid_int, key, *leaf_page, search_key, &new_record,
34048  offset_after_key, &leaf_info, &helper->insert_helper.obj_info,
34049  &helper->insert_helper);
34050  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34051  {
34052  // Append a single object.
34053  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, 0, 1, 0, false);
34054  }
34055 
34056  goto end;
34057  }
34058  }
34059  else
34060  {
34061  /* Key was not found, we must insert it. */
34062  error_code = btree_key_insert_new_key (thread_p, btid_int, key, *leaf_page, &helper->insert_helper, search_key);
34063  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34064  {
34065  /* Insert a key with an object. */
34066  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, 1, 1, 0, false);
34067  }
34068 
34069  goto end;
34070  }
34071 
      /* Common exit: handle undo key/value data that no longer points at the stack buffer (the statement inside the
       * braces is not visible in this listing), reset the helper pointer, and unfix a non-leaf page fixed by the OID
       * lookup. */
34072 end:
34073  if (helper->insert_helper.rv_keyval_data != NULL && helper->insert_helper.rv_keyval_data != rv_undo_data_bufalign)
34074  {
34076  }
34077  helper->insert_helper.rv_keyval_data = NULL;
34079 
34080  if (page_found != NULL && page_found != *leaf_page)
34081  {
34082  pgbuf_unfix_and_init (thread_p, page_found);
34083  }
34084 
34085  return error_code;
34086 }
34087 
34088  /*
34089  * btree_key_online_index_tran_delete () - BTREE_PROCESS_KEY_FUNCTION used for deleting an object
34090  * in b-tree during online index loading.
34091  *
       * Summary of the visible logic:
       *  - key found and object found: either the object's state is changed in place (branch commented as
       *    INSERT_FLAG -> DELETE_FLAG) and logged with btree_rv_log_delete_object, or the object is physically
       *    removed with btree_key_remove_object;
       *  - object (or key) not found: the function switches to helper->insert_helper and, if the leaf does not need
       *    a split, appends the object to the key or inserts a new key (the comments state the object is stored
       *    with DELETE_FLAG set);
       *  - if the leaf lacks space, or the key is an overflow key while btid_int->ovfid is null, search_key->result
       *    is set to BTREE_KEY_NOTFOUND, which the inline comments describe as requesting a restart of the traverse.
       *
34092  * return : Error code.
34093  * thread_p (in) : Thread entry.
34094  * btid_int (in) : B-tree info.
34095  * key (in) : Key info
34096  * leaf_page (in) : Pointer to the leaf page.
34097  * search_key (in/out) : Search helper; its result may be reset to BTREE_KEY_NOTFOUND.
34098  * restart (in/out): Restart (not referenced in the lines visible here).
34099  * other_args (in/out) : BTREE_HELPER * (both its delete_helper and insert_helper members are used).
       *
       * NOTE(review): several lines of this function are absent from this listing (among them the line carrying the
       * function name and leading parameters, the declarations of btree_mvcc_info and rv_redo_data_ptr, the condition
       * that selects the INSERT_FLAG branch and the statements that prepare the insert helper); confirm against the
       * original source.
34100  */
34101 static int
34103  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
34104  void *other_args)
34105 {
34106  BTREE_HELPER *helper = (BTREE_HELPER *) other_args;
34107  int error_code = NO_ERROR; /* Error code. */
34108  RECDES record; /* Record descriptor for leaf key record. */
34109  LEAF_REC leaf_info; /* Leaf record info. */
34110  int offset_after_key; /* Offset in record data where packed key is ended. */
34111  bool dummy_clear_key; /* Dummy field used as argument for btree_read_record. */
34112  PAGE_PTR page_found = NULL; /* Page where the object was located; unfixed at 'end' if it is not the leaf. */
34113  int offset_to_object = 0; /* Offset of the object in its record, or NOT_FOUND. */
34115  PAGE_PTR prev_page = NULL; /* Filled by the OID lookup; unfixed at 'end' if it is not the leaf. */
34116  BTREE_NODE_TYPE node_type;
34117  char *rv_dummy_undo_data = NULL;
34118  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34119 
34120  LOG_DATA_ADDR addr;
34121  LOG_LSA prev_lsa;
34122  PGSLOTID slotid;
34123  RECDES new_record;
34124 
34125  char new_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34126  new_record.data = PTR_ALIGN (new_rec_buf, BTREE_MAX_ALIGN);
34127  new_record.area_size = IO_MAX_PAGE_SIZE;
34128 
34129  record.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
34130  record.area_size = IO_MAX_PAGE_SIZE;
34131 
34132  char *rv_undo_data = NULL;
34133  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
34134  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34135  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
34136 
34137  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
34138  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
34140  int rv_redo_data_length = 0;
34141 
34142  int n_keys = 0; /* Key-count delta reported to the unique statistics. */
34143  int n_oids = 0; /* Object-count delta reported to the unique statistics. */
34144 
34145  int key_len; /* Disk size of the key; compared against BTREE_MAX_KEYLEN_INPAGE below. */
34146 
34147  bool switched_to_insert_helper = false; /* Selects which helper's undo data is cleaned up at 'end'. */
34148 
       /* Start with the aligned stack buffer; the cleanup at 'end' only acts when this pointer has been replaced. */
34149  helper->delete_helper.rv_keyval_data = rv_undo_data_bufalign;
34151  {
34152  error_code =
34154  BTREE_DELETE_OID (&helper->delete_helper),
34156  rv_undo_data_bufalign, &helper->delete_helper.rv_keyval_data,
34157  &rv_undo_data_capacity, &helper->delete_helper.rv_keyval_data_length);
34158 
34159  if (error_code != NO_ERROR)
34160  {
34161  ASSERT_ERROR ();
34162  goto end;
34163  }
34164  }
34165 
       /* Leaf address used by the delete helper: slot found by the search, in the current leaf page. */
34166  helper->delete_helper.leaf_addr.offset = search_key->slotid;
34167  helper->delete_helper.leaf_addr.pgptr = *leaf_page;
34168  helper->delete_helper.leaf_addr.vfid = &btid_int->sys_btid->vfid;
34169 
34172 
34173  /* We are in leaf level now, and we must inspect if we have found the OID inside the key. */
34174  if (search_key->result == BTREE_KEY_FOUND)
34175  {
34176  /* We search the key for the OID. If we find it, it is expected to be either in INSERT_FLAG state or in
34177  * normal state, and it is handled accordingly below.
34178  */
34179 
34180  /* Get the record. */
34181  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
34182  {
34183  assert_release (false);
34184  error_code = ER_FAILED;
34185  goto end;
34186  }
34187 
34188  /* Read the record. */
34189  error_code =
34190  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE,
34191  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
34192  if (error_code != NO_ERROR)
34193  {
34194  ASSERT_ERROR ();
34195  goto end;
34196  }
34197 
       /* Look for the object to delete among the objects of this key. */
34198  error_code =
34199  btree_find_oid_with_page_and_record (thread_p, btid_int, &helper->delete_helper.object_info.oid, *leaf_page,
34200  helper->delete_helper.purpose, NULL, &record, &leaf_info, offset_after_key,
34201  &page_found, &prev_page, &offset_to_object, &btree_mvcc_info, &new_record);
34202  if (error_code != NO_ERROR)
34203  {
34204  ASSERT_ERROR ();
34205  goto end;
34206  }
34207 
       /* The object is in the leaf record when page_found is the leaf page; otherwise it is treated as being in an
        * overflow page. */
34208  node_type = (page_found == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
34209 
34210  if (offset_to_object != NOT_FOUND)
34211  {
34212  /* Inspect the key and its MVCC_INFO. If we find the object, then the object should have either INSERT_FLAG
34213  * set, or it should be without any flags set.
34214  */
34216 
       /* Slot 1 is used for the overflow case, the searched slot for the leaf case. */
34217  if (node_type == BTREE_LEAF_NODE)
34218  {
34219  slotid = search_key->slotid;
34220  }
34221  else
34222  {
34223  slotid = 1;
34224  }
34225 
34227  {
34228  /* Insert flag set. We must change the flag to DELETE_FLAG. */
34229 
34230  /* Prepare Logging. */
34231  addr.pgptr = page_found;
34232  addr.offset = slotid;
34233  addr.vfid = &btid_int->sys_btid->vfid;
34234 
34235  /* Redo logging. */
34236  if (node_type == BTREE_OVERFLOW_NODE)
34237  {
34239  }
34241 
34242  /* Set the new state to DELETE_FLAG. */
34244 
34245  /* Change the state of the record. */
34246  btree_online_index_change_state (thread_p, btid_int, &new_record, node_type, offset_to_object,
34247  btree_mvcc_info.insert_mvccid, NULL, &rv_redo_data_ptr);
34248 
34249  if (spage_update (thread_p, page_found, slotid, &new_record) != SP_SUCCESS)
34250  {
34251  assert_release (false);
34252  error_code = ER_FAILED;
34253  goto end;
34254  }
34255 
34256  /* We need to log previous lsa. */
34257  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page_found));
34258 
34259  /* Logging. */
34260  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
34261 
34262  btree_delete_log (&helper->delete_helper,
34263  BTREE_DELETE_MODIFY_MSG ("Tran delete change from INSERT_FLAG to DELETE_FLAG"),
34264  BTREE_DELETE_MODIFY_ARGS (thread_p, &helper->delete_helper, page_found, &prev_lsa,
34265  node_type == BTREE_LEAF_NODE, slotid, new_record.length,
34266  btid_int->sys_btid));
34267 
34268  btree_rv_log_delete_object (thread_p, helper->delete_helper, addr, 0,
34269  rv_redo_data_length, NULL, rv_redo_data);
34270 
34271  pgbuf_set_dirty (thread_p, page_found, DONT_FREE);
34272 
       /* No unique statistics update on this path: only the state of an existing object changed. */
34273  goto end;
34274  }
34275  else
34276  {
34277  /* Normal state. We need to physically delete the object. */
34279  if (btree_is_single_object_key (thread_p, btid_int, node_type, &new_record, offset_after_key))
34280  {
34281  /* Only one OID in the key, we will remove the key as well. */
34282  n_keys = -1;
34283  }
34284  n_oids = -1;
34285 
34286  error_code =
34287  btree_key_remove_object (thread_p, key, btid_int, &helper->delete_helper, *leaf_page, &record,
34288  &leaf_info, offset_after_key, search_key, &page_found, prev_page, node_type,
34289  offset_to_object);
34290 
34291  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34292  {
34293  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, n_keys, n_oids, 0, false);
34294  }
34295 
34296  goto end;
34297  }
34298  }
34299  else
34300  {
34301  ; /* Fall through and do the usual case. */
34302  }
34303  }
34304 
34305  /* We did not find the object. We have to check if there is enough space in the leaf for the object. If there is,
34306  * we insert it in place without any restarts.
34307  */
34308 
       /* From here on the insert helper is used, so 'end' must clean up insert_helper instead of delete_helper. */
34310  switched_to_insert_helper = true;
34313 
34314  /* delete_helper does not hold information regarding the length of the key in page.
34315  * We need this information so that we can check whether we have enough space to insert the new object.
34316  */
34317 
34318  key_len = btree_get_disk_size_of_key (key);
34320 
34321  if (!btree_key_insert_does_leaf_need_split (thread_p, btid_int, *leaf_page, &helper->insert_helper, search_key))
34322  {
34323  /* There is enough space. */
34324 
34325  /* We have to check if we have an overflow key and if the btid can handle it. If not, restart the traverse. */
34326  if (key_len >= BTREE_MAX_KEYLEN_INPAGE && VFID_ISNULL (&btid_int->ovfid))
34327  {
34328  /* We have to restart to ensure the key is correctly handled. */
34329  search_key->result = BTREE_KEY_NOTFOUND;
34330  goto end;
34331  }
34332 
34333  /* Set DELETE_FLAG in the helper structure. */
34336 
       /* Key exists: append the object to it; otherwise insert a new key (which also counts as one new key). */
34338  if (search_key->result == BTREE_KEY_FOUND)
34339  {
34340  error_code =
34341  btree_key_append_object_non_unique (thread_p, btid_int, key, *leaf_page, search_key, &new_record,
34342  offset_after_key, &leaf_info, &helper->insert_helper.obj_info,
34343  &helper->insert_helper);
34344  }
34345  else
34346  {
34347  error_code = btree_key_insert_new_key (thread_p, btid_int, key, *leaf_page, &helper->insert_helper,
34348  search_key);
34349  n_keys = 1;
34350  }
34351 
34352  n_oids = 1;
34353 
34354  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34355  {
34356  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, n_keys, n_oids, 0, false);
34357  }
34358 
34359  goto end;
34360  }
34361 
34362  /* Not enough space. We have to restart the traverse and try to insert the object with DELETE_FLAG set. */
34363  search_key->result = BTREE_KEY_NOTFOUND;
34364 
      /* Common exit: handle undo key/value data of whichever helper was in use when it no longer points at the stack
       * buffer (the statements inside the braces are not visible in this listing), reset that helper's pointer, and
       * unfix any page other than the leaf that was fixed while locating the object. */
34365 end:
34366  if (switched_to_insert_helper)
34367  {
34368  if (helper->insert_helper.rv_keyval_data != NULL && helper->insert_helper.rv_keyval_data != rv_undo_data_bufalign)
34369  {
34371  }
34372  helper->insert_helper.rv_keyval_data = NULL;
34374  }
34375  else
34376  {
34377  if (helper->delete_helper.rv_keyval_data != NULL && helper->delete_helper.rv_keyval_data != rv_undo_data_bufalign)
34378  {
34380  }
34381  helper->delete_helper.rv_keyval_data = NULL;
34383  }
34384 
34385  if (page_found != NULL && page_found != *leaf_page)
34386  {
34387  pgbuf_unfix_and_init (thread_p, page_found);
34388  }
34389 
34390  if (prev_page != NULL && prev_page != *leaf_page)
34391  {
34392  pgbuf_unfix_and_init (thread_p, prev_page);
34393  }
34394 
34395  return error_code;
34396 }
34397 
34398 /*
34399  * btree_key_online_index_tran_insert_DF () - BTREE_PROCESS_KEY_FUNCTION used for inserting a new object
34400  * with DELETE_FLAG set in b-tree during online index loading.
34401  *
34402  * return : Error code.
34403  * thread_p (in) : Thread entry.
34404  * btid_int (in) : B-tree info.
34405  * key (in) : Key info
34406  * leaf_page (in) : Pointer to the leaf page.
34407  * search_key (in) : Search helper
34408  * restart (in/out): Restart
34409  * other_args (in/out) : BTREE_HELPER *.
34410  */
34411 static int
34413  PAGE_PTR * leaf_page, BTREE_SEARCH_KEY_HELPER * search_key, bool * restart,
34414  void *other_args)
34415 {
34416  BTREE_HELPER *helper = (BTREE_HELPER *) other_args;
34417  int error_code = NO_ERROR; /* Error code. */
34418  RECDES record; /* Record descriptor for leaf key record. */
34419  LEAF_REC leaf_info; /* Leaf record info. */
34420  int offset_after_key; /* Offset in record data where packed key is ended. */
34421  bool dummy_clear_key; /* Dummy field used as argument for btree_read_record. */
34422  PAGE_PTR page_found = NULL;
34423  int offset_to_object = 0;
34425  PAGE_PTR prev_page = NULL;
34426  BTREE_NODE_TYPE node_type;
34427 
34428  LOG_DATA_ADDR addr;
34429  LOG_LSA prev_lsa;
34430  PGSLOTID slotid;
34431  RECDES new_record;
34432  char rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34433  char new_rec_buf[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34434 
34435  new_record.data = PTR_ALIGN (new_rec_buf, BTREE_MAX_ALIGN);
34436  new_record.area_size = IO_MAX_PAGE_SIZE;
34437 
34438  record.data = PTR_ALIGN (rec_buf, BTREE_MAX_ALIGN);
34439  record.area_size = IO_MAX_PAGE_SIZE;
34440 
34441  char *rv_undo_data = NULL;
34442  int rv_undo_data_capacity = IO_MAX_PAGE_SIZE;
34443  char rv_undo_data_buffer[IO_MAX_PAGE_SIZE + BTREE_MAX_ALIGN];
34444  char *rv_undo_data_bufalign = PTR_ALIGN (rv_undo_data_buffer, BTREE_MAX_ALIGN);
34445 
34446  char rv_redo_data_buffer[BTREE_RV_BUFFER_SIZE + BTREE_MAX_ALIGN];
34447  char *rv_redo_data = PTR_ALIGN (rv_redo_data_buffer, BTREE_MAX_ALIGN);
34449  int rv_redo_data_length = 0;
34450 
34451  int n_keys = 0;
34452  int n_oids = 0;
34453 
34454  bool switched_to_delete_helper = false;
34455 
34456  /* Save the key for undo process. */
34457  helper->insert_helper.rv_keyval_data = rv_undo_data_bufalign;
34459  {
34460  error_code =
34462  BTREE_INSERT_OID (&helper->insert_helper),
34464  rv_undo_data_bufalign, &helper->insert_helper.rv_keyval_data,
34465  &rv_undo_data_capacity, &helper->insert_helper.rv_keyval_data_length);
34466  if (error_code != NO_ERROR)
34467  {
34468  ASSERT_ERROR ();
34469  goto end;
34470  }
34471  }
34472 
34473  helper->insert_helper.leaf_addr.offset = search_key->slotid;
34474  helper->insert_helper.leaf_addr.pgptr = *leaf_page;
34475  helper->insert_helper.leaf_addr.vfid = &btid_int->sys_btid->vfid;
34476 
34477  /* Redo logging. */
34480 
34481  /* We are in leaf level now, and we must inspect if we have found the OID inside the key. */
34482  if (search_key->result == BTREE_KEY_FOUND)
34483  {
34484  /* We search the key for the OID. */
34485 
34486  /* Get the record. */
34487  if (spage_get_record (thread_p, *leaf_page, search_key->slotid, &record, COPY) != S_SUCCESS)
34488  {
34489  assert_release (false);
34490  error_code = ER_FAILED;
34491  goto end;
34492  }
34493 
34494  /* Read the record. */
34495  error_code =
34496  btree_read_record (thread_p, btid_int, *leaf_page, &record, NULL, &leaf_info, BTREE_LEAF_NODE,
34497  &dummy_clear_key, &offset_after_key, PEEK_KEY_VALUE, NULL);
34498  if (error_code != NO_ERROR)
34499  {
34500  ASSERT_ERROR ();
34501  goto end;
34502  }
34503 
34504  error_code =
34505  btree_find_oid_with_page_and_record (thread_p, btid_int, &helper->insert_helper.obj_info.oid, *leaf_page,
34506  helper->insert_helper.purpose, NULL, &record, &leaf_info, offset_after_key,
34507  &page_found, &prev_page, &offset_to_object, &btree_mvcc_info, &new_record);
34508  if (error_code != NO_ERROR)
34509  {
34510  ASSERT_ERROR ();
34511  goto end;
34512  }
34513 
34514  node_type = (page_found == *leaf_page) ? BTREE_LEAF_NODE : BTREE_OVERFLOW_NODE;
34515 
34516  if (offset_to_object != NOT_FOUND)
34517  {
34518  /* Inspect the key and its MVCC_INFO. This is the transactional insert with DELETE_FLAG, which means
34519  * that if we can find the object, then the object must have either INSERT_FLAG set, or the object
34520  * should be in normal state.
34521  */
34523 
34524  if (node_type == BTREE_LEAF_NODE)
34525  {
34526  slotid = search_key->slotid;
34527  }
34528  else
34529  {
34530  slotid = 1;
34531  }
34532 
34533  if (btree_online_index_is_normal_state (btree_mvcc_info.insert_mvccid))
34534  {
34535  /* This translates into a physical delete as the object has already been inserted into the btree. */
34536  /* Normal state. We need to physically delete the object. */
34538 
34540  switched_to_delete_helper = true;
34541 
34544 
34545  if (btree_is_single_object_key (thread_p, btid_int, node_type, &new_record, offset_after_key))
34546  {
34547  /* Only one OID in the key, we will remove the key as well. */
34548  n_keys = -1;
34549  }
34550  n_oids = -1;
34551 
34552  error_code =
34553  btree_key_remove_object (thread_p, key, btid_int, &helper->delete_helper, *leaf_page, &record,
34554  &leaf_info, offset_after_key, search_key, &page_found, prev_page, node_type,
34555  offset_to_object);
34556 
34557  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34558  {
34559  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, n_keys, n_oids, 0, false);
34560  }
34561  goto end;
34562  }
34563  else
34564  {
34565  /* We must have INSERT_FLAG set. */
34567 
34568  /* We have to change the state to DELETE_FLAG. */
34569 
34570  /* Prepare logging. */
34571  addr.offset = slotid;
34572  addr.pgptr = page_found;
34573  addr.vfid = &btid_int->sys_btid->vfid;
34574 
34575  /* Redo logging. */
34576  if (node_type == BTREE_OVERFLOW_NODE)
34577  {
34579  }
34581 
34582  /* Set the new state to INSERT_FLAG. */
34584 
34585  /* Change the state of the record. */
34586  btree_online_index_change_state (thread_p, btid_int, &new_record, node_type, offset_to_object,
34587  btree_mvcc_info.insert_mvccid, NULL, &rv_redo_data_ptr);
34588 
34589  if (spage_update (thread_p, page_found, slotid, &new_record) != SP_SUCCESS)
34590  {
34591  assert_release (false);
34592  error_code = ER_FAILED;
34593  goto end;
34594  }
34595 
34596  /* We need to log previous lsa. */
34597  LSA_COPY (&prev_lsa, pgbuf_get_lsa (page_found));
34598 
34599  /* Logging. */
34600  BTREE_RV_GET_DATA_LENGTH (rv_redo_data_ptr, rv_redo_data, rv_redo_data_length);
34601 
34602  btree_insert_log (&helper->insert_helper,
34603  BTREE_INSERT_MODIFY_MSG ("Tran delete change from INSERT_FLAG to DELETE_FLAG"),
34604  BTREE_INSERT_MODIFY_ARGS (thread_p, &helper->insert_helper, page_found, &prev_lsa,
34605  node_type == BTREE_LEAF_NODE, slotid, new_record.length,
34606  btid_int->sys_btid));
34607 
34608  btree_rv_log_insert_object (thread_p, helper->insert_helper, addr, 0, rv_redo_data_length, NULL,
34609  rv_redo_data);
34610 
34611  pgbuf_set_dirty (thread_p, page_found, DONT_FREE);
34612 
34613  goto end;
34614  }
34615  }
34616  else
34617  {
34618  /* Key was found but the object wasn't. We must append the object to the current key. */
34619  /* Safeguards. */
34620  assert (search_key->result == BTREE_KEY_FOUND && offset_to_object == NOT_FOUND);
34621 
34622  /* We did not find the object. We have to insert it with DELETE_FLAG set. */
34625 
34626  error_code =
34627  btree_key_append_object_non_unique (thread_p, btid_int, key, *leaf_page, search_key, &new_record,
34628  offset_after_key, &leaf_info, &helper->insert_helper.obj_info,
34629  &helper->insert_helper);
34630 
34631  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34632  {
34633  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, 0, 1, 0, false);
34634  }
34635  }
34636  }
34637  else
34638  {
34639  /* Key was not found, we must insert it. */
34640  /* We have to insert it with DELETE_FLAG set. */
34643 
34644  error_code = btree_key_insert_new_key (thread_p, btid_int, key, *leaf_page, &helper->insert_helper, search_key);
34645  if (error_code == NO_ERROR && BTREE_IS_UNIQUE (btid_int->unique_pk))
34646  {
34647  logtb_tran_update_unique_stats (thread_p, btid_int->sys_btid, 1, 1, 0, false);
34648  }
34649  }
34650 
34651 end:
34652  if (switched_to_delete_helper)
34653  {
34654  if (helper->delete_helper.rv_keyval_data != NULL && helper->delete_helper.rv_keyval_data != rv_undo_data_bufalign)
34655  {
34657  }
34658  helper->delete_helper.rv_keyval_data = NULL;
34660  }
34661  else
34662  {
34663  if (helper->insert_helper.rv_keyval_data != NULL && helper->insert_helper.rv_keyval_data != rv_undo_data_bufalign)
34664  {
34666  }
34667  helper->insert_helper.rv_keyval_data = NULL;
34669  }
34670 
34671  if (page_found != NULL && page_found != *leaf_page)
34672  {
34673  pgbuf_unfix_and_init (thread_p, page_found);
34674  }
34675 
34676  if (prev_page != NULL && prev_page != *leaf_page)
34677  {
34678  pgbuf_unfix_and_init (thread_p, prev_page);
34679  }
34680 
34681  return error_code;
34682 }
34683 
34684 //
34685 // btree_online_index_change_state () - set new object state during online index
34686 //
34687 // thread_p (in) : thread entry
34688 // btid_int (in) : b-tree info
34689 // record (in) : leaf/overflow record
34690 // node_type (in) : node type
34691 // offset_to_object (in) : offset_to_object
34692 // new_state (in) : new object state
34693 // rv_undo_data (in/out) : buffer to append undo log data
34694 // rv_redo_data (in/out) : buffer to append redo log data
34695 //
34696 void
34698  BTREE_NODE_TYPE node_type, int offset_to_object, MVCCID new_state,
34699  char **rv_undo_data, char **rv_redo_data)
34700 {
34701  int offset_to_insid_mvccid;
34702  char *oid_ptr = NULL;
34703  char *mvccid_ptr = NULL;
34704 
34705  oid_ptr = record->data + offset_to_object;
34706 
34707  offset_to_insid_mvccid = offset_to_object + OR_OID_SIZE;
34708  if (btree_is_class_oid_packed (btid_int, record, node_type, (offset_to_object == 0)))
34709  {
34710  /* Class OID is also packed. */
34711  offset_to_insid_mvccid += OR_OID_SIZE;
34712  }
34713  /* Set mvccid_ptr. */
34714  mvccid_ptr = record->data + offset_to_insid_mvccid;
34715 
34716  /* Assign the new mvcc_insid. */
34718  {
34719  // todo - compare to old state and make sure it changes
34720  /* We have MVCC_INSID. */
34721  if (!btree_online_index_is_normal_state (new_state)
34722  || btree_is_fixed_size (btid_int, record, node_type, (offset_to_object == 0)))
34723  {
34724  /* If we have any state set, except the normal state, or if it is a fixed size record. */
34725  btree_set_mvccid (record, offset_to_insid_mvccid, &new_state, rv_undo_data, rv_redo_data);
34726  }
34727  else
34728  {
34729  /* We have normal state of the record and the record is not a fixed size one. */
34730  /* This translates in removing the state. */
34731  btree_record_remove_insid (thread_p, btid_int, record, node_type, offset_to_object, rv_undo_data,
34732  rv_redo_data, NULL);
34733  }
34734  }
34735  else if (!btree_online_index_is_normal_state (new_state))
34736  {
34737  /* We don't have MVCC_INSID. */
34738  btree_add_mvccid (record, offset_to_object, offset_to_insid_mvccid, new_state, BTREE_OID_HAS_MVCC_INSID,
34739  rv_undo_data, rv_redo_data);
34740  }
34741  else
34742  {
34743  // todo - is this possible? basically state is not changed...
34744  assert (false);
34745  }
34746 
34747 #if !defined (NDEBUG)
34748  btree_check_valid_record (thread_p, btid_int, record, node_type, NULL);
34749 #endif
34750 }
34751 
34752 //
34753 // btree_is_class_oid_packed () - is class OID packed with object?
34754 //
34755 // return : true if class oid is packed, false otherwise
34756 // btid_int (in) : b-tree info
34757 // record (in) : record descriptor
34758 // node_type (in) : leaf/overflow node type
34759 // is_first (in) : is object first in record?
34760 //
34761 static bool
34762 btree_is_class_oid_packed (BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type, bool is_first)
34763 {
34764  // class oid is packed if:
34765  // 1. index is unique and
34766  // 2.1. is overflow node or
34767  // 2.2. is not first in leaf record or
34768  // 2.3. is first in leaf record and record is flagged with BTREE_LEAF_RECORD_CLASS_OID
34769 
34770  if (!btid_int->unique_pk)
34771  {
34772  // not unique, no class is saved
34773  return false;
34774  }
34775 
34776  // is unique
34777 
34778  if (node_type == BTREE_OVERFLOW_NODE)
34779  {
34780  // all overflow objects save class
34781  return true;
34782  }
34783 
34784  // is leaf
34785 
34786  if (!is_first)
34787  {
34788  // non-first in leaf record saves class
34789  return true;
34790  }
34791 
34792  // first saves class only if flagged
34794 }
34795 
34796 static inline bool
34797 btree_is_fixed_size (BTID_INT * btid_int, RECDES * record, BTREE_NODE_TYPE node_type, bool is_first)
34798 {
34799  return ((node_type == BTREE_OVERFLOW_NODE) || (!is_first && BTREE_IS_UNIQUE (btid_int->unique_pk))
34800  || (is_first && btree_leaf_is_flaged (record, BTREE_LEAF_RECORD_OVERFLOW_OIDS)));
34801 }
34802 
34803 static bool
34805 {
34806  switch (purpose)
34807  {
34816  return true;
34817  default:
34818  return false;
34819  }
34820 }
34821 
34822 static bool
34824 {
34825  switch (purpose)
34826  {
34833  return true;
34834  default:
34835  return false;
34836  }
34837 }
34838 
34839 static bool
34841 {
34842  switch (purpose)
34843  {
34846  return true;
34847  default:
34848  return false;
34849  }
34850 }
34851 
34852 static bool
34854 {
34855  switch (purpose)
34856  {
34867  return true;
34868  default:
34869  return false;
34870  }
34871 }
34872 
34873 static bool
34875 {
34876  switch (purpose)
34877  {
34886  return true;
34887  default:
34888  return false;
34889  }
34890 }
34891 
34892 //
34893 // btree_rv_log_delete_object () - log b-tree delete operation according to purpose
34894 //
34895 // thread_p (in) : thread entry
34896 // delete_helper (in) : delete helper
34897 // addr (in) : address for logging
34898 // undo_length (in) : physical undo log size
34899 // redo_length (in) : redo log size (is always physical)
34900 // undo_data (in) : physical undo log
34901 // redo_data (in) : redo log (is always physical)
34902 //
34903 static void
34905  int undo_length, int redo_length, const char *undo_data, const char *redo_data)
34906 {
34907  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
34908  assert (btree_is_delete_object_purpose (delete_helper.purpose));
34909 
34910  if (delete_helper.is_system_op_started)
34911  {
34912  // we need to log undoredo physical
34913  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &addr, undo_length, redo_length, undo_data,
34914  redo_data);
34915  }
34916  else
34917  {
34918  switch (delete_helper.purpose)
34919  {
34921  // log undo logical, log redo physical
34923  redo_length, delete_helper.rv_keyval_data, redo_data);
34924  break;
34927  delete_helper.rv_keyval_data_length, redo_length, delete_helper.rv_keyval_data,
34928  redo_data);
34929  break;
34932  redo_length, redo_data, &delete_helper.reference_lsa);
34933  break;
34938  pgbuf_get_vpid_ptr (addr.pgptr), addr.offset, addr.pgptr, redo_length,
34939  redo_data, LOG_FIND_CURRENT_TDES (thread_p),
34940  &delete_helper.reference_lsa);
34941  break;
34944  log_append_redo_data (thread_p, RVBT_DELETE_OBJECT_PHYSICAL, &addr, redo_length, redo_data);
34945  break;
34946  default:
34947  assert (false);
34948  break;
34949  }
34950  }
34951 }
34952 
34953 //
34954 // btree_rv_log_insert_object () - log b-tree insert operation according to purpose
34955 //
34956 // thread_p (in) : thread entry
34957 // insert_helper (in) : insert helper
34958 // addr (in) : address for logging
34959 // undo_length (in) : physical undo log size
34960 // redo_length (in) : redo log size (is always physical)
34961 // undo_data (in) : physical undo log
34962 // redo_data (in) : redo log (is always physical)
34963 //
34964 static void
34966  int undo_length, int redo_length, const char *undo_data, const char *redo_data)
34967 {
34968  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
34969  assert (btree_is_insert_object_purpose (insert_helper.purpose));
34970 
34971  if (insert_helper.is_system_op_started)
34972  {
34973  // undo/redo physical
34974  log_append_undoredo_data (thread_p, RVBT_RECORD_MODIFY_UNDOREDO, &addr, undo_length, redo_length, undo_data,
34975  redo_data);
34976  }
34977  else
34978  {
34979  switch (insert_helper.purpose)
34980  {
34982  // undo logical, redo physical
34983  log_append_undoredo_data (thread_p, insert_helper.rcvindex, &addr, insert_helper.rv_keyval_data_length,
34984  redo_length, insert_helper.rv_keyval_data, redo_data);
34985  break;
34986 
34988  /* Safeguard */
34990 
34991  /* Insert with DELETE_FLAG. */
34993  insert_helper.rv_keyval_data_length, redo_length, insert_helper.rv_keyval_data,
34994  redo_data);
34995 
34996  break;
34998  /* Normal insert. */
35000  insert_helper.rv_keyval_data_length, redo_length, insert_helper.rv_keyval_data,
35001  redo_data);
35002 
35003  break;
35005  // redo logging
35006  log_append_redo_data (thread_p, RVBT_RECORD_MODIFY_NO_UNDO, &addr, redo_length, redo_data);
35007  break;
35011  pgbuf_get_vpid_ptr (addr.pgptr), addr.offset, addr.pgptr,
35012  redo_length, redo_data, LOG_FIND_CURRENT_TDES (thread_p),
35013  &insert_helper.compensate_undo_nxlsa);
35014  break;
35015  default:
35016  assert (false);
35017  break;
35018  }
35019  }
35020 }
35021 
35022 /*
35023  * btree_find_oid_with_page_and_record () - Find OID in leaf/overflow pages and output its position and the record.
35024  *
35025  * return : Error code (error of btree_find_oid_and_its_page, or ER_FAILED if the overflow record cannot be read).
35026  * thread_p (in) : Thread entry.
35027  * btid_int (in) : B-tree info.
35028  * oid (in) : Object OID.
35029  * leaf_page (in) : Fixed leaf page (where object's key is found).
35030  * purpose (in) : Purpose/context for the function call.
35031  * match_mvccinfo (in) : Non-null value to be matched or null if it doesn't matter.
35032  * record (in) : Key leaf record.
35033  * leaf_info (in) : Key leaf record info.
35034  * offset_after_key (in) : Offset in leaf record where packed key is ended.
35035  * found_page (out) : Outputs leaf or overflow page where object is found.
35036  * prev_page (out) : Previous page of the overflow page where object is found. If object is in leaf it
35037  * will output NULL. If object is in first overflow, it will output leaf page.
35038  * If argument is NULL, previous overflow page is unfixed.
35039  * offset_to_object (out) : Offset to object in the record of leaf/overflow, or NOT_FOUND if object is not found.
35040  * new_record (out) : Assigned *record if object is not found or is in the leaf; record of slot 1 of found page otherwise.
35041  * object_mvcc_info (out) : Passed through to btree_find_oid_and_its_page.
35042  */
35043 static int
35044 btree_find_oid_with_page_and_record (THREAD_ENTRY * thread_p, BTID_INT * btid_int, OID * oid, PAGE_PTR leaf_page,
35046  LEAF_REC * leaf_info, int offset_after_key, PAGE_PTR * found_page,
35047  PAGE_PTR * prev_page, int *offset_to_object, BTREE_MVCC_INFO * object_mvcc_info,
35048  RECDES * new_record)
35049 {
35050  int error_code = NO_ERROR;
35051  /* NOTE(review): NULL is passed right after purpose; presumably that is the callee's match_mvccinfo slot -- confirm. */
35052  error_code = btree_find_oid_and_its_page (thread_p, btid_int, oid, leaf_page, purpose, NULL, record, leaf_info,
35053  offset_after_key, found_page, prev_page, offset_to_object,
35054  object_mvcc_info);
35055  if (error_code != NO_ERROR)
35056  {
35057  ASSERT_ERROR ();
35058  return error_code;
35059  }
35060 
35061 
35062  if (*offset_to_object == NOT_FOUND)
35063  {
35064  /* Object not found: output the key record as new_record and stop here. */
35065 
35066  *new_record = *record;
35067  return error_code;
35068  }
35069 
35070  /* We found the object. */
35071 
35072  if (*found_page == leaf_page)
35073  {
35074  /* No overflow, set new_record to the record. */
35075  *new_record = *record;
35076  return error_code;
35077  }
35078 
35079  /* Overflow page. */
35080 
35081  /* Get the new record: it is read from slot 1 of the found page, in COPY mode. */
35082  if (spage_get_record (thread_p, *found_page, 1, new_record, COPY) != S_SUCCESS)
35083  {
35084  assert_release (false);
35085  return ER_FAILED;
35086  }
35087 
35088  return error_code;
35089 }
35090 
35091 /*
35092  * btree_rv_keyval_undo_online_index_tran_delete () -
35093  * return: int
35094  * recv(in): Recovery structure
35095  *
35096  * Note: undo the deletion of a <key, val> pair to the B+tree,
35097  * by inserting the <key, val> pair to the tree during an online index operation.
35098  */
35099 int
35101 {
35102  BTID_INT btid;
35103  BTID sys_btid;
35104  DB_VALUE key;
35105  OID cls_oid;
35106  OID oid;
35107  char *datap;
35108  int datasize;
35110  int error_code = NO_ERROR;
35111 
35112  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
35113  btid.sys_btid = &sys_btid;
35114 
35115  /* extract the stored btid, key, oid data */
35116  datap = (char *) recv->data;
35117  datasize = recv->length;
35118  error_code = btree_rv_read_keyval_info_nocopy (thread_p, datap, datasize, &btid, &cls_oid, &oid, &mvcc_info, &key);
35119  if (error_code != NO_ERROR)
35120  {
35121  ASSERT_ERROR ();
35122  return error_code;
35123  }
35124 
35125  assert (!OID_ISNULL (&oid));
35126 
35127  /* Insert object and all its info. */
35128  error_code = btree_online_index_dispatcher (thread_p, btid.sys_btid, &key, &cls_oid, &oid, btid.unique_pk,
35130  if (error_code != NO_ERROR)
35131  {
35132  ASSERT_ERROR ();
35133  pr_clear_value (&key);
35134  return error_code;
35135  }
35136 
35137  pr_clear_value (&key);
35138 
35139  return NO_ERROR;
35140 }
35141 
35142 /*
35143  * btree_rv_keyval_undo_online_index_tran_insert () - Undo insert operation for btree during online index.
35144  *
35145  * return : Error code.
35146  * thread_p (in) : Thread entry.
35147  * recv (in) : Recovery data.
35148  */
35149 int
35151 {
35152  BTID_INT btid;
35153  BTID sys_btid;
35154  OID cls_oid;
35155  OID oid;
35156  char *datap;
35157  int datasize;
35158  BTREE_MVCC_INFO dummy_mvcc_info;
35159  int err = NO_ERROR;
35160  DB_VALUE key;
35161 
35162  /* btid needs a place to unpack the sys_btid into. We'll use stack space. */
35163  btid.sys_btid = &sys_btid;
35164 
35165  /* extract the stored btid, key, oid data */
35166  datap = (char *) recv->data;
35167  datasize = recv->length;
35168  err = btree_rv_read_keyval_info_nocopy (thread_p, datap, datasize, &btid, &cls_oid, &oid, &dummy_mvcc_info, &key);
35169  if (err != NO_ERROR)
35170  {
35171  ASSERT_ERROR ();
35172  return err;
35173  }
35174 
35175  assert (!OID_ISNULL (&oid));
35176 
35177  /* Undo insert: just delete object and all its information. */
35178  err = btree_online_index_dispatcher (thread_p, btid.sys_btid, &key, &cls_oid, &oid, btid.unique_pk,
35180  if (err != NO_ERROR)
35181  {
35182  ASSERT_ERROR ();
35183  pr_clear_value (&key);
35184  return err;
35185  }
35186 
35187  pr_clear_value (&key);
35188 
35189  return NO_ERROR;
35190 }
35191 
35192 void
35194 {
35195  /* oid, classoid and mvcc info */
35196  delete_helper->object_info.oid = insert_helper->obj_info.oid;
35197  delete_helper->object_info.class_oid = insert_helper->obj_info.class_oid;
35198  delete_helper->object_info.mvcc_info = insert_helper->obj_info.mvcc_info;
35199 
35200  /* save the LSA needed for recovery */
35201  LSA_COPY (&delete_helper->reference_lsa, &insert_helper->compensate_undo_nxlsa);
35202 
35203  /* Leaf addr. */
35204  delete_helper->leaf_addr.offset = insert_helper->leaf_addr.offset;
35205  delete_helper->leaf_addr.pgptr = insert_helper->leaf_addr.pgptr;
35206  delete_helper->leaf_addr.vfid = insert_helper->leaf_addr.vfid;
35207 
35208  /* Undo logging. */
35209  delete_helper->rv_keyval_data = insert_helper->rv_keyval_data;
35210  delete_helper->rv_keyval_data_length = insert_helper->rv_keyval_data_length;
35211 
35212  /* Redo logging. */
35213  delete_helper->rv_redo_data = insert_helper->rv_redo_data;
35214  delete_helper->rv_redo_data_ptr = delete_helper->rv_redo_data;
35215 
35216  /* Error logging. */
35217  delete_helper->log_operations = insert_helper->log_operations;
35218  delete_helper->printed_key = insert_helper->printed_key;
35219  delete_helper->printed_key_sha1 = insert_helper->printed_key_sha1;
35220 }
35221 
35222 void
35224 {
35225  /* oid, classoid and mvcc info */
35226  insert_helper->obj_info.oid = delete_helper->object_info.oid;
35227  insert_helper->obj_info.class_oid = delete_helper->object_info.class_oid;
35228  insert_helper->obj_info.mvcc_info = delete_helper->object_info.mvcc_info;
35229 
35230  /* save the LSA needed for recovery */
35231  LSA_COPY (&insert_helper->compensate_undo_nxlsa, &delete_helper->reference_lsa);
35232 
35233  /* Leaf addr. */
35234  insert_helper->leaf_addr.offset = delete_helper->leaf_addr.offset;
35235  insert_helper->leaf_addr.pgptr = delete_helper->leaf_addr.pgptr;
35236  insert_helper->leaf_addr.vfid = delete_helper->leaf_addr.vfid;
35237 
35238  /* Undo logging. */
35239  insert_helper->rv_keyval_data = delete_helper->rv_keyval_data;
35240  insert_helper->rv_keyval_data_length = delete_helper->rv_keyval_data_length;
35241 
35242  /* Redo logging. */
35243  insert_helper->rv_redo_data = delete_helper->rv_redo_data;
35244  insert_helper->rv_redo_data_ptr = insert_helper->rv_redo_data;
35245 
35246  /* Error logging. */
35247  insert_helper->log_operations = delete_helper->log_operations;
35248  insert_helper->printed_key = delete_helper->printed_key;
35249  insert_helper->printed_key_sha1 = delete_helper->printed_key_sha1;
35250 }
35251 
35252 static inline bool
35254 {
35255  switch (purpose)
35256  {
35264  return true;
35265  default:
35266  return false;
35267  }
35268 
35269  return false;
35270 }
35271 
35272 int
35273 btree_online_index_check_unique_constraint (THREAD_ENTRY * thread_p, BTID * btid, const char *index_name,
35274  OID * class_oid)
35275 {
35276  int ret = NO_ERROR;
35277  int g_num_oids = 0, g_num_nulls = 0, g_num_keys = 0;
35278  LOG_TRAN_BTID_UNIQUE_STATS *unique_stats = logtb_tran_find_btid_stats (thread_p, btid, true);
35279 
35280  if (unique_stats == NULL)
35281  {
35282  return ER_FAILED;
35283  }
35284 
35285  ret = logtb_get_global_unique_stats (thread_p, btid, &g_num_oids, &g_num_nulls, &g_num_keys);
35286  if (ret != NO_ERROR)
35287  {
35288  ASSERT_ERROR ();
35289  return ret;
35290  }
35291 
35292  if ((g_num_oids + unique_stats->tran_stats.num_oids)
35293  != (g_num_keys + unique_stats->tran_stats.num_keys) + (g_num_nulls + unique_stats->tran_stats.num_nulls))
35294  {
35295  /* Unique constraint violation. */
35296  BTREE_SET_UNIQUE_VIOLATION_ERROR (thread_p, NULL, NULL, class_oid, btid, index_name);
35297  return ER_BTREE_UNIQUE_FAILED;
35298  }
35299 
35300  return NO_ERROR;
35301 }
35302 
35303 int
35304 btree_get_class_oid_of_unique_btid (THREAD_ENTRY * thread_p, BTID * btid, OID * class_oid)
35305 {
35306  PAGE_PTR root_page;
35307  BTREE_ROOT_HEADER *root_header = NULL;
35308 
35309  OID_SET_NULL (class_oid);
35310 
35311  root_page = btree_fix_root_with_info (thread_p, btid, PGBUF_LATCH_READ, NULL, &root_header, NULL);
35312  if (root_page == NULL)
35313  {
35314  return ER_FAILED;
35315  }
35316 
35317  if (BTREE_IS_UNIQUE (root_header->unique_pk))
35318  {
35319  /* Copy the class oid */
35320  COPY_OID (class_oid, &root_header->topclass_oid);
35321  }
35322 
35323  pgbuf_unfix_and_init (thread_p, root_page);
35324 
35325  return NO_ERROR;
35326 }
35327 
/*
 * btree_is_btid_online_index () - Search btid among the indexes of the class representation and report the
 *                                 outcome of the check done on the matching index.
 *
 * return         : false if the class representation cannot be obtained or no index has the given BTID;
 *                  true only if the matching index passes a check whose condition line is missing from this
 *                  listing (listing line 35348) -- NOTE(review): confirm the condition against the full source.
 * thread_p (in)  : Thread entry.
 * class_oid (in) : Class OID; its representation is obtained with heap_classrepr_get.
 * btid (in)      : B-tree identifier to look for.
 */
35328 bool
35329 btree_is_btid_online_index (THREAD_ENTRY * thread_p, OID * class_oid, BTID * btid)
35330 {
35331  OR_CLASSREP *rep = NULL;
35332  int idx_incache = -1;
35333  bool result = false;
35334  int i;
35335 
35336  rep = heap_classrepr_get (thread_p, class_oid, NULL, NULL_REPRID, &idx_incache);
35337  if (rep == NULL)
35338  {
35339  assert (false); /* not expected; release builds just answer false */
35340  return false;
35341  }
35342 
35343  /* Iterate through indexes of current class_oid and check if the one matching the btid is an online one. */
35344  for (i = 0; i < rep->n_indexes; i++)
35345  {
35346  if (BTID_IS_EQUAL (btid, &rep->indexes[i].btid))
35347  {
35349  {
35350  result = true;
35351  }
35352  break; /* BTID matched: stop scanning whatever the check result was */
35353  }
35354  }
35355 
35356  heap_classrepr_free_and_init (rep, &idx_incache); /* release the representation obtained above */
35357 
35358  return result;
35359 }
35360 
35361 //
35362 // btree_is_single_object_key () - returns true if there is only one object in key, false otherwise; parameters
35363 // offer details on object location
35364 //
35365 // return : true if single object
35366 // thread_p (in) : thread entry
35367 // btid_int (in) : b-tree info
35368 // node_type (in) : node type - overflow or leaf
35369 // record (in) : current record (overflow or leaf)
35370 // offset_after_key (in) : offset after key (only for leaf)
35371 //
35372 static bool
35374  RECDES * record, int offset_after_key)
35375 {
35376  if (node_type == BTREE_OVERFLOW_NODE)
35377  {
35378  // has overflows, must have at least two
35379  return false;
35380  }
35381  // leaf
35382  assert (node_type == BTREE_LEAF_NODE);
35383  if (offset_after_key < record->length)
35384  {
35385  // it has more than one object!
35386  // this is a hack to avoid counting objects; maybe it is not safe
35387  return false;
35388  }
35389  assert (offset_after_key == record->length);
35390  return true;
35391 }
35392 
35393 static bool
35395 {
35396  int has_class_bu_lock;
35397  int has_instance_lock;
35398 
35399  /* The insert operation in index has to check that the object being inserted is locked by the transaction.
35400  * However, after the introduction of the BU_LOCK this is no longer always valid. For this case, the inserter
35401  * should make sure that it has a BU_LOCK on the class it is inserting into.
35402  *
35403  * Now in order to correctly insert into the b-tree the transaction should either have an X_LOCK on the object,
35404  * or a BU_LOCK on the class.
35405  */
35406 
35407  has_class_bu_lock = lock_has_lock_on_object (BTREE_INSERT_CLASS_OID (insert_helper), oid_Root_class_oid, BU_LOCK);
35408  if (has_class_bu_lock > 0)
35409  {
35410  return true;
35411  }
35412 
35413  has_instance_lock = lock_has_lock_on_object (BTREE_INSERT_OID (insert_helper),
35414  BTREE_INSERT_CLASS_OID (insert_helper), X_LOCK);
35415  if (has_instance_lock > 0)
35416  {
35417  return true;
35418  }
35419 
35420  return false;
35421 }
35422 
35423 static bool
35425 {
35426  int has_class_bu_lock;
35427  int has_instance_lock;
35428 
35429  /* The insert operation in index has to check that the object being inserted is locked by the transaction.
35430  * However, after the introduction of the BU_LOCK this is no longer always valid. For this case, the inserter
35431  * should make sure that it has a BU_LOCK on the class it is inserting into.
35432  *
35433  * Now in order to correctly insert into the b-tree the transaction should either have an X_LOCK on the object,
35434  * or a BU_LOCK on the class.
35435  */
35436 
35437  has_class_bu_lock = lock_has_lock_on_object (BTREE_DELETE_CLASS_OID (delete_helper), oid_Root_class_oid, BU_LOCK);
35438  if (LOG_ISTRAN_ABORTED (LOG_FIND_CURRENT_TDES (thread_p)) && has_class_bu_lock > 0)
35439  {
35440  return true;
35441  }
35442 
35443  has_instance_lock = lock_has_lock_on_object (BTREE_DELETE_OID (delete_helper),
35444  BTREE_DELETE_CLASS_OID (delete_helper), X_LOCK);
35445  if (has_instance_lock > 0)
35446  {
35447  return true;
35448  }
35449 
35450  return false;
35451 }
35452 
35453 // *INDENT-OFF*
35455  : m_is_inf_left_key (true)
35456  , m_is_inf_right_key (true)
35457 {
35460 }
35461 
35463 {
35466 }
35467 
35468 void
35469 page_key_boundary::set_value (DB_VALUE &dest_value, DB_VALUE &src_value, bool &clear_src_value)
35470 {
35471  pr_clear_value (&dest_value);
35472  pr_clone_value (&src_value, &dest_value);
35473  btree_clear_key_value (&clear_src_value, &src_value);
35474 }
35475 
35476 int
35477 page_key_boundary::set_value (THREAD_ENTRY * thread_p, DB_VALUE &dest_value, BTID_INT * btid, PAGE_PTR page_ptr,
35478  const INT16 slot)
35479 {
35480  RECDES rec;
35481  if (spage_get_record (thread_p, page_ptr, slot, &rec, PEEK) != S_SUCCESS)
35482  {
35483  return ER_FAILED;
35484  }
35485 
35486  return set_value (thread_p, dest_value, btid, page_ptr, rec);
35487 }
35488 
35489 int
35490 page_key_boundary::set_value (THREAD_ENTRY * thread_p, DB_VALUE &dest_value, BTID_INT * btid, PAGE_PTR page_ptr,
35491  RECDES &rec)
35492 {
35493  DB_VALUE boundary_value;
35495  bool clear_boundary_value = false;
35496  int offset;
35497 
35498  db_make_null (&boundary_value);
35499 
35500  pr_clear_value (&dest_value);
35501 
35502  if (btree_read_record_without_decompression (thread_p, btid, &rec, &boundary_value, &non_leaf_rec,
35503  BTREE_NON_LEAF_NODE, &clear_boundary_value, &offset,
35505  {
35506  return ER_FAILED;
35507  }
35508 
35509  pr_clone_value (&boundary_value, &dest_value);
35510 
35511  return NO_ERROR;
35512 }
35513 
35514 /*
35515  * update_boundary_eq : helper function used in context of btree insert advance functions
35516  * Updates the left/right boundary values of the search path down to a leaf page.
35517  * This handles the case when the key to insert is equal to current value stored
35518  * in non-leaf record.
35519  *
35520  * thread_p (in) :
35521  * btid (in) :
35522  * page_ptr (in) : current page (should be a non-leaf)
35523  * subtree_value (in) : value of non-leaf record pointing to a descending sub-tree
35524  * clear_subtree_value (in) : flag to clear subtree_value
35525  * subtree_slot(in): slot of the non-leaf pointer record
35526  */
35527 int
35529  DB_VALUE &subtree_value, bool &clear_subtree_value, const INT16 subtree_slot)
35530 {
35531  int error = NO_ERROR;
35532 
35533  /* value [subtree_slot - 1] < search_key <= subtree_value
35534  * search_key == subtree_value */
35535  set_value (m_right_key, subtree_value, clear_subtree_value);
35536  m_is_inf_right_key = false;
35537 
35538  /* update left value boundary only if there is a slot sitting left to current subtree entry */
35539  if (subtree_slot > 0)
35540  {
35541  error = set_value (thread_p, m_left_key, btid, page_ptr, subtree_slot - 1);
35542  m_is_inf_left_key = false;
35543  }
35544 
35545  return error;
35546 }
35547 
35548 /*
35549  * update_boundary_lt : helper function used in context of btree insert advance functions
35550  * Updates the left/right boundary values of the search path down to a leaf page.
35551  * This handles the case when the key to insert is less than the value of current sub-tree.
35552  *
35553  * thread_p (in) :
35554  * btid (in) :
35555  * page_ptr (in) : current page (should be a non-leaf)
35556  * left_subtree_rec (in) : record left to current subtree value
35557  * subtree_value (in) : value of current non-leaf record pointing to a descending sub-tree
35558  * clear_subtree_value (in) : flag to clear subtree_value
35559  */
35560 int
35562  RECDES &left_subtree_rec, DB_VALUE &subtree_value, bool &clear_subtree_value)
35563 {
35564  int error = NO_ERROR;
35565 
35566  /* value (left_subtree_rec) < search_key <= subtree_value */
35567  set_value (m_right_key, subtree_value, clear_subtree_value);
35568  m_is_inf_right_key = false;
35569 
35570  error = set_value (thread_p, m_left_key, btid, page_ptr, left_subtree_rec);
35571  m_is_inf_left_key = false;
35572 
35573  return error;
35574 }
35575 
35576 /*
35577  * update_boundary_gt_or_eq : helper function used in context of btree insert advance functions
35578  * Updates the left/right boundary values of the search path down to a leaf page.
35579  * This handles the case when the key to insert is greater than or equal to the value
35580  * of current sub-tree.
35581  *
35582  * thread_p (in) :
35583  * btid (in) :
35584  * page_ptr (in) : current page (should be a non-leaf)
35585  * subtree_value (in) : value of current non-leaf record pointing to a descending sub-tree
35586  * clear_subtree_value (in) : flag to clear subtree_value
35587  * subtree_slot (in): slot location of current subtree value
35588  * key_cnt (in): number of keys in non-leaf page
35589  */
35590 int
35592  DB_VALUE &subtree_value, bool &clear_subtree_value,
35593  const INT16 subtree_slot, const int key_cnt)
35594 {
35595  int error = NO_ERROR;
35596 
35597  /* subtree_value <= search_key < value [subtree_slot + 1] */
35598  set_value (m_left_key, subtree_value, clear_subtree_value);
35599  m_is_inf_left_key = false;
35600 
35601  if (subtree_slot + 1 < key_cnt)
35602  {
35603  error = set_value (thread_p, m_right_key, btid, page_ptr, subtree_slot + 1);
35604  m_is_inf_right_key = false;
35605  }
35606 
35607  return error;
35608 }
35609 
35611  : m_curr_pos (0)
35612  , m_key_type (&tp_Null_domain)
35613  , m_use_page_boundary_check (false)
35614  , m_use_sorted_bulk_insert (false)
35615 {
35616  m_curr_key = key;
35617  m_curr_oid = oid;
35618 }
35619 
35620 size_t
35621 btree_insert_list::add_key (const DB_VALUE *key, const OID &oid)
35622 {
35623  size_t memsize = 0;
35624  m_keys_oids.emplace_back ();
35625 
35626  m_keys_oids.back ().m_oid = oid;
35627 
35628  db_value &last_key = m_keys_oids.back ().m_key;
35629  db_make_null (&last_key);
35630 
35632 
35633  /* Switch to global heapID. */
35634  HL_HEAPID prev_id = db_change_private_heap (thread_p, 0);
35635 
35636  qdata_copy_db_value (&last_key, key);
35637  memsize += m_key_type->type->get_disk_size_of_value (&last_key);
35638 
35639  /* reset back to previous heapID. */
35640  db_change_private_heap (thread_p, prev_id);
35641 
35642  memsize += OR_OID_SIZE;
35643  memsize = DB_ALIGN (memsize, BTREE_MAX_ALIGN);
35644 
35645  return memsize;
35646 }
35647 
35649 {
35651 
35652  if (m_curr_key == NULL)
35653  {
35654  assert (m_curr_oid == NULL);
35655 
35656  assert (m_sorted_keys_oids.size () > 0);
35657 
35658  m_curr_pos = 0;
35661 
35662  return KEY_AVAILABLE;
35663  }
35664  else if (++m_curr_pos < (int) m_sorted_keys_oids.size ())
35665  {
35668 
35669  return KEY_AVAILABLE;
35670  }
35671 
35672  return KEY_NOT_AVAILABLE;
35673 }
35674 
35676 {
35677  HL_HEAPID save_id;
35678 
35679  save_id = db_change_private_heap (NULL, 0);
35680 
35681  for (auto key_oid : m_keys_oids)
35682  {
35684  }
35685 
35686  (void) db_change_private_heap (NULL, save_id);
35687 
35689 }
35690 
35692 {
35694  {
35697  }
35698 
35700  {
35703  }
35704 }
35705 
35707 {
35708  /* initialize sorted list with the same order as unsorted */
35709  for (auto &key_oid : m_keys_oids)
35710  {
35711  m_sorted_keys_oids.push_back (&key_oid);
35712  }
35713 
35714  auto compare_fn = [&] (key_oid *a, key_oid *b)
35715  {
35716  DB_VALUE_COMPARE_RESULT result;
35717  result = btree_compare_key (&a->m_key, &b->m_key, const_cast<TP_DOMAIN *>(m_key_type), 1, 1, NULL);
35718 
35719  return (result == DB_LT) ? true : false;
35720  };
35721 
35722  std::sort (m_sorted_keys_oids.begin (), m_sorted_keys_oids.end (), compare_fn);
35723  m_use_sorted_bulk_insert = true;
35725 
35726  int status = next_key ();
35727  assert (status == KEY_AVAILABLE);
35728 }
35729 
35730 bool btree_insert_list::check_release_latch (THREAD_ENTRY * thread_p, void *arg, PAGE_PTR leaf_page)
35731 {
35732  bool check_latch_waiters = false;
35733  BTREE_INSERT_HELPER *insert_helper = (BTREE_INSERT_HELPER *)arg;
35734 
35735  assert (insert_helper != NULL);
35736  assert (insert_helper->insert_list == this);
35737 
35738  int cost = m_keep_page_iterations + m_ovf_appends * 10 + m_ovf_appends_new_page * 1000;
35739 
35740  if (insert_helper->is_root)
35741  {
35742  if (cost > 50)
35743  {
35744  check_latch_waiters = true;
35745  }
35746  }
35747  else
35748  {
35749  if (cost > 100 && cost > (int) btree_get_node_header (thread_p, leaf_page)->node_level * 1000)
35750  {
35751  check_latch_waiters = true;
35752  }
35753  }
35754 
35755  if (check_latch_waiters)
35756  {
35757  return pgbuf_has_any_waiters (leaf_page);
35758  }
35759 
35760  return false;
35761 }
35762 // *INDENT-ON*
#define HEADER
Definition: btree_load.h:100
#define LEAF_FENCE_MAX_SIZE(n)
Definition: btree_load.h:77
PGLENGTH offset
Definition: recovery.h:201
static void btree_record_object_clear_mvcc_flags(char *rec_data, short mvcc_flags)
Definition: btree.c:3493
static int btree_advance_and_find_key(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *crt_page, PAGE_PTR *advance_to_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:22766
void btree_rv_roothdr_dump(FILE *fp, int length, void *data)
Definition: btree.c:17011
int btree_change_root_header_delta(THREAD_ENTRY *thread_p, VFID *vfid, PAGE_PTR page_ptr, int null_delta, int oid_delta, int key_delta)
Definition: btree_load.c:461
regu_variable_node * key1
Definition: access_spec.hpp:67
int btree_coerce_key(DB_VALUE *keyp, int keysize, TP_DOMAIN *btree_domainp, int key_minmax)
Definition: btree.c:14798
char * PAGE_PTR
DISK_ISVALID btree_keyoid_checkscan_check(THREAD_ENTRY *thread_p, BTREE_CHECKSCAN *btscan, OID *cls_oid, DB_VALUE *key, OID *oid)
Definition: btree.c:8364
static int btree_record_process_objects(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, RECDES *record, int after_key_offset, bool *stop, BTREE_PROCESS_OBJECT_FUNCTION *func, void *args)
Definition: btree.c:23728
MIN_MAX_COLUMN_INFO min_max_val
Definition: dbtype_def.h:867
#define OID_INITIALIZER
Definition: oid.h:36
DB_VALUE * get_key()
Definition: btree.h:621
TP_DOMAIN * btree_generate_prefix_domain(BTID_INT *btid)
Definition: btree.c:5749
static int btree_range_scan_advance_over_filtered_keys(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:24479
Definition: sha1.h:50
int func_index_col_id
Definition: scan_manager.h:135
static int btree_or_get_object(OR_BUF *buf, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, int after_key_offset, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:21553
void * bts_other
Definition: btree.h:248
VPID ovfl
Definition: btree.h:113
static void btree_leaf_record_handle_first_overflow(THREAD_ENTRY *thread_p, RECDES *recp, BTID_INT *btid_int, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:2990
DISK_ISVALID pgbuf_is_valid_page(THREAD_ENTRY *thread_p, const VPID *vpid, bool no_error, DISK_ISVALID(*fun)(const VPID *vpid, void *args), void *args)
char * or_unpack_oid(char *ptr, OID *oid)
#define OR_BTID_ALIGNED_SIZE
static int btree_key_delete_remove_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:30584
#define TP_IS_DATE_OR_TIME_TYPE(typeid)
bool check_not_vacuumed
Definition: scan_manager.h:229
TP_DOMAIN_STATUS tp_value_coerce(const DB_VALUE *src, DB_VALUE *dest, const TP_DOMAIN *desired_domain)
#define OR_PUT_OID(ptr, oid)
enum mvcc_satisfies_delete_result MVCC_SATISFIES_DELETE_RESULT
Definition: mvcc.h:224
OID * oid_Root_class_oid
Definition: oid.c:73
char * or_pack_btid(char *buf, const BTID *btid)
#define BTREE_OBJINFO_AS_ARGS(objinfo)
Definition: btree.c:1164
#define BTREE_NODE_SCAN_POP_PAGE_FROM_QUEUE(bns, node)
Definition: btree.h:424
#define ER_TP_CANT_COERCE
Definition: error_code.h:250
#define BTREE_NODE_SCAN_ADD_PAGE_TO_QUEUE(bns, node)
Definition: btree.h:412
static int btree_scan_update_range(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, key_val_range *kv_range)
Definition: btree.c:15351
MVCCID highest_completed_mvccid
Definition: mvcc.h:172
static void btree_online_index_change_state(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int offset_to_object, MVCCID new_state, char **rv_undo_data, char **rv_redo_data)
Definition: btree.c:34697
cubthread::entry * thread_get_thread_entry_info(void)
#define NO_ERROR
Definition: error_code.h:46
MVCC_SATISFIES_DELETE_RESULT mvcc_satisfies_delete(THREAD_ENTRY *thread_p, MVCC_REC_HEADER *rec_header)
Definition: mvcc.c:377
btree_unique_stats * unique_stats_info
Definition: btree.c:820
int btree_set_error(THREAD_ENTRY *thread_p, const DB_VALUE *key, const OID *obj_oid, const OID *class_oid, const BTID *btid, const char *bt_name, int severity, int err_id, const char *filename, int lineno)
Definition: btree.c:18312
int area_size
void log_append_redo_data(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, LOG_DATA_ADDR *addr, int length, const void *data)
Definition: log_manager.c:1979
static int btree_search_leaf_page(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, DB_VALUE *key, BTREE_SEARCH_KEY_HELPER *search_key)
Definition: btree.c:5343
static int btree_get_stats_key(THREAD_ENTRY *thread_p, BTREE_STATS_ENV *env, MVCC_SNAPSHOT *mvcc_snapshot)
Definition: btree.c:6504
BTREE_OP_PURPOSE purpose
Definition: btree.c:712
const MVCCID BTREE_ONLINE_INDEX_FLAG_MASK
Definition: btree.c:1240
#define __attribute__(X)
Definition: porting.h:36
#define BTREE_DELETE_OID(helper)
Definition: btree.c:879
#define MVCC_GET_INSID(header)
Definition: mvcc.h:51
bool is_btid_int_valid
Definition: btree.h:242
char buf[DB_SMALL_CHAR_BUF_SIZE]
Definition: dbtype_def.h:991
ATTR_ID * btree_attr_ids
MVCC_SNAPSHOT * logtb_get_mvcc_snapshot(THREAD_ENTRY *thread_p)
int or_get_mvccid(OR_BUF *buf, MVCCID *mvccid)
int btree_create_file(THREAD_ENTRY *thread_p, const OID *class_oid, int attrid, BTID *btid)
Definition: btree.c:32892
PERF_UTIME_TRACKER time_track
Definition: btree.h:246
INT32 mvcc_flag
Definition: mvcc.h:40
bool is_key_partially_processed
Definition: btree.h:229
FILTER_INFO key_filter_storage
Definition: btree.h:196
#define BTID_AS_ARGS(btid)
#define ER_DESC_ISCAN_ABORTED
Definition: error_code.h:1344
void log_append_undoredo_data2(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, PAGE_PTR pgptr, PGLENGTH offset, int undo_length, int redo_length, const void *undo_data, const void *redo_data)
Definition: log_manager.c:1861
static DISK_ISVALID btree_find_key_from_leaf(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, int key_cnt, OID *oid, DB_VALUE *key, bool *clear_key)
Definition: btree.c:18121
static void btree_perf_track_traverse_time(THREAD_ENTRY *thread_p, Helper *helper)
Definition: btree.c:934
char * copy_buf
Definition: btree.h:130
REGU_VARIABLE_LIST operand
Definition: regu_var.hpp:143
DB_MIDXKEY * db_get_midxkey(const DB_VALUE *value)
BTREE_ISCAN_OID_LIST * next_list
Definition: btree.h:343
size_t add_key(const DB_VALUE *key, const OID &oid)
Definition: btree.c:35621
static PAGE_PTR btree_get_new_page(THREAD_ENTRY *thread_p, BTID_INT *btid, VPID *vpid, VPID *near_vpid)
Definition: btree.c:4943
BTREE_MVCC_INFO match_mvccinfo
Definition: btree.c:822
static int btree_delete_internal(THREAD_ENTRY *thread_p, BTID *btid, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, DB_VALUE *key, OR_BUF *buffered_key, int *unique, int op_type, btree_unique_stats *unique_stat_info, BTREE_MVCC_INFO *match_mvccinfo, LOG_LSA *undo_nxlsa, BTREE_OBJECT_INFO *second_obj_info, BTREE_OP_PURPOSE purpose)
Definition: btree.c:29616
float sum_key_len
Definition: btree.h:365
static int btree_rv_write_log_record(char *log_rec, int *log_length, RECDES *recp, BTREE_NODE_TYPE node_type)
Definition: btree.c:10996
DB_VALUE_COMPARE_RESULT tp_value_compare(const DB_VALUE *value1, const DB_VALUE *value2, int allow_coercion, int total_order)
LOG_UNIQUE_STATS global_stats
Definition: log_impl.h:377
BTREE_NODE_HEADER * btree_get_node_header(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree_load.c:275
#define BTREE_INSERT_MODIFY_MSG(desc)
Definition: btree.c:1202
bool use_desc_index
Definition: btree.h:198
#define BTREE_SPLIT_UPPER_BOUND
Definition: btree.c:71
BTID * sys_btid
Definition: btree.h:121
#define ASSERT_ERROR()
SCAN_CODE
#define OID_GT(oidp1, oidp2)
Definition: oid.h:97
int index_readval(struct or_buf *buf, DB_VALUE *value, const tp_domain *domain, int size, bool copy, char *copy_buf, int copy_buf_len) const
static int btree_verify_node(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR page_ptr)
Definition: btree.c:19474
#define OR_MULTI_ATT_IS_UNBOUND(bitptr, element)
#define BTREE_IS_PART_KEY_DESC(btid_int)
Definition: btree.h:90
RANGE range
Definition: btree.h:140
const log_rv_record_flag_type LOG_RV_RECORD_DELETE
Definition: log_append.hpp:135
#define BTREE_NORMAL_KEY
Definition: btree.h:93
FILE_BTREE_DES btree
Definition: file_manager.h:134
int btree_rv_nodehdr_redo_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17142
int or_put_oid(OR_BUF *buf, const OID *oid)
#define BTREE_RV_OVERFLOW_FLAG
Definition: btree.c:1001
#define BTREE_GET_KEY_LEN_IN_PAGE(key_len)
Definition: btree_load.h:155
bool LSA_EQ(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:160
ATTR_ID * attr_ids
int btree_rv_read_keyval_info_nocopy(THREAD_ENTRY *thread_p, char *datap, int data_size, BTID_INT *btid, OID *cls_oid, OID *oid, BTREE_MVCC_INFO *mvcc_info, DB_VALUE *key)
Definition: btree.c:17473
int pkeys_val_num
Definition: btree.c:348
static int xbtree_test_unique(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:6099
#define BTREE_INVALID_INDEX_ID(btid)
Definition: btree_load.h:103
void btree_mvcc_info_from_heap_mvcc_header(MVCC_REC_HEADER *mvcc_header, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:28752
int file_dealloc(THREAD_ENTRY *thread_p, const VFID *vfid, const VPID *vpid, FILE_TYPE file_type_hint)
valptr_list_node * output_val_list
Definition: scan_manager.h:131
const MVCCID BTREE_ONLINE_INDEX_NORMAL_FLAG_STATE
Definition: btree.c:1237
static int btree_rv_record_modify_internal(THREAD_ENTRY *thread_p, LOG_RCV *rcv, bool is_undo)
Definition: btree.c:28849
int btree_rv_pagerec_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17347
int btree_write_record(THREAD_ENTRY *thread_p, BTID_INT *btid, void *node_rec, DB_VALUE *key, BTREE_NODE_TYPE node_type, int key_type, int key_len, bool during_loading, OID *class_oid, OID *oid, BTREE_MVCC_INFO *mvcc_info, RECDES *rec)
Definition: btree.c:4076
#define LOG_DATA_ADDR_INITIALIZER
Definition: log_append.hpp:63
STATIC_INLINE void btree_delete_sysop_end(THREAD_ENTRY *thread_p, BTREE_DELETE_HELPER *helper) __attribute__((ALWAYS_INLINE))
Definition: btree.c:32963
int db_string_unique_prefix(const DB_VALUE *db_string1, const DB_VALUE *db_string2, DB_VALUE *db_result, TP_DOMAIN *key_domain)
int dis_key_cnt
Definition: btree.h:357
void LSA_COPY(log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:139
void log_sysop_end_logical_compensate(THREAD_ENTRY *thread_p, LOG_LSA *undo_nxlsa)
Definition: log_manager.c:3963
static int btree_range_scan_resume(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:24304
#define SHA1_HASH_INITIALIZER
Definition: sha1.h:54
static int btree_overflow_record_replace_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR overflow_page, RECDES *overflow_record, int *offset_to_replaced_object, BTREE_OBJECT_INFO *replacing_object)
Definition: btree.c:32430
static int btree_undo_insert_object(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, OID *oid, OID *class_oid, MVCCID insert_mvccid, LOG_LSA *undo_nxlsa)
Definition: btree.c:29494
VPID C_vpid
Definition: btree.h:173
#define BTREE_OID_GET_MVCC_FLAGS(oid_ptr)
Definition: btree.c:155
static char * btree_unpack_object(char *ptr, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, RECDES *record, int after_key_offset, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:21490
int oid_pos
Definition: btree.h:189
DISK_ISVALID btree_check_tree(THREAD_ENTRY *thread_p, const OID *class_oid_p, BTID *btid, const char *btname)
Definition: btree.c:7711
SCAN_PRED * scan_pred
static void BTREE_RV_GET_DATA_LENGTH(const char *rv_ptr, const char *rv_start, int &rv_length)
Definition: btree.c:1111
char * rv_keyval_data
Definition: btree.c:744
static int btree_range_scan_count_oids_leaf_and_one_ovf(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:24178
int get_index_size_of_value(const DB_VALUE *value) const
int spage_insert(THREAD_ENTRY *thread_p, PAGE_PTR page_p, RECDES *record_descriptor_p, PGSLOTID *out_slot_id_p)
static int btree_find_next_index_record(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:15442
#define BTREE_DELETE_MODIFY_ARGS(thread_p, helper, page, save_lsa, is_leaf, slotid, new_size, btid)
Definition: btree.c:1224
static int btree_key_remove_insert_mvccid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:31728
LOG_RCVINDEX rcvindex
Definition: btree.c:743
SHA1Hash printed_key_sha1
Definition: btree.c:825
#define VPID_COPY(dest_ptr, src_ptr)
Definition: dbtype_def.h:909
int db_get_int(const DB_VALUE *value)
static void btree_record_remove_object_internal(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data, char **rv_redo_data, int *displacement)
Definition: btree.c:31321
int ils_prefix_len
const log_rv_record_flag_type LOG_RV_RECORD_UPDATE_ALL
Definition: log_append.hpp:136
int partition_load_pruning_context(THREAD_ENTRY *thread_p, const OID *class_oid, int pruning_type, PRUNING_CONTEXT *pinfo)
Definition: partition.c:2249
static void btree_print_space(FILE *fp, int n)
Definition: btree.c:8820
static int btree_leaf_record_replace_first_with_last(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR leaf_page, RECDES *leaf_record, BTREE_SEARCH_KEY_HELPER *search_key, OID *last_oid, OID *last_class_oid, BTREE_MVCC_INFO *last_mvcc_info, int offset_to_last_object)
Definition: btree.c:31137
int tp_more_general_type(const DB_TYPE type1, const DB_TYPE type2)
int btree_get_stats(THREAD_ENTRY *thread_p, BTREE_STATS *stat_info_p, bool with_fullscan)
Definition: btree.c:6959
static int btree_node_common_prefix(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr)
Definition: btree.c:12380
DB_TYPE
Definition: dbtype_def.h:670
LOG_RCVINDEX
Definition: recovery.h:36
#define BTS_IS_SOFT_CAPACITY_ENOUGH(bts, count)
Definition: btree.c:625
#define MVCCID_IS_NOT_ALL_VISIBLE(id)
BTREE_NODE_TYPE node_type
Definition: btree.c:374
unsigned int btree_hash_btid(void *btid, int hash_size)
Definition: btree.c:32874
void log_rv_dump_hexa(FILE *fp, int length, void *data)
Definition: log_manager.c:8734
#define ER_FAILED
Definition: error_code.h:47
BTREE_ROOT_HEADER * btree_get_root_header(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree_load.c:309
VFID ovfid
Definition: btree.h:129
static bool btree_is_online_index_loading(BTREE_OP_PURPOSE purpose)
Definition: btree.c:35253
void LOG_RV_RECORD_SET_MODIFY_MODE(log_data_addr *addr, log_rv_record_flag_type mode)
Definition: log_append.hpp:198
void set_value(DB_VALUE &dest_value, DB_VALUE &src_value, bool &clear_src_value)
Definition: btree.c:35469
void spage_initialize(THREAD_ENTRY *thread_p, PAGE_PTR page_p, INT16 slot_type, unsigned short alignment, bool is_saving)
STATIC_INLINE const char * btree_purpose_to_string(BTREE_OP_PURPOSE purpose) __attribute__((ALWAYS_INLINE))
Definition: btree.c:33076
static void btree_record_object_set_mvcc_flags(char *data, short mvcc_flags)
Definition: btree.c:3456
bool mvcc_is_mvcc_disabled_class(const OID *class_oid)
Definition: mvcc.c:616
#define BTREE_SPLIT_MIN_PIVOT
Definition: btree.c:73
LOG_GLOBAL log_Gl
LOG_HEADER hdr
Definition: log_impl.h:653
static int btree_insert_internal(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *class_oid, OID *oid, int op_type, btree_unique_stats *unique_stat_info, int *unique, BTREE_MVCC_INFO *mvcc_info, LOG_LSA *undo_nxlsa, BTREE_OP_PURPOSE purpose)
Definition: btree.c:26069
int db_get_string_collation(const DB_VALUE *value)
int pr_midxkey_unique_prefix(const DB_VALUE *db_midxkey1, const DB_VALUE *db_midxkey2, DB_VALUE *db_result)
#define ALWAYS_INLINE
#define BTREE_DEBUG_TEST_SPLIT
Definition: btree.c:68
#define BTREE_NODE_MAX_SPLIT_SIZE(thread_p, page_ptr)
Definition: btree.c:79
#define ER_BTREE_UNKNOWN_KEY
Definition: error_code.h:483
char * log_rv_pack_undo_record_changes(char *ptr, int offset_to_data, int old_data_size, int new_data_size, char *old_data)
int pr_midxkey_common_prefix(DB_VALUE *key1, DB_VALUE *key2)
static int object_size(SM_CLASS *class_, MOBJ obj, int *offset_size_ptr)
Definition: transform_cl.c:655
DISK_ISVALID file_check_vpid(THREAD_ENTRY *thread_p, const VFID *vfid, const VPID *vpid_lookup)
int btree_rv_keyval_undo_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17696
BTREE_TYPE
struct tp_domain * setdomain
Definition: object_domain.h:82
#define NON_LEAF_RECORD_SIZE
static int btree_key_insert_new_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:27213
BTREE_MERGE_STATUS
Definition: btree.c:306
int btree_reflect_global_unique_statistics(THREAD_ENTRY *thread_p, GLOBAL_UNIQUE_STATS *unique_stat_info, bool only_active_tran)
Definition: btree.c:14049
bool check_key_deleted
Definition: btree.c:829
#define DISK_VPID_SIZE
DB_VALUE max_key
Definition: btree_load.h:239
static int btree_key_append_object_non_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_record, int offset_after_key, LEAF_REC *leaf_info, BTREE_OBJECT_INFO *btree_obj, BTREE_INSERT_HELPER *insert_helper)
Definition: btree.c:28020
MVCCID logtb_find_current_mvccid(THREAD_ENTRY *thread_p)
static int btree_key_remove_object_and_keep_visible_first(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:30844
bool end_one_iteration
Definition: btree.h:227
static int btree_key_online_index_tran_insert(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:33875
static bool btree_online_index_is_delete_flag_state(MVCCID state)
Definition: btree.c:33229
bool * is_desc_order
Definition: scan_manager.h:156
int update_boundary_lt(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, RECDES &left_subtree_rec, DB_VALUE &subtree_value, bool &clear_subtree_value)
Definition: btree.c:35561
int btree_rv_ovfid_undoredo_update(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17039
#define pgbuf_unfix(thread_p, pgptr)
Definition: page_buffer.h:276
#define OR_BUF_INIT(buf, data, size)
OID class_oid
Definition: btree.c:318
MULTI_RANGE_OPT multi_range_opt
Definition: scan_manager.h:225
void btree_rv_ovfid_dump(FILE *fp, int length, void *data)
Definition: btree.c:17079
#define BTREE_SET_CREATED_OVERFLOW_KEY_NOTIFICATION(THREAD, KEY, OID, C_OID, BTID, BTNM)
Definition: btree_load.h:159
static PAGE_PTR btree_find_rightmost_leaf(THREAD_ENTRY *thread_p, BTID *btid, VPID *pg_vpid, BTREE_STATS *stat_info_p)
Definition: btree.c:14326
int logtb_rv_update_global_unique_stats_by_abs(THREAD_ENTRY *thread_p, BTID *btid, int num_oids, int num_nulls, int num_keys)
static bool btree_online_index_is_normal_state(MVCCID state)
Definition: btree.c:33235
static void btree_delete_helper_to_insert_helper(BTREE_DELETE_HELPER *delete_helper, BTREE_INSERT_HELPER *insert_helper)
Definition: btree.c:35223
DB_VALUE_COMPARE_RESULT tp_value_compare_with_error(const DB_VALUE *value1, const DB_VALUE *value2, int do_coercion, int total_order, bool *can_compare)
DB_LOGICAL eval_key_filter(THREAD_ENTRY *thread_p, DB_VALUE *value, FILTER_INFO *filterp)
#define BTREE_RV_HAS_DEBUG_INFO(flags)
Definition: btree.c:1082
struct btree_node_header BTREE_NODE_HEADER
Definition: btree_load.h:193
BTREE_MVCC_INFO mvcc_info
Definition: btree.h:531
int xbtree_get_unique_pk(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:6129
static SCAN_CODE btree_scan_for_show_index_header(THREAD_ENTRY *thread_p, DB_VALUE **out_values, int out_cnt, const char *class_name, OR_INDEX *index_p, OID *class_oid_p)
Definition: btree.c:20806
static int btree_fix_root_for_insert(THREAD_ENTRY *thread_p, BTID *btid, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *root_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:26247
static int btree_key_online_index_tran_delete(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:34102
#define ASSERT_ERROR_AND_SET(error_code)
#define btree_insert_log(helper, msg,...)
Definition: btree.c:1149
int or_get_oid(OR_BUF *buf, OID *oid)
static int btree_key_insert_new_key(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf_page, BTREE_INSERT_HELPER *insert_helper, BTREE_SEARCH_KEY_HELPER *search_key)
Definition: btree.c:27395
void thread_sleep(double millisec)
char * printed_key
Definition: btree.c:824
SPAGE_SLOT * spage_get_slot(PAGE_PTR page_p, PGSLOTID slot_id)
BTREE_OBJECT_INFO object_info
Definition: btree.c:815
#define LSA_INITIALIZER
Definition: log_lsa.hpp:76
#define OR_MVCC_FLAG_VALID_INSID
TP_DOMAIN * nonleaf_key_type
Definition: btree.h:125
int btree_glean_root_header_info(THREAD_ENTRY *thread_p, BTREE_ROOT_HEADER *root_header, BTID_INT *btid)
Definition: btree.c:5797
static int btree_or_get_mvccinfo(OR_BUF *buf, BTREE_MVCC_INFO *mvcc_info, short btree_mvcc_flags)
Definition: btree.c:21426
#define BTREE_NODE_SCAN_IS_QUEUE_EMPTY(bns)
Definition: btree.h:447
char packed_key_domain[1]
Definition: btree_load.h:219
#define BTREE_REC_SATISFIES_SNAPSHOT_HELPER_INITIALIZER
Definition: btree.c:495
int btree_attrinfo_read_dbvalues(THREAD_ENTRY *thread_p, DB_VALUE *curr_key, int *btree_att_ids, int btree_num_att, HEAP_CACHE_ATTRINFO *attr_info, int func_index_col_id)
Definition: btree.c:15938
#define assert_release(e)
Definition: error_manager.h:96
#define BTREE_DEBUG_DUMP_SIMPLE
Definition: btree.c:62
#define LOG_CHECK_LOG_APPLIER(thread_p)
Definition: log_impl.h:240
void pgbuf_set_dirty(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, bool free_page)
Definition: page_buffer.c:4280
void scan_init_index_scan(INDX_SCAN_ID *isidp, struct btree_iscan_oid_list *oid_list, MVCC_SNAPSHOT *mvcc_snapshot)
Definition: scan_manager.c:283
int btree_insert(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *cls_oid, OID *oid, int op_type, btree_unique_stats *unique_stat_info, int *unique, MVCC_REC_HEADER *p_mvcc_rec_header)
Definition: btree.c:25969
MVCCID delete_mvccid
Definition: btree.h:518
#define BTREE_OID_IS_RECORD_FLAG_SET(oid_ptr, mvcc_flag)
Definition: btree.c:145
SCAN_CODE overflow_get(THREAD_ENTRY *thread_p, const VPID *ovf_vpid, RECDES *recdes, MVCC_SNAPSHOT *mvcc_snapshot)
int util_byte_to_size_string(char *buf, size_t len, UINT64 size_num)
Definition: util_common.c:955
#define VACUUM_IS_THREAD_VACUUM
Definition: vacuum.h:215
int btree_rv_remove_marked_for_delete(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
Definition: btree.c:17914
static bool btree_is_class_oid_packed(BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, bool is_first)
Definition: btree.c:34762
void pgbuf_notify_vacuum_follows(THREAD_ENTRY *thread_p, PAGE_PTR page)
union regu_variable_node::regu_data_value value
bool is_crt_node_write_latched
Definition: btree.c:723
const TP_DOMAIN * m_key_type
Definition: btree.h:584
#define BTREE_RV_IS_UPDATE_MAX_KEY_LEN(flags)
Definition: btree.c:1090
#define BTREE_RV_FLAGS_MASK
Definition: btree.c:1014
#define LOFFS2
Definition: btree.c:338
void log_append_undo_data2(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, PAGE_PTR pgptr, PGLENGTH offset, int length, const void *data)
Definition: log_manager.c:1933
int btree_rv_roothdr_undo_update(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:16966
static int btree_range_opt_check_add_index_key(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, MULTI_RANGE_OPT *multi_range_opt, OID *p_new_oid, bool *key_added)
Definition: btree.c:18845
bool iss_get_first_result_only
Definition: btree.c:375
DB_VALUE_COMPARE_RESULT cmpval(const DB_VALUE *value, const DB_VALUE *value2, int do_coercion, int total_order, int *start_colp, int collation) const
int pkeys_size
Definition: statistics.h:66
static int btree_merge_node(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R, INT16 p_slot_id, VPID *child_vpid, BTREE_MERGE_STATUS status)
Definition: btree.c:10026
int lock_object(THREAD_ENTRY *thread_p, const OID *oid, const OID *class_oid, LOCK lock, int cond_flag)
#define MVCCID_NULL
void log_sysop_start(THREAD_ENTRY *thread_p)
Definition: log_manager.c:3578
int avg_key_len
Definition: btree.h:366
int heap_scancache_quick_start_root_hfid(THREAD_ENTRY *thread_p, HEAP_SCANCACHE *scan_cache)
Definition: heap_file.c:19255
#define OR_MVCC_FLAG_VALID_DELID
#define OR_MVCCID_SIZE
static const char * node_type_to_string(short node_type)
Definition: btree.c:20320
#define OID_MSG_BUF_SIZE
Definition: btree.c:82
GLOBAL_UNIQUE_STATS_TABLE unique_stats_table
Definition: log_impl.h:685
int btree_rv_keyval_undo_online_index_tran_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:35150
QFILE_TUPLE_RECORD * tplrec
Definition: scan_manager.h:128
static void btree_online_index_set_insert_flag_state(MVCCID &state)
Definition: btree.c:33241
page_key_boundary m_boundaries
Definition: btree.h:588
static int btree_node_size_uncompressed(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr)
Definition: btree.c:10537
static void btree_perf_ovf_oids_fix_time(THREAD_ENTRY *thread_p, PERF_UTIME_TRACKER *track)
Definition: btree.c:979
static int btree_delete_meta_record(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, int slot_id)
Definition: btree.c:9617
bool oid_is_serial(const OID *oid)
Definition: oid.c:159
int index_writeval(struct or_buf *buf, const DB_VALUE *value) const
#define OID_SET_NULL(oidp)
Definition: oid.h:85
#define os_free_and_init(ptr)
Definition: memory_alloc.h:153
#define BTREE_MVCC_INFO_DELID(mvcc_info)
Definition: btree.c:185
int tp_value_coerce_strict(const DB_VALUE *src, DB_VALUE *dest, const TP_DOMAIN *desired_domain)
#define OID_LT(oidp1, oidp2)
Definition: oid.h:113
#define NULL_SLOTID
#define OR_PUT_VPID_ALIGNED(ptr, vpid)
#define ER_BTREE_INVALID_INDEX_ID
Definition: error_code.h:482
#define LOFFS3
Definition: btree.c:339
DISK_ISVALID not_vacuumed_res
Definition: scan_manager.h:231
bool btree_multicol_key_is_null(DB_VALUE *key)
Definition: btree.c:18033
#define PEEK_KEY_VALUE
Definition: btree_load.h:44
bool LOG_RV_RECORD_IS_INSERT(log_rv_record_flag_type flags)
Definition: log_append.hpp:174
TP_DOMAIN * tp_domain_resolve_value(const DB_VALUE *val, TP_DOMAIN *dbuf)
int btree_node_number_of_keys(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree_load.c:3755
char * data
static DISK_ISVALID btree_repair_prev_link_by_btid(THREAD_ENTRY *thread_p, BTID *btid, bool repair, char *index_name)
Definition: btree.c:7949
#define TP_IS_STRING_TYPE(typeid)
#define BTREE_MVCC_INFO_SET_DELID(mvcc_info, delid)
Definition: btree.c:231
bool clear_cur_key
Definition: btree.h:192
MVCCID mvcc_id
Definition: recovery.h:198
INT16 slot_id
Definition: btree.h:186
int32_t pageid
Definition: dbtype_def.h:879
#define FI_SET(th, code, state)
INT32 root_pageid
INT16 rec_cnt
Definition: btree.c:290
#define ER_BTREE_INVALID_RANGE
Definition: error_code.h:488
#define BTREE_LEAF_RECORD_FENCE
Definition: btree.c:104
static int btree_key_append_object_to_overflow(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR ovfl_page, BTREE_OBJECT_INFO *object_info, BTREE_INSERT_HELPER *insert_helper)
Definition: btree.c:10892
#define MULTI_ROW_DELETE
Definition: btree.h:57
BTID_INT btid_int
Definition: btree.h:391
#define BTID_IS_EQUAL(b1, b2)
static int btree_verify_leaf_node(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR page_ptr)
Definition: btree.c:19647
void btree_init_temp_key_value(bool *clear_flag, DB_VALUE *key_value)
Definition: btree.c:1938
int SHA1Compute(const unsigned char *message_array, size_t length, SHA1Hash *hash)
Definition: sha1.c:372
int btree_rv_undo_global_unique_stats_commit(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:22356
int er_errid(void)
#define BTREE_OID_CLEAR_RECORD_FLAGS(oid_ptr)
Definition: btree.c:131
int btree_find_min_or_max_key(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, int find_min_key)
Definition: btree.c:16284
#define BTREE_MVCC_INFO_SET_FIXED_SIZE(mvcc_info)
Definition: btree.c:191
#define VACUUM_ER_LOG_WORKER
Definition: vacuum.h:53
DISK_ISVALID btree_check_by_class_oid(THREAD_ENTRY *thread_p, OID *cls_oid, BTID *idx_btid)
Definition: btree.c:7885
int file_get_sticky_first_page(THREAD_ENTRY *thread_p, const VFID *vfid, VPID *vpid_out)
#define MVCC_SET_DELID(header, mvcc_id)
Definition: mvcc.h:60
int btree_get_asc_desc(THREAD_ENTRY *thread_p, BTID *btid, int col_idx, int *asc_desc)
Definition: btree.c:18408
#define SP_SUCCESS
Definition: slotted_page.h:50
#define VPID_INITIALIZER
Definition: dbtype_def.h:894
void btree_keyoid_checkscan_end(THREAD_ENTRY *thread_p, BTREE_CHECKSCAN *btscan)
Definition: btree.c:8445
#define OFFS3
Definition: btree.c:334
#define BTREE_ID_MSG
Definition: btree.c:1155
MVCC_SATISFIES_SNAPSHOT_RESULT mvcc_satisfies_dirty(THREAD_ENTRY *thread_p, MVCC_REC_HEADER *rec_header, MVCC_SNAPSHOT *snapshot)
Definition: mvcc.c:501
char * or_pack_oid(char *ptr, const OID *oid)
bool log_operations
Definition: btree.c:826
struct recset_header RECSET_HEADER
Definition: btree.c:287
#define OR_MULTI_BOUND_BIT_BYTES(count)
#define BTREE_IS_UNIQUE(unique_pk)
Definition: btree.h:89
#define BTREE_SPLIT_MAX_PIVOT
Definition: btree.c:74
RANGE_OPT_ITEM ** buffer
Definition: scan_manager.h:161
#define OR_SHORT_SIZE
int tot_val_cnt
Definition: btree.h:358
#define COMMON_PREFIX_UNKNOWN
Definition: btree.h:251
#define PTR_ALIGN(addr, boundary)
Definition: memory_alloc.h:77
void prepare_list(void)
Definition: btree.c:35706
struct btree_overflow_header BTREE_OVERFLOW_HEADER
Definition: btree_load.h:223
void lock_unlock_object_donot_move_to_non2pl(THREAD_ENTRY *thread_p, const OID *oid, const OID *class_oid, LOCK lock)
const char * name
#define MULTI_ROW_INSERT
Definition: btree.h:56
#define FORCE_MERGE_WHEN_EMPTY
Definition: btree.c:95
#define OID_AS_ARGS(oidp)
Definition: oid.h:39
char * rv_redo_data
Definition: btree.c:837
OID * get_oid()
Definition: btree.h:616
int btree_check_valid_record(THREAD_ENTRY *thread_p, BTID_INT *btid, RECDES *recp, BTREE_NODE_TYPE node_type, DB_VALUE *key)
Definition: btree.c:21828
#define OR_OID_SLOTID
DB_VALUE * lower_key
Definition: btree.h:141
#define BTREE_MAX_OIDCOUNT_IN_OVERFLOW_RECORD(btid)
Definition: btree_load.h:149
bool LSA_LT(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:174
TP_DOMAIN tp_Null_domain
void log_sysop_end_logical_undo(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, int undo_size, const char *undo_data)
Definition: log_manager.c:3920
enum tp_domain_status TP_DOMAIN_STATUS
bool spage_check_num_slots(THREAD_ENTRY *thread_p, PAGE_PTR page_p)
#define er_log_debug(...)
int nleaf_pg_cnt
Definition: btree.h:361
PGSLOTID spage_delete_for_recovery(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id)
HL_HEAPID db_change_private_heap(THREAD_ENTRY *thread_p, HL_HEAPID heap_id)
Definition: memory_alloc.c:337
#define VPID_AS_ARGS(vpidp)
Definition: dbtype_def.h:896
static INLINE bool btree_record_object_is_flagged(char *data, short mvcc_flag) __attribute__((ALWAYS_INLINE))
Definition: btree.c:3423
DB_VALUE_COMPARE_RESULT btree_compare_key(DB_VALUE *key1, DB_VALUE *key2, TP_DOMAIN *key_domain, int do_coercion, int total_order, int *start_colp)
Definition: btree.c:18636
#define BTREE_OBJECT_INFO_INITIALIZER
Definition: btree.h:533
static void btree_insert_object_ordered_by_oid(THREAD_ENTRY *thread_p, RECDES *record, BTID_INT *btid_int, BTREE_OBJECT_INFO *object_info, char **rv_undo_data_ptr, char **rv_redo_data_ptr, int *offset_to_objptr)
Definition: btree.c:3849
stat_type get_key_count() const
int btree_index_start_scan(THREAD_ENTRY *thread_p, int show_type, DB_VALUE **arg_values, int arg_cnt, void **ptr)
Definition: btree.c:20523
MVCCID lowest_active_mvccid
Definition: mvcc.h:171
key_val_range * key_vals
Definition: scan_manager.h:215
int heap_scancache_end(THREAD_ENTRY *thread_p, HEAP_SCANCACHE *scan_cache)
Definition: heap_file.c:7195
bool is_system_op_started
Definition: btree.c:840
#define BTREE_INSERT_MODIFY_ARGS(thread_p, helper, page, save_lsa, is_leaf, slotid, new_size, btid)
Definition: btree.c:1209
int rv_keyval_data_length
Definition: btree.c:836
const MVCCID BTREE_ONLINE_INDEX_INSERT_FLAG_STATE
Definition: btree.c:1238
short key_len
Definition: btree.h:114
static int btree_rv_write_log_record_for_key_insert(char *log_rec, int *log_length, INT16 key_len, RECDES *recp)
Definition: btree.c:10983
void _er_log_debug(const char *file_name, const int line_no, const char *fmt,...)
BTID * xbtree_add_index(THREAD_ENTRY *thread_p, BTID *btid, TP_DOMAIN *key_type, OID *class_oid, int attr_id, int unique_pk, int num_oids, int num_nulls, int num_keys)
Definition: btree.c:5579
char * rv_redo_data_ptr
Definition: btree.c:838
INDX_COV indx_cov
Definition: scan_manager.h:224
ATTR_ID * vstr_ids
#define OR_GET_MVCCID
static int btree_record_remove_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR page, RECDES *record, BTREE_SEARCH_KEY_HELPER *search_key, BTREE_NODE_TYPE node_type, int offset_to_object, LOG_DATA_ADDR *addr)
Definition: btree.c:31228
static int btree_find_oid_with_page_and_record(THREAD_ENTRY *thread_p, BTID_INT *btid_int, OID *oid, PAGE_PTR leaf_page, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO *match_mvccinfo, RECDES *record, LEAF_REC *leaf_info, int offset_after_key, PAGE_PTR *found_page, PAGE_PTR *prev_page, int *offset_to_object, BTREE_MVCC_INFO *object_mvcc_info, RECDES *new_record)
Definition: btree.c:35044
#define MAX_ALIGNMENT
Definition: memory_alloc.h:70
DB_VALUE m_right_key
Definition: btree.h:546
static int btree_fix_ovfl_oid_pages_tree(THREAD_ENTRY *thread_p, BTID *btid, char *btname)
Definition: btree.c:19284
static int btree_find_next_index_record_holding_current_helper(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, PAGE_PTR first_page)
Definition: btree.c:15622
#define COPY_OID(dest_oid_ptr, src_oid_ptr)
Definition: oid.h:63
static int btree_key_online_index_tran_insert_DF(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:34412
#define BTREE_MAX_KEYLEN_INPAGE
Definition: btree_load.h:135
char * log_rv_pack_redo_record_changes(char *ptr, int offset_to_data, int old_data_size, int new_data_size, char *new_data)
SCAN_CODE spage_get_record(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id, RECDES *record_descriptor_p, int is_peeking)
char * or_pack_mvccid(char *ptr, const MVCCID mvccid)
RECDES key_record
Definition: btree.h:217
int btree_rv_undo_mark_dealloc_page(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
Definition: btree.c:32847
int btree_range_scan_select_visible_oids(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:25122
int xlogtb_reset_wait_msecs(THREAD_ENTRY *thread_p, int wait_msecs)
#define vacuum_er_log_warning(er_log_level, msg,...)
Definition: vacuum.h:73
static bool btree_is_single_object_key(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, RECDES *record, int offset_after_key)
Definition: btree.c:35373
PERF_UTIME_TRACKER time_track
Definition: btree.c:452
int m_ovf_appends
Definition: btree.h:594
#define BTREE_MVCC_INFO_IS_INSID_NOT_ALL_VISIBLE(mvcc_info)
Definition: btree.c:173
static int btree_find_oid_and_its_page(THREAD_ENTRY *thread_p, BTID_INT *btid_int, OID *oid, PAGE_PTR leaf_page, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO *match_mvccinfo, RECDES *leaf_record, LEAF_REC *leaf_rec_info, int after_key_offset, PAGE_PTR *found_page, PAGE_PTR *prev_page, int *offset_to_object, BTREE_MVCC_INFO *object_mvcc_info)
Definition: btree.c:11086
#define DONT_SAFEGUARD_RVSPACE
Definition: slotted_page.h:54
int tot_pg_cnt
Definition: btree.h:362
int file_create_with_npages(THREAD_ENTRY *thread_p, FILE_TYPE file_type, int npages, FILE_DESCRIPTORS *des, VFID *vfid)
#define FI_RESET(th, code)
#define VFID_ISNULL(vfid_ptr)
Definition: file_manager.h:72
int copy_buf_len
Definition: btree.h:131
void THREAD_ENTRY
PGBUF_LATCH_MODE nonleaf_latch_mode
Definition: btree.c:818
static int btree_read_record_without_decompression(THREAD_ENTRY *thread_p, BTID_INT *btid, RECDES *Rec, DB_VALUE *key, void *rec_header, BTREE_NODE_TYPE node_type, bool *clear_key, int *offset, int copy)
Definition: btree.c:4323
int btree_index_end_scan(THREAD_ENTRY *thread_p, void **ptr)
Definition: btree.c:20769
static int btree_delete_postponed(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, BTREE_OBJECT_INFO *btree_obj, MVCCID tran_mvccid, LOG_LSA *reference_lsa)
Definition: btree.c:29572
bool need_to_check_null
Definition: btree.h:219
#define NULL_PAGEID
#define MVCCID_ALL_VISIBLE
#define pgbuf_unfix_and_init(thread_p, pgptr)
Definition: page_buffer.h:63
static void btree_key_record_check_no_visible(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR leaf_page, PGSLOTID slotid)
Definition: btree.c:28712
#define MVCC_SET_INSID(header, mvcc_id)
Definition: mvcc.h:54
#define BTREE_DELETE_MVCC_INFO(helper)
Definition: btree.c:883
#define BTREE_RV_DEBUG_INFO_MAX_SIZE
Definition: btree.c:1071
static int btree_record_get_num_visible_oids(THREAD_ENTRY *thread_p, BTID_INT *btid, RECDES *rec, int oid_offset, BTREE_NODE_TYPE node_type, int *max_visible_oids, MVCC_SNAPSHOT *mvcc_snapshot, int *num_visible)
Definition: btree.c:2629
#define BTREE_DELETE_MODIFY_MSG(desc)
Definition: btree.c:1217
int btree_rv_save_keyval_for_undo_two_objects(BTID_INT *btid, DB_VALUE *key, BTREE_OBJECT_INFO *first_version, BTREE_OBJECT_INFO *second_version, BTREE_OP_PURPOSE purpose, char *preallocated_buffer, char **data, int *capacity, int *length)
Definition: btree.c:16714
#define COPY_KEY_VALUE
Definition: btree_load.h:45
static int btree_get_next_page_vpid(THREAD_ENTRY *thread_p, PAGE_PTR leaf_page, VPID *next_vpid)
Definition: btree.c:18535
void vacuum_log_add_dropped_file(THREAD_ENTRY *thread_p, const VFID *vfid, const OID *class_oid, bool pospone_or_undo)
Definition: vacuum.c:6024
#define BTREE_INSERT_HELPER_AS_ARGS(helper)
Definition: btree.c:1180
void btree_dump(THREAD_ENTRY *thread_p, FILE *fp, BTID *btid, int level)
Definition: btree.c:9029
int btree_create_overflow_key_file(THREAD_ENTRY *thread_p, BTID_INT *btid)
Definition: btree.c:1953
int or_put_mvccid(OR_BUF *buf, MVCCID mvccid)
#define ER_BTREE_CORRUPT_PREV_LINK
Definition: error_code.h:1450
int read_keys
Definition: btree.h:204
static int btree_split_next_pivot(BTREE_NODE_SPLIT_INFO *split_info, float new_value, int max_index)
Definition: btree.c:12296
int spage_update(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id, const RECDES *record_descriptor_p)
LOCK
int btree_keyoid_checkscan_start(THREAD_ENTRY *thread_p, BTID *btid, BTREE_CHECKSCAN *btscan)
Definition: btree.c:8330
MIN_MAX_COLUMN_TYPE type
Definition: dbtype_def.h:857
static void btree_record_append_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, BTREE_OBJECT_INFO *object_info, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:3774
bool is_unique_key_added_or_deleted
Definition: btree.c:727
#define FREE(PTR)
Definition: cas_common.h:56
int btree_rv_newpage_redo_init(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17442
int n_oids_read
Definition: btree.h:231
static char * btree_pack_object(char *ptr, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, RECDES *record, BTREE_OBJECT_INFO *object_info)
Definition: btree.c:21518
void log_append_redo_data2(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, PAGE_PTR pgptr, PGLENGTH offset, int length, const void *data)
Definition: log_manager.c:1995
#define BTREE_FIND_FK_OBJECT_INITIALIZER
Definition: btree.c:700
#define BTREE_DEBUG_HEALTH_FULL
Definition: btree.c:66
int db_make_string(DB_VALUE *value, DB_CONST_C_CHAR str)
static int btree_undo_mvcc_delete(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, OID *oid, OID *class_oid, BTREE_MVCC_INFO *match_mvccinfo, LOG_LSA *undo_nxlsa)
Definition: btree.c:29462
#define BTREE_OBJECT_MAX_SIZE
Definition: btree_load.h:123
static int fixed_pages
Definition: btree.c:19239
#define ER_UNEXPECTED
Definition: error_code.h:1254
static int btree_find_free_overflow_oids_page(THREAD_ENTRY *thread_p, BTID_INT *btid, VPID *first_ovfl_vpid, PAGE_PTR *overflow_page)
Definition: btree.c:11019
static int btree_key_process_objects(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *leaf_record, int after_key_offset, LEAF_REC *leaf_info, BTREE_PROCESS_OBJECT_FUNCTION *func, void *args)
Definition: btree.c:23802
bool pr_is_set_type(DB_TYPE type)
int file_tracker_interruptable_iterate(THREAD_ENTRY *thread_p, FILE_TYPE desired_ftype, VFID *vfid, OID *class_oid)
DB_DATA data
Definition: dbtype_def.h:1083
int file_descriptor_get(THREAD_ENTRY *thread_p, const VFID *vfid, FILE_DESCRIPTORS *desc_out)
void partition_clear_pruning_context(PRUNING_CONTEXT *pinfo)
Definition: partition.c:2380
BTREE_OBJECT_INFO obj_info
Definition: btree.c:711
static int btree_get_max_new_data_size(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR page, BTREE_NODE_TYPE node_type, int key_len, BTREE_INSERT_HELPER *helper, bool known_to_be_found)
Definition: btree.c:26522
#define OR_GET_BTID(ptr, btid)
int btree_dump_capacity(THREAD_ENTRY *thread_p, FILE *fp, BTID *btid)
Definition: btree.c:8734
DISK_ISVALID btree_check_by_btid(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:7773
TP_DOMAIN * btree_read_key_type(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:9079
#define BTREE_SPLIT_DEFAULT_PIVOT
Definition: btree.c:76
bool is_interrupted
Definition: btree.h:228
#define RECORD_MOVE_DATA(rec, dest_offset, src_offset)
int heap_get_class_tde_algorithm(THREAD_ENTRY *thread_p, const OID *class_oid, TDE_ALGORITHM *tde_algo)
Definition: heap_file.c:10737
int file_alloc_sticky_first_page(THREAD_ENTRY *thread_p, const VFID *vfid, FILE_INIT_PAGE_FUNC f_init, void *f_init_args, VPID *vpid_out, PAGE_PTR *page_out)
float tot_space
Definition: btree.h:369
int pr_index_writeval_disk_size(DB_VALUE *value)
#define BTREE_GET_MVCC_INFO_SIZE_FROM_FLAGS(mvcc_flags)
Definition: btree.c:269
OR_BUF * buffered_key
Definition: btree.c:823
static int btree_ils_adjust_range(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:19904
PR_TYPE * pr_type_from_id(DB_TYPE id)
static bool btree_key_insert_does_leaf_need_split(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR leaf_page, BTREE_INSERT_HELPER *insert_helper, BTREE_SEARCH_KEY_HELPER *search_key)
Definition: btree.c:27625
static DISK_ISVALID btree_find_key_from_nleaf(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, int key_cnt, OID *oid, DB_VALUE *key, bool *clear_key)
Definition: btree.c:18184
static int btree_range_scan_start(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:24216
btree_insert_list()=delete
int spage_check(THREAD_ENTRY *thread_p, PAGE_PTR page_p)
static int btree_find_oid_from_leaf(THREAD_ENTRY *thread_p, BTID_INT *btid, RECDES *leaf_record, int after_key_offset, OID *oid, BTREE_MVCC_INFO *match_mvccinfo, BTREE_OP_PURPOSE purpose, int *offset_to_object, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:11380
#define RECDES_INITIALIZER
DISK_ISVALID btree_find_key(THREAD_ENTRY *thread_p, BTID *btid, OID *oid, DB_VALUE *key, bool *clear_key)
Definition: btree.c:18273
static void btree_perf_track_time(THREAD_ENTRY *thread_p, Helper *helper)
Definition: btree.c:889
int btree_rv_redo_record_modify(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
Definition: btree.c:28822
#define VPID_SET(vpid_ptr, volid_value, pageid_value)
Definition: dbtype_def.h:899
void er_set(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
void btree_clear_mvcc_flags_from_oid(OID *oid)
Definition: btree.c:21763
int spage_max_space_for_new_record(THREAD_ENTRY *thread_p, PAGE_PTR page_p)
Definition: slotted_page.c:984
int part_key_desc
Definition: btree.h:123
char * btree_pack_mvccinfo(char *ptr, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:21369
#define BTREE_OID_CLEAR_MVCC_FLAGS(oid_ptr)
Definition: btree.c:128
static bool btree_is_fixed_size(BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, bool is_first)
Definition: btree.c:34797
#define BTREE_INIT_MVCC_HEADER(p_mvcc_rec_header)
Definition: btree.h:451
static int btree_range_scan_find_fk_any_object(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:25658
int logtb_get_global_unique_stats(THREAD_ENTRY *thread_p, BTID *btid, int *num_oids, int *num_nulls, int *num_keys)
static int btree_merge_root(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R)
Definition: btree.c:9753
int btree_rv_keyval_undo_delete(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17864
static int btree_key_remove_object(THREAD_ENTRY *thread_p, DB_VALUE *key, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR leaf_page, RECDES *leaf_record, LEAF_REC *leaf_info, int offset_after_key, BTREE_SEARCH_KEY_HELPER *search_key, PAGE_PTR *overflow_page, PAGE_PTR prev_page, BTREE_NODE_TYPE node_type, int offset_to_object)
Definition: btree.c:31389
BTREE_SEARCH xbtree_find_unique(THREAD_ENTRY *thread_p, BTID *btid, SCAN_OPERATION_TYPE scan_op_type, DB_VALUE *key, OID *class_oid, OID *oid, bool is_all_class_srch)
Definition: btree.c:23990
#define BTREE_OBJECT_FIXED_SIZE(btree_info)
Definition: btree_load.h:118
int pr_midxkey_add_elements(DB_VALUE *keyval, DB_VALUE *dbvals, int num_dbvals, struct tp_domain *dbvals_domain_list)
PAGE_FETCH_MODE
Definition: page_buffer.h:160
BTREE_OP_PURPOSE purpose
Definition: btree.c:817
PAGE_PTR pgptr
Definition: recovery.h:199
#define assert(x)
#define BTREE_MAX_OIDCOUNT_IN_LEAF_RECORD(btid)
Definition: btree_load.h:143
void btree_leaf_record_change_overflow_link(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *leaf_record, VPID *new_overflow_vpid, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:2296
#define BTREE_SET_DELETED_OVERFLOW_PAGE_NOTIFICATION(THREAD, KEY, OID, C_OID, BTID)
Definition: btree_load.h:167
int btree_update(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *old_key, DB_VALUE *new_key, OID *cls_oid, OID *oid, int op_type, btree_unique_stats *unique_stat_info, int *unique, MVCC_REC_HEADER *p_mvcc_rec_header)
Definition: btree.c:13967
LOG_RECVPHASE rcv_phase
Definition: log_impl.h:662
#define BTREE_MVCC_INFO_IS_DELID_VALID(mvcc_info)
Definition: btree.c:177
BTREE_NODE_HEADER node
Definition: btree_load.h:207
int * pkeys
Definition: statistics.h:67
int btree_online_index_list_dispatcher(THREAD_ENTRY *thread_p, BTID *btid, OID *class_oid, btree_insert_list *insert_list, int unique, BTREE_OP_PURPOSE purpose, LOG_LSA *undo_nxlsa)
Definition: btree.c:33293
TDE_ALGORITHM
Definition: tde.h:71
#define ER_LC_UNKNOWN_CLASSNAME
Definition: error_code.h:121
int use_desc_index
Definition: access_spec.hpp:94
char * copy_buf
Definition: scan_manager.h:200
SCAN_ATTRS pred_attrs
Definition: scan_manager.h:210
int32_t fileid
Definition: dbtype_def.h:886
PERF_UTIME_TRACKER time_track
Definition: btree.c:843
void btree_mvcc_info_to_heap_mvcc_header(BTREE_MVCC_INFO *mvcc_info, MVCC_REC_HEADER *mvcc_header)
Definition: btree.c:28787
BTREE_ISCAN_OID_LIST oid_list
Definition: btree.h:351
bool pgbuf_check_page_ptype(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PAGE_TYPE ptype)
int btree_rv_keyval_undo_insert_mvcc_delid(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17801
SCAN_CODE btree_index_next_scan(THREAD_ENTRY *thread_p, int cursor, DB_VALUE **out_values, int out_cnt, void *ptr)
Definition: btree.c:20669
#define ASSERT_ALIGN(ptr, alignment)
key_range * skipped_range
Definition: scan_manager.h:182
static void btree_record_remove_insid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data, char **rv_redo_data, int *displacement)
Definition: btree.c:32508
int file_get_num_user_pages(THREAD_ENTRY *thread_p, const VFID *vfid, int *n_user_pages_out)
static int btree_find_oid_from_ovfl(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR overflow_page, OID *oid, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO *match_mvccinfo, int *offset_to_object, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:11471
char * or_unpack_btid(char *buf, BTID *btid)
#define ER_BTREE_DUPLICATE_OID
Definition: error_code.h:485
static int btree_find_next_index_record_holding_current(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, RECDES *peek_rec)
Definition: btree.c:15516
#define BTREE_OID_IS_MVCC_FLAG_SET(oid_ptr, mvcc_flag)
Definition: btree.c:142
static void btree_dump_root_header(THREAD_ENTRY *thread_p, FILE *fp, PAGE_PTR page_ptr)
Definition: btree.c:4565
VPID crt_vpid
Definition: btree.h:392
static int btree_get_stats_with_AR_sampling(THREAD_ENTRY *thread_p, BTREE_STATS_ENV *env)
Definition: btree.c:6675
int prm_get_integer_value(PARAM_ID prm_id)
char * db_private_strdup(THREAD_ENTRY *thrd, const char *s)
Definition: memory_alloc.c:675
#define ER_GENERIC_ERROR
Definition: error_code.h:49
char * btid_to_string(char *buf, int buf_size, BTID *btid)
#define STATIC_INLINE
int btree_rv_save_keyval_for_undo(BTID_INT *btid, DB_VALUE *key, OID *cls_oid, OID *oid, BTREE_MVCC_INFO *mvcc_info, BTREE_OP_PURPOSE purpose, char *preallocated_buffer, char **data, int *capacity, int *length)
Definition: btree.c:16504
LOG_LSA * pgbuf_get_lsa(PAGE_PTR pgptr)
Definition: page_buffer.c:4318
PGBUF_LATCH_MODE
Definition: page_buffer.h:176
#define ER_FK_INVALID
Definition: error_code.h:1153
unsigned int record_length
Definition: slotted_page.h:88
int xcallback_console_print(THREAD_ENTRY *thread_p, char *print_str)
int offset
Definition: btree.h:222
#define BTREE_MVCC_INFO_HAS_DELID(mvcc_info)
Definition: btree.c:163
int or_get_short(OR_BUF *buf, int *error)
#define BTREE_DELETE_CLASS_OID(helper)
Definition: btree.c:881
static void btree_append_oid(RECDES *rec, OID *oid)
Definition: btree.c:3613
static int btree_key_online_index_IB_insert(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:33648
#define VACUUM_IS_THREAD_VACUUM_WORKER
Definition: vacuum.h:216
LC_FIND_CLASSNAME xlocator_find_class_oid(THREAD_ENTRY *thread_p, const char *classname, OID *class_oid, LOCK lock)
Definition: locator_sr.c:1033
int partition_prune_unique_btid(PRUNING_CONTEXT *pcontext, DB_VALUE *key, OID *class_oid, HFID *class_hfid, BTID *btid)
Definition: partition.c:3553
bool m_use_page_boundary_check
Definition: btree.h:589
regu_variable_list_node * regu_list
int file_apply_tde_algorithm(THREAD_ENTRY *thread_p, const VFID *vfid, const TDE_ALGORITHM tde_algo)
#define ER_OUT_OF_VIRTUAL_MEMORY
Definition: error_code.h:50
#define BTREE_IS_PAGE_VALID_LEAF(thread_p, page)
Definition: btree.c:280
void log_append_undo_data(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, LOG_DATA_ADDR *addr, int length, const void *data)
Definition: log_manager.c:1917
REGU_VARIABLE value
Definition: regu_var.hpp:222
#define BTREE_RV_REDO_SET_DEBUG_INFO(addr, rv_ptr, btid_int, id)
Definition: btree.c:1023
#define BTREE_SET_CREATED_OVERFLOW_PAGE_NOTIFICATION(THREAD, KEY, OID, C_OID, BTID)
Definition: btree_load.h:163
int rev_level
Definition: btree.h:132
#define BTREE_LEAF_RECORD_MASK
Definition: btree.c:112
static int btree_get_stats_midxkey(THREAD_ENTRY *thread_p, BTREE_STATS_ENV *env, DB_MIDXKEY *midxkey)
Definition: btree.c:6422
BTID_INT btid_int
Definition: btree.h:165
int btree_init_root_header(THREAD_ENTRY *thread_p, VFID *vfid, PAGE_PTR page_ptr, BTREE_ROOT_HEADER *root_header, TP_DOMAIN *key_type)
Definition: btree_load.c:540
int btree_get_unique_statistics(THREAD_ENTRY *thread_p, BTID *btid, int *oid_cnt, int *null_cnt, int *key_cnt)
Definition: btree.c:6199
static char * key_type_to_string(char *buf, int buf_size, TP_DOMAIN *key_type)
Definition: btree.c:20334
#define BTREE_MVCC_INFO_HAS_INSID(mvcc_info)
Definition: btree.c:161
PGBUF_LATCH_MODE pgbuf_get_latch_mode(PAGE_PTR pgptr)
Definition: page_buffer.c:4633
static DISK_ISVALID btree_check_pages(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, VPID *pg_vpid)
Definition: btree.c:7621
static int btree_or_put_object(OR_BUF *buf, BTID_INT *btid_int, BTREE_NODE_TYPE node_type, BTREE_OBJECT_INFO *object_info)
Definition: btree.c:21646
static void btree_online_index_set_normal_state(MVCCID &state)
Definition: btree.c:33253
static void btree_online_index_set_delete_flag_state(MVCCID &state)
Definition: btree.c:33247
int btree_compare_btids(void *mem_btid1, void *mem_btid2)
Definition: btree.c:21777
int btree_get_next_overflow_vpid(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr, VPID *vpid)
Definition: btree_load.c:677
TRAN_ISOLATION logtb_find_current_isolation(THREAD_ENTRY *thread_p)
PGBUF_LATCH_CONDITION
Definition: page_buffer.h:185
static bool btree_is_insert_object_purpose(BTREE_OP_PURPOSE purpose)
Definition: btree.c:34823
#define SINGLE_ROW_UPDATE
Definition: btree.h:54
static int btree_key_find_and_lock_unique_of_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:22983
unsigned is_desc
DB_VALUE pkeys_val[BTREE_STATS_PKEYS_NUM]
Definition: btree.c:349
int m_ovf_appends_new_page
Definition: btree.h:595
#define OR_GET_OID(ptr, oid)
void btree_get_root_vpid_from_btid(THREAD_ENTRY *thread_p, BTID *btid, VPID *root_vpid)
Definition: btree.c:6912
RANGE_OPT_ITEM ** top_n_items
Definition: scan_manager.h:160
int or_put_int(OR_BUF *buf, int num)
BTREE_SEARCH
int intl_identifier_casecmp(const char *str1, const char *str2)
#define DB_VALUE_DOMAIN_TYPE(value)
Definition: dbtype.h:70
#define BTREE_STATS_PKEYS_NUM
Definition: statistics.h:41
bool m_use_sorted_bulk_insert
Definition: btree.h:591
#define pgbuf_promote_read_latch(thread_p, pgptr_p, condition)
Definition: page_buffer.h:270
unsigned int record_type
Definition: slotted_page.h:89
static int btree_range_scan_read_record(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:24462
#define BTREE_RV_SET_OVERFLOW_NODE(addr)
Definition: btree.c:1018
bool is_ha_enabled
Definition: btree.c:731
static bool btree_check_locking_for_insert_unique(THREAD_ENTRY *thread_p, const BTREE_INSERT_HELPER *insert_helper)
Definition: btree.c:35394
int btree_rv_noderec_undo_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17271
BTREE_NODE_TYPE
Definition: btree.h:81
STATIC_INLINE PAGE_PTR btree_fix_root_with_info(THREAD_ENTRY *thread_p, BTID *btid, PGBUF_LATCH_MODE latch_mode, VPID *root_vpid_p, BTREE_ROOT_HEADER **root_header_p, BTID_INT *btid_int_p) __attribute__((ALWAYS_INLINE))
Definition: btree.c:1796
struct function_node * funcp
Definition: regu_var.hpp:190
#define TP_IS_NUMERIC_TYPE(typeid)
bool btree_is_btid_online_index(THREAD_ENTRY *thread_p, OID *class_oid, BTID *btid)
Definition: btree.c:35329
void btree_rv_noderec_dump_slot_id(FILE *fp, int length, void *data)
Definition: btree.c:17334
BTREE_NODE_TYPE node_type
Definition: btree.h:221
void lock_unlock_object(THREAD_ENTRY *thread_p, const OID *oid, const OID *class_oid, LOCK lock, bool force)
void log_append_compensate_with_undo_nxlsa(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VPID *vpid, PGLENGTH offset, PAGE_PTR pgptr, int length, const void *data, LOG_TDES *tdes, const LOG_LSA *undo_nxlsa)
Definition: log_manager.c:2990
BTREE_SEARCH xbtree_find_multi_uniques(THREAD_ENTRY *thread_p, OID *class_oid, int pruning_type, BTID *btids, DB_VALUE *values, int count, SCAN_OPERATION_TYPE op_type, OID **oids, int *oids_count)
Definition: btree.c:5848
int pr_midxkey_get_element_offset(const DB_MIDXKEY *midxkey, int index)
bool logtb_is_current_active(THREAD_ENTRY *thread_p)
bool LOG_RV_RECORD_IS_UPDATE_ALL(log_rv_record_flag_type flags)
Definition: log_append.hpp:186
MVCC_SNAPSHOT * snapshot
Definition: btree.c:449
#define VPID_EQ(vpid_ptr1, vpid_ptr2)
Definition: dbtype_def.h:915
int logtb_update_global_unique_stats_by_delta(THREAD_ENTRY *thread_p, BTID *btid, int oid_delta, int null_delta, int key_delta, bool log)
STATIC_INLINE void btree_insert_sysop_end(THREAD_ENTRY *thread_p, BTREE_INSERT_HELPER *helper) __attribute__((ALWAYS_INLINE))
Definition: btree.c:33021
FILTER_INFO * key_filter
Definition: btree.h:195
int or_overflow(OR_BUF *buf)
#define min(a, b)
bool LOG_RV_RECORD_IS_UPDATE_PARTIAL(log_rv_record_flag_type flags)
Definition: log_append.hpp:192
int n_oids_read_last_iteration
Definition: btree.h:232
int xbtree_class_test_unique(THREAD_ENTRY *thread_p, char *buf, int buf_size)
Definition: btree.c:6065
int or_align(OR_BUF *buf, int alignment)
int or_put_short(OR_BUF *buf, int num)
static BTREE_SEARCH btree_key_find_first_visible_row(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *rec, int offset, BTREE_NODE_TYPE node_type, OID *oid, OID *class_oid, int max_oids)
Definition: btree.c:21003
#define OR_OID_VOLID
static int btree_load_overflow_key(THREAD_ENTRY *thread_p, BTID_INT *btid, VPID *firstpg_vpid, DB_VALUE *key, BTREE_NODE_TYPE node_type)
Definition: btree.c:2109
#define BTREE_OID_MVCC_FLAGS_MASK
Definition: btree.c:119
MVCCID creator_mvccid
Definition: btree_load.h:216
int qexec_insert_tuple_into_list(THREAD_ENTRY *thread_p, qfile_list_id *list_id, valptr_list_node *outptr_list, val_descr *vd, qfile_tuple_record *tplrec)
short volid
Definition: dbtype_def.h:880
LOG_LSA last_log_lsa
Definition: log_impl.h:623
static int btree_fix_ovfl_oid_pages_by_btid(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:19242
static int btree_dump_curr_key(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, FILTER_INFO *filter, OID *oid, INDX_SCAN_ID *iscan_id)
Definition: btree.c:16042
#define BTREE_NEW_ENTRY_MAX_SIZE(key_disk_size, node_type)
Definition: btree_load.h:88
#define OID_EQ(oidp1, oidp2)
Definition: oid.h:92
bool m_is_inf_right_key
Definition: btree.h:549
#define BTREE_MVCC_INFO_INITIALIZER
Definition: btree.h:520
#define ER_EMERGENCY_ERROR
Definition: error_code.h:632
static int btree_split_find_pivot(int total, BTREE_NODE_SPLIT_INFO *split_info)
Definition: btree.c:12271
#define heap_classrepr_free_and_init(class_repr, idxp)
Definition: heap_file.h:91
TP_DOMAIN_STATUS tp_value_cast(const DB_VALUE *src, DB_VALUE *dest, const TP_DOMAIN *desired_domain, bool implicit_coercion)
BTS_KEY_STATUS key_status
Definition: btree.h:224
int BTREE_ROOT_WITH_KEY_FUNCTION(THREAD_ENTRY *thread_p, BTID *btid, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *root_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:524
int pr_midxkey_add_prefix(DB_VALUE *result, DB_VALUE *prefix, DB_VALUE *postfix, int n_prefix)
#define TP_DOMAIN_TYPE(dom)
static int rv
Definition: area_alloc.c:52
std::int64_t pageid
Definition: log_lsa.hpp:36
#define STATS_SAMPLING_LEAFS_MAX
Definition: statistics.h:38
#define BTS_IS_INDEX_COVERED(bts)
Definition: btree.c:598
int length
Definition: recovery.h:202
int btree_online_index_dispatcher(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *cls_oid, OID *oid, int unique, BTREE_OP_PURPOSE purpose, LOG_LSA *undo_nxlsa)
Definition: btree.c:33272
static void btree_set_unknown_key_error(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, const char *debug_msg)
Definition: btree.c:18487
static void btree_record_remove_last_object(THREAD_ENTRY *thread_p, BTID_INT *btid, RECDES *recp, BTREE_NODE_TYPE node_type, int last_oid_mvcc_offset, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:3317
SCAN_CODE btree_get_next_node_info(THREAD_ENTRY *thread_p, BTID *btid, BTREE_NODE_SCAN *btns, DB_VALUE **node_info)
Definition: btree.c:20162
static void cleanup(int signo)
Definition: broker.c:717
int btree_rv_noderec_undoredo_update(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17197
int btree_init_overflow_header(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr, BTREE_OVERFLOW_HEADER *ovf_header)
Definition: btree_load.c:579
static int btree_key_insert_delete_mvccid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, BTREE_INSERT_HELPER *insert_helper, RECDES *leaf_record, PAGE_PTR object_page, int offset_to_found_object)
Definition: btree.c:28647
static bool btree_leaf_is_flaged(RECDES *recp, short record_flag)
Definition: btree.c:3408
#define BTREE_MVCC_INFO_CLEAR_DELID(mvcc_info)
Definition: btree.c:169
char * rv_redo_data_ptr
Definition: btree.c:747
static int btree_key_remove_delete_mvccid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:31909
#define LOG_ISTRAN_ABORTED(tdes)
Definition: log_impl.h:192
const log_rv_record_flag_type LOG_RV_RECORD_UPDATE_PARTIAL
Definition: log_append.hpp:137
VAL_DESCR * val_descr
Definition: scan_manager.h:130
STATIC_INLINE int btree_count_oids(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, char *object_ptr, OID *oid, OID *class_oid, MVCC_REC_HEADER *mvcc_header, bool *stop, void *args) __attribute__((ALWAYS_INLINE))
Definition: btree.c:24158
#define NULL
Definition: freelistheap.h:34
#define PGBUF_PAGE_MODIFY_MSG(name)
Definition: page_buffer.h:59
#define LEAF_ENTRY_MAX_SIZE(n)
Definition: btree_load.h:69
DB_BIGINT * key_limit_lower
Definition: btree.h:238
int file_alloc(THREAD_ENTRY *thread_p, const VFID *vfid, FILE_INIT_PAGE_FUNC f_init, void *f_init_args, VPID *vpid_out, PAGE_PTR *page_out)
#define BTS_IS_INDEX_ISS(bts)
Definition: btree.c:604
PAGE_PTR P_page
Definition: btree.h:179
#define ER_PAGE_LATCH_PROMOTE_FAIL
Definition: error_code.h:1512
char * rv_keyval_data
Definition: btree.c:835
static BTREE_MERGE_STATUS btree_node_mergeable(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR L, PAGE_PTR R)
Definition: btree.c:10612
static void bts_reset_scan(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.c:671
void btree_dump_key(FILE *fp, const DB_VALUE *key)
Definition: btree.c:4615
#define BTREE_DELETE_HELPER_AS_ARGS(helper)
Definition: btree.c:1194
int tp_valid_indextype(DB_TYPE type)
UINT64 MVCCID
void db_value_print(const DB_VALUE *value)
Definition: db_macro.c:1663
static void btree_leaf_clear_flag(RECDES *recp, short record_flag)
Definition: btree.c:3474
void btree_set_mvcc_flags_into_oid(MVCC_REC_HEADER *p_mvcc_header, OID *oid)
Definition: btree.c:21739
#define MVCC_IS_FLAG_SET(rec_header_p, flags)
Definition: mvcc.h:84
stat_type get_null_count() const
struct pr_type * type
Definition: object_domain.h:76
int btree_index_capacity(THREAD_ENTRY *thread_p, BTID *btid, BTREE_CAPACITY *cpc)
Definition: btree.c:8673
#define BTREE_RV_UNDOREDO_SET_DEBUG_INFO(addr, rv_redo_ptr, rv_undo_ptr, btid_int, id)
Definition: btree.c:1057
btree_rv_debug_id
Definition: btree.c:1120
DB_CHAR ch
Definition: dbtype_def.h:1070
#define OR_GET_VPID(ptr, vpid)
SCAN_ATTRS * scan_attrs
PERF_PAGE_TYPE btree_get_perf_btree_page_type(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree.c:33161
const char * pr_type_name(DB_TYPE id)
HEAP_SCANCACHE scan_cache
Definition: scan_manager.h:206
#define BTREE_CURRENT_REV_LEVEL
Definition: btree_load.h:50
bool is_unique_multi_update
Definition: btree.c:729
Definition: btree.h:536
DISK_ISVALID btree_verify_tree(THREAD_ENTRY *thread_p, const OID *class_oid_p, BTID_INT *btid_int, const char *btname)
Definition: btree.c:7568
if(extra_options)
Definition: dynamic_load.c:958
static bool btree_node_is_compressed(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr)
Definition: btree.c:12322
PGBUF_PROMOTE_CONDITION
Definition: page_buffer.h:191
int btree_rv_update_tran_stats(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:16922
PAGE_PTR C_page
Definition: btree.h:181
bool log_is_in_crash_recovery(void)
Definition: log_manager.c:476
const VFID * vfid
Definition: log_append.hpp:56
static int btree_undo_insert_object_unique_multiupd(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, BTREE_OBJECT_INFO *inserted_object, BTREE_OBJECT_INFO *second_object, MVCCID insert_mvccid, LOG_LSA *undo_nxlsa)
Definition: btree.c:29532
bool pgbuf_is_io_stressful(void)
void log_append_run_postpone(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, LOG_DATA_ADDR *addr, const VPID *rcv_vpid, int length, const void *data, const LOG_LSA *ref_lsa)
Definition: log_manager.c:2860
int xbtree_delete_index(THREAD_ENTRY *thread_p, BTID *btid)
Definition: btree.c:5691
VFID vfid
#define ER_BTREE_REPAIR_PREV_LINK
Definition: error_code.h:1451
#define PGBUF_PAGE_MODIFY_ARGS(pg, prev_lsa)
Definition: page_buffer.h:60
LEAF_RECORD_TYPE
Definition: btree.c:294
MVCCID insert_mvccid
Definition: btree.h:517
short flags
Definition: btree.h:516
int xbtree_get_key_type(THREAD_ENTRY *thread_p, BTID btid, TP_DOMAIN **key_type)
Definition: btree.c:7167
#define ER_OBJ_INDEX_NOT_FOUND
Definition: error_code.h:877
static bool btree_is_delete_data_purpose(BTREE_OP_PURPOSE purpose)
Definition: btree.c:34853
#define LOFFS4
Definition: btree.c:340
static int success()
#define BTS_IS_HARD_CAPACITY_ENOUGH(bts, count)
Definition: btree.c:639
bool LSA_ISNULL(const log_lsa *lsa_ptr)
Definition: log_lsa.hpp:153
static MVCCID btree_get_creator_mvccid(THREAD_ENTRY *thread_p, PAGE_PTR root_page)
Definition: btree.c:32827
static int btree_key_remove_delete_mvccid_non_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR page, RECDES *record, PGSLOTID slotid, BTREE_NODE_TYPE node_type, int offset_to_object)
Definition: btree.c:32347
#define SINGLE_ROW_DELETE
Definition: btree.h:53
#define SERVER_MODE
SHA1Hash printed_key_sha1
Definition: btree.c:737
#define BTID_SET_NULL(btid)
VPID pnt
Definition: btree.h:105
#define BTREE_OVERFLOW_KEY
Definition: btree.h:94
LC_FIND_CLASSNAME
PAGE_PTR pgptr
Definition: log_append.hpp:57
#define err(fd,...)
Definition: porting.h:431
int btree_get_class_oid_of_unique_btid(THREAD_ENTRY *thread_p, BTID *btid, OID *class_oid)
Definition: btree.c:35304
BTREE_STATS * stat_info
Definition: btree.c:347
int btree_rv_nop(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:18017
#define db_private_free_and_init(thrd, ptr)
Definition: memory_alloc.h:141
int height
Definition: btree.h:363
static short btree_leaf_get_flag(RECDES *recp)
Definition: btree.c:3376
#define ER_UNIQUE_VIOLATION_WITHKEY
Definition: error_code.h:1103
int num_index_term
Definition: btree.h:143
int rv_keyval_data_length
Definition: btree.c:745
void log_append_undoredo_data(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, LOG_DATA_ADDR *addr, int undo_length, int redo_length, const void *undo_data, const void *redo_data)
Definition: log_manager.c:1837
static int index_attrs_to_string(char *buf, int buf_size, OR_INDEX *index_p, RECDES *recdes)
Definition: btree.c:20431
static int btree_start_overflow_page(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_OBJECT_INFO *object_info, VPID *first_overflow_vpid, VPID *near_vpid, VPID *new_vpid, PAGE_PTR *new_page_ptr)
Definition: btree.c:3949
#define pgbuf_fix(thread_p, vpid, fetch_mode, requestmode, condition)
Definition: page_buffer.h:255
static void btree_dump_page_with_subtree(THREAD_ENTRY *thread_p, FILE *fp, BTID_INT *btid, PAGE_PTR pg_ptr, VPID *pg_vpid, int depth, int level)
Definition: btree.c:8948
BTREE_SEARCH result
Definition: btree.c:423
char * printed_key
Definition: btree.c:736
#define ISCAN_OID_BUFFER_CAPACITY
static int btree_key_append_object_into_ovf(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_record, LEAF_REC *leaf_record_info, BTREE_INSERT_HELPER *insert_helper, BTREE_OBJECT_INFO *append_object)
Definition: btree.c:28401
#define MVCC_ID_PRECEDES(id1, id2)
Definition: mvcc.h:137
void partition_init_pruning_context(PRUNING_CONTEXT *pinfo)
Definition: partition.c:2164
DISK_ISVALID disk_is_page_sector_reserved(THREAD_ENTRY *thread_p, VOLID volid, PAGEID pageid)
char * or_unpack_int(char *ptr, int *number)
int logtb_tran_update_unique_stats(THREAD_ENTRY *thread_p, const BTID *btid, int n_keys, int n_oids, int n_nulls, bool write_to_log)
#define db_private_free(thrd, ptr)
Definition: memory_alloc.h:229
int btree_prepare_bts(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, BTID *btid, INDX_SCAN_ID *index_scan_id_p, key_val_range *kv_range, FILTER_INFO *filter, const OID *match_class_oid, DB_BIGINT *key_limit_upper, DB_BIGINT *key_limit_lower, bool need_to_check_null, void *bts_other)
Definition: btree.c:15049
void or_init(OR_BUF *buf, char *data, int length)
SCAN_OPERATION_TYPE
int btree_rv_keyval_undo_insert_unique(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17750
LOG_UNIQUE_STATS tran_stats
Definition: log_impl.h:376
static int btree_key_find_unique_version_oid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:22848
int BTREE_ADVANCE_WITH_KEY_FUNCTION(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *crt_page, PAGE_PTR *advance_to_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:551
int btree_physical_delete(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *oid, OID *class_oid, int *unique, int op_type, btree_unique_stats *unique_stat_info)
Definition: btree.c:29340
stat_type get_row_count() const
static int btree_search_nonleaf_page(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, DB_VALUE *key, INT16 *slot_id, VPID *child_vpid, page_key_boundary *page_bounds)
Definition: btree.c:4995
#define MVCC_REC_HEADER_INITIALIZER
Definition: mvcc.h:47
int btree_get_unique_statistics_for_count(THREAD_ENTRY *thread_p, BTID *btid, int *oid_cnt, int *null_cnt, int *key_cnt)
Definition: btree.c:6171
#define db_private_alloc(thrd, size)
Definition: memory_alloc.h:227
BTREE_NODE_SCAN_QUEUE_ITEM * next
Definition: btree.h:384
static int btree_get_node_level(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree.c:1896
OR_PARTITION * partitions
Definition: partition_sr.h:76
const OID oid_Null_oid
Definition: oid.c:68
static int btree_store_overflow_key(THREAD_ENTRY *thread_p, BTID_INT *btid, DB_VALUE *key, int size, BTREE_NODE_TYPE node_type, VPID *firstpg_vpid)
Definition: btree.c:1999
static void btree_rv_log_delete_object(THREAD_ENTRY *thread_p, const BTREE_DELETE_HELPER &delete_helper, LOG_DATA_ADDR &addr, int undo_length, int redo_length, const char *undo_data, const char *redo_data)
Definition: btree.c:34904
static DISK_ISVALID btree_repair_prev_link_by_class_oid(THREAD_ENTRY *thread_p, OID *oid, BTID *idx_btid, bool repair)
Definition: btree.c:8149
#define NULL_OFFSET
bool end_scan
Definition: btree.h:226
int pr_midxkey_get_element_nocopy(const DB_MIDXKEY *midxkey, int index, DB_VALUE *value, int *prev_indexp, char **prev_ptrp)
INDX_INFO * indx_info
Definition: scan_manager.h:188
need_clear_type need_clear
Definition: dbtype_def.h:1084
#define OR_PUT_SHORT(ptr, val)
bool logtb_set_check_interrupt(THREAD_ENTRY *thread_p, bool flag)
#define CEIL_PTVDIV(dividend, divisor)
Definition: memory_alloc.h:50
int btree_rv_pagerec_delete(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17410
void btree_scan_clear_key(BTREE_SCAN *btree_scan)
Definition: btree.c:6035
static int btree_leaf_remove_object(THREAD_ENTRY *thread_p, DB_VALUE *key, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR leaf_page, RECDES *leaf_record, LEAF_REC *leaf_rec_info, int offset_after_key, BTREE_SEARCH_KEY_HELPER *search_key, int offset_to_object)
Definition: btree.c:31608
OR_CLASSREP * heap_classrepr_get(THREAD_ENTRY *thread_p, const OID *class_oid, RECDES *class_recdes, REPR_ID reprid, int *idx_incache)
Definition: heap_file.c:2299
static int btree_iss_set_key(BTREE_SCAN *bts, INDEX_SKIP_SCAN *iss)
Definition: btree.c:19183
TP_DOMAIN ** sort_col_dom
Definition: scan_manager.h:159
int count(int &result, const cub_regex_object &reg, const std::string &src, const int position, const INTL_CODESET codeset)
static void btree_read_fixed_portion_of_non_leaf_record(RECDES *rec, NON_LEAF_REC *nlf_rec)
Definition: btree.c:3538
int pr_clear_value(DB_VALUE *value)
int btree_rv_noderec_redo_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17234
static int btree_key_find_and_lock_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:22955
void pgbuf_get_vpid(PAGE_PTR pgptr, VPID *vpid)
Definition: page_buffer.c:4579
bool m_is_inf_left_key
Definition: btree.h:548
offset_type offset
Definition: log_append.hpp:58
#define ER_CANNOT_GET_LOCK
Definition: error_code.h:1255
static int btree_replace_first_oid_with_ovfl_oid(THREAD_ENTRY *thread_p, BTID_INT *btid, DB_VALUE *key, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_rec, VPID *ovfl_vpid)
Definition: btree.c:9250
#define BTS_SAVE_OID_IN_BUFFER(bts, oid)
Definition: btree.c:649
DB_VALUE m_key
Definition: btree.h:538
const LOG_LSA * pgbuf_set_lsa(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, const LOG_LSA *lsa_ptr)
Definition: page_buffer.c:4364
int btree_rv_redo_global_unique_stats_commit(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:22425
#define VFID_COPY(vfid_ptr1, vfid_ptr2)
Definition: file_manager.h:69
#define btree_delete_log(helper, msg,...)
Definition: btree.c:1151
void log_sysop_abort(THREAD_ENTRY *thread_p)
Definition: log_manager.c:4017
#define NULL_REPRID
#define max(a, b)
#define MVCC_SET_FLAG_BITS(rec_header_p, flag)
Definition: mvcc.h:95
static bool btree_is_insert_delid_purpose(BTREE_OP_PURPOSE purpose)
Definition: btree.c:34840
static bool btree_online_index_is_insert_flag_state(MVCCID state)
Definition: btree.c:33223
int64_t DB_BIGINT
Definition: dbtype_def.h:751
LOG_LSA reference_lsa
Definition: recovery.h:204
#define BTREE_SPLIT_LOWER_BOUND
Definition: btree.c:70
DB_VALUE cur_key
Definition: btree.h:191
static int btree_merge_node_and_advance(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *crt_page, PAGE_PTR *advance_to_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:29986
#define BTREE_INSERT_OID(ins_helper)
Definition: btree.c:801
int btree_get_num_visible_from_leaf_and_ovf(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *leaf_record, int offset_after_key, LEAF_REC *leaf_info, int *max_visible_oids, MVCC_SNAPSHOT *mvcc_snapshot, int *num_visible)
Definition: btree.c:2566
MVCCID logtb_get_current_mvccid(THREAD_ENTRY *thread_p)
#define CAST_BUFLEN
Definition: porting.h:471
int ncolumns
Definition: dbtype_def.h:864
static int btree_delete_overflow_key(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, INT16 slot_id, BTREE_NODE_TYPE node_type)
Definition: btree.c:2180
PGNSLOTS spage_number_of_slots(PAGE_PTR page_p)
Definition: slotted_page.c:879
DB_VALUE index_value
Definition: scan_manager.h:144
#define BTREE_INSERT_HELPER_MSG(tabs)
Definition: btree.c:1174
regu_variable_list_node * rest_regu_list
Definition: scan_manager.h:213
#define ER_BTREE_UNIQUE_FAILED
Definition: error_code.h:811
int btree_rv_util_save_page_records(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr, INT16 first_slotid, int rec_cnt, INT16 ins_slotid, char *data, int *length)
Definition: btree.c:16433
const MVCCID BTREE_ONLINE_INDEX_MVCCID_MASK
Definition: btree.c:1241
int fetch_val_list(THREAD_ENTRY *thread_p, regu_variable_list_node *regu_list, val_descr *vd, OID *class_oid, OID *obj_oid, QFILE_TUPLE tpl, int peek)
Definition: fetch.c:4526
#define BTS_INCREMENT_READ_OIDS(bts)
Definition: btree.c:614
static void error(const char *msg)
Definition: gencat.c:331
void log_append_postpone(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, LOG_DATA_ADDR *addr, int length, const void *data)
Definition: log_manager.c:2698
#define MULTI_ROW_UPDATE
Definition: btree.h:58
int btree_get_disk_size_of_key(DB_VALUE *key)
Definition: btree.c:4041
static int btree_get_subtree_capacity(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, BTREE_CAPACITY *cpc)
Definition: btree.c:8468
#define VPID_ISNULL(vpid_ptr)
Definition: dbtype_def.h:925
void btree_rv_read_keybuf_two_objects(THREAD_ENTRY *thread_p, char *datap, int data_size, BTID_INT *btid_int, BTREE_OBJECT_INFO *first_version, BTREE_OBJECT_INFO *second_version, OR_BUF *key_buf)
Definition: btree.c:17631
int btree_get_prefix_separator(const DB_VALUE *key1, const DB_VALUE *key2, DB_VALUE *prefix_key, TP_DOMAIN *key_domain)
Definition: btree.c:11763
char * rv_redo_data
Definition: btree.c:746
static int btree_key_remove_delete_mvccid_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, BTREE_SEARCH_KEY_HELPER *search_key, PAGE_PTR leaf_page, RECDES *leaf_record, PAGE_PTR overflow_page, RECDES *overflow_record, BTREE_NODE_TYPE node_type, int offset_to_object)
Definition: btree.c:32077
const char * data
Definition: recovery.h:203
enum btree_op_purpose BTREE_OP_PURPOSE
Definition: btree.h:506
VPID P_vpid
Definition: btree.h:171
static int rc
Definition: serial.c:50
static void btree_record_add_delid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int offset_to_object, MVCCID delete_mvccid, char **rv_undo_data, char **rv_redo_data)
Definition: btree.c:32634
char * or_pack_int(char *ptr, int number)
#define FI_TEST(th, code, state)
STATIC_INLINE void perfmon_inc_stat(THREAD_ENTRY *thread_p, PERF_STAT_ID psid) __attribute__((ALWAYS_INLINE))
bool is_system_op_started
Definition: btree.c:749
#define ER_INTERRUPTED
Definition: error_code.h:51
static void btree_write_default_split_info(BTREE_NODE_SPLIT_INFO *info)
Definition: btree.c:9716
static void btree_dump_non_leaf_record(THREAD_ENTRY *thread_p, FILE *fp, BTID_INT *btid, RECDES *rec, int n, int print_key)
Definition: btree.c:4891
void file_postpone_destroy(THREAD_ENTRY *thread_p, const VFID *vfid)
SCAN_PRED scan_pred
Definition: scan_manager.h:209
static int btree_search_key_and_apply_functions(THREAD_ENTRY *thread_p, BTID *btid, BTID_INT *btid_int, DB_VALUE *key, BTREE_ROOT_WITH_KEY_FUNCTION *root_fnct, void *root_args, BTREE_ADVANCE_WITH_KEY_FUNCTION *advance_fnct, void *advance_args, BTREE_PROCESS_KEY_FUNCTION *leaf_fnct, void *process_key_args, BTREE_SEARCH_KEY_HELPER *search_key, PAGE_PTR *leaf_page_ptr)
Definition: btree.c:22497
static int btree_record_satisfies_snapshot(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, char *object_ptr, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, bool *stop, void *args)
Definition: btree.c:23912
void btree_rv_read_keybuf_nocopy(THREAD_ENTRY *thread_p, char *datap, int data_size, BTID_INT *btid, OID *cls_oid, OID *oid, BTREE_MVCC_INFO *mvcc_info, OR_BUF *key_buf)
Definition: btree.c:17567
LOG_TDES * LOG_FIND_CURRENT_TDES(THREAD_ENTRY *thread_p=NULL)
Definition: log_impl.h:1115
PGSLOTID spage_delete(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id)
bool log_check_system_op_is_started(THREAD_ENTRY *thread_p)
Definition: log_manager.c:4166
int or_seek(OR_BUF *buf, int psn)
int db_make_midxkey(DB_VALUE *value, DB_MIDXKEY *midxkey)
int btree_rv_keyval_undo_online_index_tran_delete(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:35100
#define pgbuf_fix_if_not_deallocated(thread_p, vpid, latch_mode, latch_condition, page)
Definition: page_buffer.h:441
static int btree_get_root_with_key(THREAD_ENTRY *thread_p, BTID *btid, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *root_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:22701
#define LOFFS1
Definition: btree.c:337
int spage_get_space_for_record(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id)
#define btree_log_if_enabled(...)
Definition: btree.c:1144
bool db_value_is_null(const DB_VALUE *value)
int avg_val_per_key
Definition: btree.h:359
void range_reverse(RANGE &range)
BTREE_SCAN btree_scan
Definition: btree.c:346
BTREE_OBJECT_INFO second_object_info
Definition: btree.c:816
static void btree_leaf_set_flag(RECDES *recp, short record_flag)
Definition: btree.c:3437
#define ARG_FILE_LINE
Definition: error_manager.h:44
#define OR_PUT_MVCCID
int btree_online_index_check_unique_constraint(THREAD_ENTRY *thread_p, BTID *btid, const char *index_name, OID *class_oid)
Definition: btree.c:35273
static int btree_modify_overflow_link(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR ovfl_page, VPID *next_ovfl_vpid)
Definition: btree.c:9523
int btree_vacuum_object(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, OID *oid, OID *class_oid, MVCCID delete_mvccid)
Definition: btree.c:29429
#define BTREE_OID_HAS_MVCC_INSID_AND_DELID
Definition: btree.c:121
static int btree_seq_find_oid_from_ovfl(THREAD_ENTRY *thread_p, BTID_INT *btid_int, OID *oid, RECDES *ovf_record, char *initial_oid_ptr, char *oid_ptr_lower_bound, char *oid_ptr_upper_bound, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO *match_mvccinfo, int *offset_to_object, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:11644
BTREE_KEYRANGE key_range
Definition: btree.h:194
int pr_clone_value(const DB_VALUE *src, DB_VALUE *dest)
#define BTREE_IS_PRIMARY_KEY(unique_pk)
Definition: btree.h:88
static const bool COPY
#define BTREE_SEARCH_KEY_HELPER_INITIALIZER
Definition: btree.c:430
STATIC_INLINE void btree_set_mvccid(RECDES *rec, int mvccid_offset, MVCCID *p_mvccid, char **rv_undo_data_ptr, char **rv_redo_data_ptr) __attribute__((ALWAYS_INLINE))
Definition: btree.c:3691
static int btree_fix_ovfl_oid_page(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, char *btname)
Definition: btree.c:19379
static DISK_ISVALID btree_verify_subtree(THREAD_ENTRY *thread_p, const OID *class_oid_p, BTID_INT *btid, const char *btname, PAGE_PTR pg_ptr, VPID *pg_vpid, BTREE_NODE_INFO *INFO)
Definition: btree.c:7418
OID * m_curr_oid
Definition: btree.h:581
static void btree_record_remove_delid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int offset_to_object, char **rv_undo_data, char **rv_redo_data)
Definition: btree.c:32568
regu_variable_node * key2
Definition: access_spec.hpp:68
INT16 PGLENGTH
static DISK_ISVALID btree_find_key_from_page(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pg_ptr, OID *oid, DB_VALUE *key, bool *clear_key)
Definition: btree.c:18234
bool first_call
Definition: btree.h:394
static void btree_record_replace_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, BTREE_NODE_TYPE node_type, int *offset_to_replaced, BTREE_OBJECT_INFO *replacement, char **rv_undo_data, char **rv_redo_data)
Definition: btree.c:32697
int btree_rv_remove_unique_stats(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:29276
static bool btree_check_locking_for_delete_unique(THREAD_ENTRY *thread_p, const BTREE_DELETE_HELPER *delete_helper)
Definition: btree.c:35424
int btree_vacuum_insert_mvccid(THREAD_ENTRY *thread_p, BTID *btid, OR_BUF *buffered_key, OID *oid, OID *class_oid, MVCCID insert_mvccid)
Definition: btree.c:29397
#define BTREE_RV_SET_UPDATE_MAX_KEY_LEN(addr)
Definition: btree.c:1088
int btree_rv_undoredo_copy_page(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17996
char * or_unpack_mvccid(char *ptr, MVCCID *mvccid)
#define NEXT_MERGE_RECORD()
int btree_node_header_undo_log(THREAD_ENTRY *thread_p, VFID *vfid, PAGE_PTR page_ptr)
Definition: btree_load.c:414
#define VACUUM_LOG_ADD_DROPPED_FILE_POSTPONE
Definition: vacuum.h:78
int pr_is_string_type(DB_TYPE type)
#define BTREE_IS_MULTI_ROW_OP(op)
Definition: btree.h:60
unsigned int offset_to_record
Definition: slotted_page.h:87
bool oid_is_db_class(const OID *oid)
Definition: oid.c:219
MVCC_SNAPSHOT * mvcc_snapshot
Definition: heap_file.h:154
#define OR_GET_INT(ptr)
static void btree_dump_page(THREAD_ENTRY *thread_p, FILE *fp, const OID *class_oid_p, BTID_INT *btid, const char *btname, PAGE_PTR page_ptr, VPID *pg_vpid, int depth, int level)
Definition: btree.c:8842
#define BTS_IS_INDEX_ILS(bts)
Definition: btree.c:607
void btree_rv_noderec_dump(FILE *fp, int length, void *data)
Definition: btree.c:17296
static DISK_ISVALID btree_check_page_key(THREAD_ENTRY *thread_p, const OID *class_oid_p, BTID_INT *btid, const char *btname, PAGE_PTR page_ptr, VPID *page_vpid)
Definition: btree.c:7212
INT16 PGSLOTID
#define CAN_MERGE_WHEN_EMPTY
Definition: btree.c:92
OID * oid_ptr
Definition: btree.h:234
INT16 first_slotid
Definition: btree.c:291
DB_VALUE m_left_key
Definition: btree.h:545
int btree_initialize_new_page(THREAD_ENTRY *thread_p, PAGE_PTR page, void *args)
Definition: btree.c:4971
static int btree_verify_nonleaf_node(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR page_ptr)
Definition: btree.c:19552
int btree_find_foreign_key(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *class_oid, OID *found_oid)
Definition: btree.c:5972
#define BTREE_MAX_ALIGN
Definition: btree_load.h:66
static int btree_modify_leaf_ovfl_vpid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR leaf_page, RECDES *leaf_record, BTREE_SEARCH_KEY_HELPER *search_key, VPID *next_ovfl_vpid)
Definition: btree.c:9447
int btree_rv_nodehdr_undo_insert(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17175
int btree_range_scan(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, BTREE_RANGE_SCAN_PROCESS_KEY_FUNC *key_func)
Definition: btree.c:24922
#define BTREE_MVCC_INFO_INSID(mvcc_info)
Definition: btree.c:181
static int btree_overflow_remove_object(THREAD_ENTRY *thread_p, DB_VALUE *key, BTID_INT *btid_int, BTREE_DELETE_HELPER *delete_helper, PAGE_PTR *overflow_page, PAGE_PTR prev_page, PAGE_PTR leaf_page, RECDES *leaf_record, BTREE_SEARCH_KEY_HELPER *search_key, int offset_to_object)
Definition: btree.c:31435
bool vacuum_is_mvccid_vacuumed(MVCCID id)
Definition: vacuum.c:7361
void db_fprint_value(FILE *fp, const db_value *value)
void btree_leaf_change_first_object(THREAD_ENTRY *thread_p, RECDES *recp, BTID_INT *btid, OID *oidp, OID *class_oidp, BTREE_MVCC_INFO *mvcc_info, int *key_offset, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:2798
#define free_and_init(ptr)
Definition: memory_alloc.h:147
#define LOG_ISRESTARTED()
Definition: log_impl.h:232
int avg_pg_key_cnt
Definition: btree.h:371
#define DB_ALIGN(offset, align)
Definition: memory_alloc.h:84
#define BTREE_RV_IS_UNDO_MVCCDEL_MYOBJ(flags)
Definition: btree.c:1098
#define strlen(s1)
Definition: intl_support.c:43
#define BTID_COPY(btid_ptr1, btid_ptr2)
int or_get_attrname(RECDES *record, int attrid, char **string, int *alloced_string)
#define MAX_MERGE_ALIGN_WASTE
Definition: btree.c:88
#define BTREE_LEAF_RECORD_OVERFLOW_OIDS
Definition: btree.c:106
OID topclass_oid
Definition: btree.h:133
static bool btree_is_delete_object_purpose(BTREE_OP_PURPOSE purpose)
Definition: btree.c:34874
btree_insert_list * insert_list
Definition: btree.c:739
void LSA_SET_NULL(log_lsa *lsa_ptr)
Definition: log_lsa.hpp:146
int oid_compare(const void *a, const void *b)
Definition: oid.c:243
static int btree_record_get_last_object(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *recp, BTREE_NODE_TYPE node_type, int after_key_offset, OID *oidp, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, int *last_oid_mvcc_offset)
Definition: btree.c:3223
int btree_rv_nodehdr_undoredo_update(THREAD_ENTRY *thread_p, LOG_RCV *recv)
Definition: btree.c:17096
int pr_midxkey_remove_prefix(DB_VALUE *key, int prefix)
const log_rv_record_flag_type LOG_RV_RECORD_INSERT
Definition: log_append.hpp:134
#define MAX_LEAF_REC_NUM
Definition: btree.c:86
int heap_get_class_partitions(THREAD_ENTRY *thread_p, const OID *class_oid, OR_PARTITION **parts, int *parts_count)
Definition: heap_file.c:11016
static int btree_apply_key_range_and_filter(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, bool is_iss, bool *key_range_satisfied, bool *key_filter_satisfied, bool need_to_check_null)
Definition: btree.c:15774
int update_boundary_eq(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, DB_VALUE &subtree_value, bool &clear_subtree_value, const INT16 subtree_slot)
Definition: btree.c:35528
#define BTREE_LEAF_RECORD_CLASS_OID
Definition: btree.c:110
static int btree_find_oid_does_mvcc_info_match(THREAD_ENTRY *thread_p, BTREE_MVCC_INFO *mvcc_info, BTREE_OP_PURPOSE purpose, BTREE_MVCC_INFO *match_mvccinfo, bool *is_match)
Definition: btree.c:11228
#define BTREE_OID_HAS_MVCC_INSID
Definition: btree.c:115
SCAN_ATTRS rest_attrs
Definition: scan_manager.h:214
LOG_UNIQUE_STATS unique_stats
Definition: log_impl.h:622
#define BTS_NEED_COUNT_ONLY(bts)
Definition: btree.c:610
float tot_used_space
Definition: btree.h:370
DB_DOMAIN * domain
Definition: dbtype_def.h:865
static PAGE_PTR btree_find_boundary_leaf(THREAD_ENTRY *thread_p, BTID *btid, VPID *pg_vpid, BTREE_STATS *stat_info, BTREE_BOUNDARY where)
Definition: btree.c:14341
int btree_get_btid_from_file(THREAD_ENTRY *thread_p, const VFID *vfid, BTID *btid_out)
Definition: btree.c:6930
#define DB_WASTED_ALIGN(offset, align)
Definition: memory_alloc.h:90
int db_make_string_copy(DB_VALUE *value, DB_CONST_C_CHAR str)
#define DB_PAGESIZE
BTREE_BOUNDARY
Definition: btree.c:300
int or_put_align32(OR_BUF *buf)
static int btree_record_get_num_oids(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *rec, int offset, BTREE_NODE_TYPE node_type)
Definition: btree.c:2733
static void btree_insert_helper_to_delete_helper(BTREE_INSERT_HELPER *insert_helper, BTREE_DELETE_HELPER *delete_helper)
Definition: btree.c:35193
INDEX_SKIP_SCAN iss
Definition: scan_manager.h:226
int update_boundary_gt_or_eq(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, DB_VALUE &subtree_value, bool &clear_subtree_value, const INT16 subtree_slot, const int key_cnt)
Definition: btree.c:35591
#define MVCC_GET_DELID(header)
Definition: mvcc.h:57
static int btree_key_online_index_IB_insert_list(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:33460
#define BTREE_DELETE_HELPER_MSG(tabs)
Definition: btree.c:1187
#define BTREE_END_OF_SCAN(bts)
Definition: btree.h:317
static int btree_leaf_get_vpid_for_overflow_oids(RECDES *rec, VPID *vpid)
Definition: btree.c:2266
#define os_malloc(size)
Definition: memory_alloc.h:167
DB_LOGICAL
Definition: dbtype_def.h:1218
PAGE_PTR O_page
Definition: btree.h:184
void pgbuf_set_page_ptype(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PAGE_TYPE ptype)
Definition: page_buffer.c:4847
float avg_pg_free_sp
Definition: btree.h:372
int m_keep_page_iterations
Definition: btree.h:593
#define BTREE_INSERT_CLASS_OID(ins_helper)
Definition: btree.c:803
BTREE_DELETE_HELPER delete_helper
Definition: btree.c:1247
bool prm_get_bool_value(PARAM_ID prm_id)
#define INT_ALIGNMENT
Definition: memory_alloc.h:61
#define QSTR_IS_ANY_CHAR_OR_BIT(s)
Definition: string_opfunc.h:47
static DB_VALUE * btree_find_split_point(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, int *mid_slot, DB_VALUE *key, BTREE_INSERT_HELPER *helper, bool *clear_midkey)
Definition: btree.c:11851
int spage_get_free_space_without_saving(THREAD_ENTRY *thread_p, PAGE_PTR page_p, bool *need_update)
Definition: slotted_page.c:925
#define OR_PUT_INT(ptr, val)
DISK_ISVALID vacuum_check_not_vacuumed_rec_header(THREAD_ENTRY *thread_p, OID *oid, OID *class_oid, MVCC_REC_HEADER *rec_header, int btree_node_type)
Definition: vacuum.c:7314
static int btree_key_find_and_insert_delete_mvccid(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:28493
LOG_LSA reference_lsa
Definition: btree.c:839
int btree_locate_key(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, VPID *pg_vpid, INT16 *slot_id, PAGE_PTR *leaf_page_out, bool *found_p)
Definition: btree.c:14151
bool key_range_max_value_equal
Definition: btree.h:209
void er_clear(void)
int BTREE_RANGE_SCAN_PROCESS_KEY_FUNC(THREAD_ENTRY *thread_p, BTREE_SCAN *bts)
Definition: btree.h:643
static int btree_compare_individual_key_value(DB_VALUE *key1, DB_VALUE *key2, TP_DOMAIN *key_domain)
Definition: btree.c:18784
static BTREE_SEARCH btree_key_find_first_visible_row_from_all_ovf(THREAD_ENTRY *thread_p, BTID_INT *btid_int, VPID *first_ovfl_vpid, OID *oid, OID *class_oid)
Definition: btree.c:22284
static int btree_compress_node(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr)
Definition: btree.c:12539
#define SINGLE_ROW_INSERT
Definition: btree.h:52
void log_sysop_attach_to_outer(THREAD_ENTRY *thread_p)
Definition: log_manager.c:4076
VPID ovfl_vpid
Definition: btree.c:320
VPID O_vpid
Definition: btree.h:176
static int btree_key_append_object_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_record, LEAF_REC *leaf_record_info, int offset_after_key, BTREE_INSERT_HELPER *insert_helper, BTREE_OBJECT_INFO *first_object)
Definition: btree.c:28135
std::vector< key_oid * > m_sorted_keys_oids
Definition: btree.h:578
int btree_get_pkey_btid(THREAD_ENTRY *thread_p, OID *cls_oid, BTID *pkey_btid)
Definition: btree.c:7833
static int btree_read_fixed_portion_of_non_leaf_record_from_orbuf(OR_BUF *buf, NON_LEAF_REC *nlf_rec)
Definition: btree.c:3581
const MVCCID BTREE_ONLINE_INDEX_DELETE_FLAG_STATE
Definition: btree.c:1239
void log_sysop_commit(THREAD_ENTRY *thread_p)
Definition: log_manager.c:3895
static int btree_get_stats_with_fullscan(THREAD_ENTRY *thread_p, BTREE_STATS_ENV *env)
Definition: btree.c:6822
static int btree_recompress_record(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, DB_VALUE *fence_key, int old_prefix, int new_prefix)
Definition: btree.c:12457
btree_unique_stats * unique_stats_info
Definition: btree.c:714
#define DISK_VPID_ALIGNED_SIZE
static int btree_split_root(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R, VPID *P_vpid, VPID *Q_vpid, VPID *R_vpid, BTREE_NODE_TYPE node_type, DB_VALUE *key, BTREE_INSERT_HELPER *helper, VPID *child_vpid)
Definition: btree.c:13484
LOG_DATA_ADDR leaf_addr
Definition: btree.c:742
int btree_leaf_get_first_object(BTID_INT *btid, RECDES *recp, OID *oidp, OID *class_oid, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:2429
STATIC_INLINE bool btree_is_fence_key(PAGE_PTR leaf_page, PGSLOTID slotid) __attribute__((ALWAYS_INLINE))
Definition: btree.c:1862
#define ER_INVALID_DATA_FOR_PARTITION
Definition: error_code.h:1402
#define DB_VALUE_TYPE(value)
Definition: dbtype.h:72
int btree_multicol_key_has_null(DB_VALUE *key)
Definition: btree.c:18079
LOG_LSA cur_leaf_lsa
Definition: btree.h:214
int i
Definition: dynamic_load.c:954
#define BTREE_MVCC_INFO_CLEAR_FIXED_SIZE(mvcc_info)
Definition: btree.c:207
bool need_count_only
Definition: scan_manager.h:218
int db_make_null(DB_VALUE *value)
int spage_get_free_space(THREAD_ENTRY *thread_p, PAGE_PTR page_p)
Definition: slotted_page.c:898
static SCAN_CODE btree_scan_for_show_index_capacity(THREAD_ENTRY *thread_p, DB_VALUE **out_values, int out_cnt, const char *class_name, OR_INDEX *index_p)
Definition: btree.c:22135
char * pr_valstring(const DB_VALUE *val)
int qualified_keys
Definition: btree.h:205
PGBUF_LATCH_MODE nonleaf_latch_mode
Definition: btree.c:717
static PAGE_PTR btree_get_next_page(THREAD_ENTRY *thread_p, PAGE_PTR page_p)
Definition: btree.c:18560
DB_TYPE id
FILE_OVF_BTREE_DES btree_key_overflow
Definition: file_manager.h:135
#define NOT_FOUND
Definition: btree.c:101
bool log_operations
Definition: btree.c:734
int spage_header_size(void)
Definition: slotted_page.c:837
#define DB_IS_NULL(value)
Definition: dbtype.h:63
const size_t BTREE_RV_BUFFER_SIZE
Definition: btree.c:1103
int btree_rv_undo_record_modify(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
Definition: btree.c:28835
struct tp_domain * next
Definition: object_domain.h:74
static int btree_key_lock_and_append_object_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf, bool *restart, BTREE_SEARCH_KEY_HELPER *search_key, BTREE_INSERT_HELPER *insert_helper, RECDES *leaf_record)
Definition: btree.c:27665
#define INLINE
for(p=libs;*p;p++)
Definition: dynamic_load.c:968
static DB_VALUE * btree_set_split_point(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, INT16 mid_slot, DB_VALUE *key, bool *clear_midkey)
Definition: btree.c:13116
#define BTREE_OID_CLEAR_ALL_FLAGS(oid_ptr)
Definition: btree.c:134
int spage_slot_size(void)
Definition: slotted_page.c:827
static void btree_write_fixed_portion_of_non_leaf_record_to_orbuf(OR_BUF *buf, NON_LEAF_REC *nlf_rec)
Definition: btree.c:3563
static PAGE_PTR btree_find_AR_sampling_leaf(THREAD_ENTRY *thread_p, BTID *btid, VPID *pg_vpid, BTREE_STATS *stat_info_p, bool *found_p)
Definition: btree.c:14533
bool is_scan_started
Definition: btree.h:243
constexpr size_t LOG_RV_RECORD_UPDPARTIAL_ALIGNED_SIZE(size_t new_data_size)
Definition: log_append.hpp:204
INT16 type
TP_DOMAIN * tp_domain_resolve(DB_TYPE domain_type, DB_OBJECT *class_obj, int precision, int scale, TP_DOMAIN *setdomain, int collation)
bool qdata_copy_db_value(DB_VALUE *dest_p, const DB_VALUE *src_p)
Definition: query_opfunc.c:310
int leaf_pg_cnt
Definition: btree.h:360
heap_cache_attrinfo * attr_cache
std::vector< key_oid > m_keys_oids
Definition: btree.h:577
#define NULL_VOLID
bool btree_clear_key_value(bool *clear_flag, DB_VALUE *key_value)
Definition: btree.c:1919
#define HA_DISABLED()
MVCC_SNAPSHOT_FUNC snapshot_fnc
Definition: mvcc.h:176
OID * class_oids
Definition: btree.c:406
#define SP_ERROR
Definition: slotted_page.h:49
int lock_has_lock_on_object(const OID *oid, const OID *class_oid, LOCK lock)
static int btree_top_n_items_binary_search(RANGE_OPT_ITEM **top_n_items, int *att_idxs, TP_DOMAIN **domains, bool *desc_order, DB_VALUE *new_key_values, int num_keys, int first, int last, int *new_pos)
Definition: btree.c:19084
static int btree_fk_object_does_exist(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, char *object_ptr, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, bool *stop, void *args)
Definition: btree.c:25781
float tot_free_space
Definition: btree.h:368
int btree_init_node_header(THREAD_ENTRY *thread_p, const VFID *vfid, PAGE_PTR page_ptr, BTREE_NODE_HEADER *header, bool redo)
Definition: btree_load.c:373
#define IO_MAX_PAGE_SIZE
STATIC_INLINE const char * btree_op_type_to_string(int op_type) __attribute__((ALWAYS_INLINE))
Definition: btree.c:33129
static int btree_split_node(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR P, PAGE_PTR Q, PAGE_PTR R, VPID *P_vpid, VPID *Q_vpid, VPID *R_vpid, INT16 p_slot_id, BTREE_NODE_TYPE node_type, DB_VALUE *key, BTREE_INSERT_HELPER *helper, VPID *child_vpid)
Definition: btree.c:12653
DISK_ISVALID btree_repair_prev_link(THREAD_ENTRY *thread_p, OID *oid, BTID *index_btid, bool repair)
Definition: btree.c:8203
#define OFFS2
Definition: btree.c:333
int avg_rec_len
Definition: btree.h:367
float sum_rec_len
Definition: btree.h:364
int btree_keyval_search(THREAD_ENTRY *thread_p, BTID *btid, SCAN_OPERATION_TYPE scan_op_type, BTREE_SCAN *bts, key_val_range *kv_range, OID *class_oid, FILTER_INFO *filter, INDX_SCAN_ID *isidp, bool is_all_class_srch)
Definition: btree.c:14751
static int btree_fix_root_for_delete(THREAD_ENTRY *thread_p, BTID *btid, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *root_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:29778
#define BTID_IS_NULL(btid)
static int btree_get_num_visible_oids_from_all_ovf(THREAD_ENTRY *thread_p, BTID_INT *btid, VPID *first_ovfl_vpid, int *num_visible_oids, int *max_visible_oids, MVCC_SNAPSHOT *mvcc_snapshot)
Definition: btree.c:2460
DB_VALUE key1
Definition: access_spec.hpp:58
#define OFFS1
Definition: btree.c:332
INT16 flags
Definition: btree.c:321
DB_MIDXKEY midxkey
Definition: dbtype_def.h:1065
bool is_key_deleted
Definition: btree.c:831
PAGE_PTR crt_page
Definition: btree.h:393
int setval(DB_VALUE *dest, const DB_VALUE *src, bool copy) const
int db_make_int(DB_VALUE *value, const int num)
#define BTREE_OBJINFO_MSG(name)
Definition: btree.c:1162
OR_INDEX_STATUS index_status
bool pgbuf_has_any_waiters(PAGE_PTR pgptr)
struct db_object * class_mop
Definition: object_domain.h:81
#define TP_ARE_COMPARABLE_KEY_TYPES(key1_type, key2_type)
short volid
Definition: dbtype_def.h:887
void log_sysop_end_logical_run_postpone(THREAD_ENTRY *thread_p, LOG_LSA *posp_lsa)
Definition: log_manager.c:3982
static INLINE short btree_record_object_get_mvcc_flags(char *data) __attribute__((ALWAYS_INLINE))
Definition: btree.c:3391
static int btree_select_visible_object_for_range_scan(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, char *object_ptr, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, bool *stop, void *args)
Definition: btree.c:25444
static PAGE_PTR btree_find_leftmost_leaf(THREAD_ENTRY *thread_p, BTID *btid, VPID *pg_vpid, BTREE_STATS *stat_info_p)
Definition: btree.c:14311
int db_make_oid(DB_VALUE *value, const OID *oid)
PERF_UTIME_TRACKER time_track
Definition: btree.c:752
int unique_pk
Definition: btree.h:122
#define PGBUF_PAGE_STATE_MSG(name)
Definition: page_buffer.h:56
#define OID_ISNULL(oidp)
Definition: oid.h:81
#define BTS_IS_INDEX_MRO(bts)
Definition: btree.c:601
#define DONT_FREE
Definition: page_buffer.h:41
enum btree_rv_debug_id BTREE_RV_DEBUG_ID
Definition: btree.c:1141
#define BTREE_FIND_UNIQUE_HELPER_INITIALIZER
Definition: btree.c:472
int or_get_int(OR_BUF *buf, int *error)
int collation_id
Definition: object_domain.h:92
#define NON_LEAF_ENTRY_MAX_SIZE(n)
Definition: btree_load.h:84
MVCCID mvcc_next_id
int heap_get_indexinfo_of_btid(THREAD_ENTRY *thread_p, const OID *class_oid, const BTID *btid, BTREE_TYPE *type, int *num_attrs, ATTR_ID **attr_ids, int **attrs_prefix_length, char **btnamepp, int *func_index_col_id)
Definition: heap_file.c:13134
static void btree_split_test(THREAD_ENTRY *thread_p, BTID_INT *btid, DB_VALUE *key, VPID *S_vpid, PAGE_PTR S_page, BTREE_NODE_TYPE node_type)
Definition: btree.c:13296
bool need_update_max_key_len
Definition: btree.c:721
static bool btree_leaf_lsa_eq(THREAD_ENTRY *thread_p, LOG_LSA *a, LOG_LSA *b)
Definition: btree.c:22263
static int btree_key_relocate_last_into_ovf(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR leaf, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_record, LEAF_REC *leaf_record_info, int offset_after_key, BTREE_INSERT_HELPER *insert_helper)
Definition: btree.c:28249
char * oid_to_string(char *buf, int buf_size, OID *oid)
BTREE_INSERT_HELPER insert_helper
Definition: btree.c:1246
int logtb_delete_global_unique_stats(THREAD_ENTRY *thread_p, BTID *btid)
LEAF_REC leaf_rec_info
Definition: btree.h:220
ISS_OP_TYPE current_op
Definition: scan_manager.h:181
void heap_clear_partition_info(THREAD_ENTRY *thread_p, OR_PARTITION *parts, int parts_count)
Definition: heap_file.c:11126
#define BTREE_GET_OID(buf, oid_ptr)
Definition: btree.c:242
static int btree_or_put_mvccinfo(OR_BUF *buf, BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:21451
#define SINGLE_ROW_MODIFY
Definition: btree.h:55
char * vfid_to_string(char *buf, int buf_size, VFID *vfid)
#define BTREE_RECORD_OR_BUF_INIT(buf, btree_rec)
Definition: btree.c:251
DB_VALUE_COMPARE_RESULT
Definition: dbtype_def.h:199
static void btree_dump_leaf_record(THREAD_ENTRY *thread_p, FILE *fp, BTID_INT *btid, RECDES *rec, int n)
Definition: btree.c:4632
void btree_rv_keyval_dump(FILE *fp, int length, void *data)
Definition: btree.c:17947
TP_DOMAIN * key_type
Definition: btree.h:124
BTREE_ISCAN_OID_LIST * oid_list
Definition: scan_manager.h:201
SCAN_CODE btree_get_next_key_info(THREAD_ENTRY *thread_p, BTID *btid, BTREE_SCAN *bts, int num_classes, OID *class_oids_ptr, INDX_SCAN_ID *index_scan_id_p, DB_VALUE **key_info)
Definition: btree.c:16115
static void btree_online_index_check_state(MVCCID state)
Definition: btree.c:33216
void btree_set_mvcc_header_ids_for_update(THREAD_ENTRY *thread_p, bool do_delete_only, bool do_insert_only, MVCCID *mvcc_id, MVCC_REC_HEADER *mvcc_rec_header)
Definition: btree.c:21297
char * or_unpack_domain(char *ptr, struct tp_domain **domain_ptr, int *is_null)
LOG_DATA_ADDR leaf_addr
Definition: btree.c:834
RANGE
Definition: access_spec.hpp:32
DISK_ISVALID btree_check_all(THREAD_ENTRY *thread_p)
Definition: btree.c:8271
static int btree_undo_delete_physical(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *class_oid, OID *oid, BTREE_MVCC_INFO *mvcc_info, LOG_LSA *undo_nxlsa)
Definition: btree.c:25934
int btree_read_record(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR pgptr, RECDES *rec, DB_VALUE *key, void *rec_header, BTREE_NODE_TYPE node_type, bool *clear_key, int *offset, int copy_key, BTREE_SCAN *bts)
Definition: btree.c:4233
int overflow_get_length(THREAD_ENTRY *thread_p, const VPID *ovf_vpid)
int heap_get_class_name(THREAD_ENTRY *thread_p, const OID *class_oid, char **class_name)
Definition: heap_file.c:9328
#define db_private_realloc(thrd, ptr, size)
Definition: memory_alloc.h:231
short key_len
Definition: btree.h:106
#define PEEK
Definition: file_io.h:74
PERF_PAGE_TYPE
Definition: perf_monitor.h:209
LOG_TRAN_BTID_UNIQUE_STATS * logtb_tran_find_btid_stats(THREAD_ENTRY *thread_p, const BTID *btid, bool create)
bool check_release_latch(THREAD_ENTRY *thread_p, void *arg, PAGE_PTR leaf_page)
Definition: btree.c:35730
static char * btree_leaf_get_nth_oid_ptr(BTID_INT *btid, RECDES *recp, BTREE_NODE_TYPE node_type, int oid_list_offset, int n)
Definition: btree.c:3133
#define BTREE_OID_HAS_MVCC_DELID
Definition: btree.c:117
static int btree_range_scan_descending_fix_prev_leaf(THREAD_ENTRY *thread_p, BTREE_SCAN *bts, int *key_count, BTREE_NODE_HEADER **node_header_ptr, VPID *next_vpid)
Definition: btree.c:24696
int overflow_insert(THREAD_ENTRY *thread_p, const VFID *ovf_vfid, VPID *ovf_vpid, RECDES *recdes, FILE_TYPE file_type)
Definition: overflow_file.c:95
LOG_LSA compensate_undo_nxlsa
Definition: btree.c:748
DB_VALUE key2
Definition: access_spec.hpp:59
STATIC_INLINE void btree_add_mvccid(RECDES *rec, int oid_offset, int mvccid_offset, MVCCID mvccid, short flag, char **rv_undo_data_ptr, char **rv_redo_data_ptr) __attribute__((ALWAYS_INLINE))
Definition: btree.c:3635
static int btree_delete_key_from_leaf(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR leaf_pg, LEAF_REC *leafrec_pnt, BTREE_DELETE_HELPER *delete_helper, BTREE_SEARCH_KEY_HELPER *search_key)
Definition: btree.c:9122
static int btree_find_lower_bound_leaf(THREAD_ENTRY *thread_p, BTREE_SCAN *BTS, BTREE_STATS *stat_info_p)
Definition: btree.c:14207
#define VPID_SET_NULL(vpid_ptr)
Definition: dbtype_def.h:906
static int btree_key_append_object_as_new_overflow(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR leaf_page, BTREE_OBJECT_INFO *object_info, BTREE_INSERT_HELPER *insert_helper, BTREE_SEARCH_KEY_HELPER *search_key, RECDES *leaf_rec, VPID *first_ovfl_vpid)
Definition: btree.c:10770
BTREE_SCAN btree_scan
Definition: btree.h:350
BTREE_NODE_SPLIT_INFO split_info
Definition: btree_load.h:196
static int btree_split_node_and_advance(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *crt_page, PAGE_PTR *advance_to_page, bool *is_leaf, BTREE_SEARCH_KEY_HELPER *search_key, bool *stop, bool *restart, void *other_args)
Definition: btree.c:26588
static int btree_remove_delete_mvccid_unique_internal(THREAD_ENTRY *thread_p, BTID_INT *btid_int, BTREE_DELETE_HELPER *helper, PAGE_PTR leaf_page, RECDES *leaf_record, BTREE_NODE_TYPE node_type, PAGE_PTR overflow_page, RECDES *overflow_record, int offset_to_object, char **rv_undo_data, char **rv_redo_data)
Definition: btree.c:32204
bool btree_is_unique_type(BTREE_TYPE type)
Definition: btree.c:6046
OR_ATTRIBUTE ** atts
#define MVCCID_IS_VALID(id)
char * buf
Definition: dbtype_def.h:866
#define BTREE_OID_SET_RECORD_FLAG(oid_ptr, mvcc_flag)
Definition: btree.c:151
#define PGBUF_PAGE_STATE_ARGS(pg)
Definition: page_buffer.h:57
int BTREE_PROCESS_KEY_FUNCTION(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:578
struct indx_scan_id * index_scan_idp
Definition: btree.h:241
static void btree_perf_unique_lock_time(THREAD_ENTRY *thread_p, PERF_UTIME_TRACKER *track, LOCK lock)
Definition: btree.c:985
#define BTREE_MVCC_INFO_AS_ARGS(mvcc_info)
Definition: btree.c:1158
int common_prefix
Definition: btree.h:207
#define STATS_SAMPLING_THRESHOLD
Definition: statistics.h:37
int btree_check_foreign_key(THREAD_ENTRY *thread_p, OID *cls_oid, HFID *hfid, OID *oid, DB_VALUE *keyval, int n_attrs, OID *pk_cls_oid, BTID *pk_btid, const char *fk_name)
Definition: btree.c:22005
bool force_restart_from_root
Definition: btree.h:244
int BTREE_PROCESS_OBJECT_FUNCTION(THREAD_ENTRY *thread_p, BTID_INT *btid_int, RECDES *record, char *object_ptr, OID *oid, OID *class_oid, BTREE_MVCC_INFO *mvcc_info, bool *stop, void *args)
Definition: btree.c:592
#define BTREE_OID_SET_MVCC_FLAG(oid_ptr, mvcc_flag)
Definition: btree.c:149
std::int64_t offset
Definition: log_lsa.hpp:37
int btree_node_header_redo_log(THREAD_ENTRY *thread_p, VFID *vfid, PAGE_PTR page_ptr)
Definition: btree_load.c:436
char * index_name
Definition: btree.c:405
static int btree_key_find_and_lock_unique_of_non_unique(THREAD_ENTRY *thread_p, BTID_INT *btid_int, DB_VALUE *key, PAGE_PTR *leaf_page, BTREE_SEARCH_KEY_HELPER *search_key, bool *restart, void *other_args)
Definition: btree.c:23213
void reset_boundary_keys()
Definition: btree.c:35691
OID match_class_oid
Definition: btree.h:236
static void btree_rv_log_insert_object(THREAD_ENTRY *thread_p, const BTREE_INSERT_HELPER &insert_helper, LOG_DATA_ADDR &addr, int undo_length, int redo_length, const char *undo_data, const char *redo_data)
Definition: btree.c:34965
DB_CLASS_PARTITION_TYPE
#define VACUUM_LOG_ADD_DROPPED_FILE_UNDO
Definition: vacuum.h:79
#define BTREE_INSERT_MVCC_INFO(ins_helper)
Definition: btree.c:805
#define BTREE_RV_SET_UNDO_MVCCDEL_MYOBJ(addr)
Definition: btree.c:1096
#define VFID_SET_NULL(vfid_ptr)
Definition: file_manager.h:65
QFILE_LIST_ID * list_id
Definition: scan_manager.h:126
fence_key_presence has_fence_key
Definition: btree.c:426
static int btree_set_vpid_previous_vpid(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_p, VPID *prev)
Definition: btree.c:18611
char * btree_unpack_mvccinfo(char *ptr, BTREE_MVCC_INFO *mvcc_info, short btree_mvcc_flags)
Definition: btree.c:21338
static void btree_write_fixed_portion_of_non_leaf_record(RECDES *rec, NON_LEAF_REC *nlf_rec)
Definition: btree.c:3514
void tp_domain_free(TP_DOMAIN *dom)
DB_CONST_C_CHAR db_get_string(const DB_VALUE *value)
#define BTREE_MVCC_INFO_SET_INSID(mvcc_info, insid)
Definition: btree.c:222
DB_VALUE * upper_key
Definition: btree.h:142
VPID * pgbuf_get_vpid_ptr(PAGE_PTR pgptr)
Definition: page_buffer.c:4609
int or_advance(OR_BUF *buf, int offset)
DB_BIGINT * key_limit_upper
Definition: btree.h:239
struct tp_domain * or_get_domain(OR_BUF *buf, struct tp_domain *dom, int *is_null)
static int btree_insert_mvcc_delid_into_page(THREAD_ENTRY *thread_p, BTID_INT *btid, PAGE_PTR page_ptr, BTREE_NODE_TYPE node_type, DB_VALUE *key, BTREE_INSERT_HELPER *insert_helper, PGSLOTID slot_id, RECDES *rec, int oid_offset)
Definition: btree.c:21119
bool LOG_RV_RECORD_IS_DELETE(log_rv_record_flag_type flags)
Definition: log_append.hpp:180
DISK_ISVALID
Definition: disk_manager.h:53
struct db_char::@54 medium
int btree_mvcc_delete(THREAD_ENTRY *thread_p, BTID *btid, DB_VALUE *key, OID *class_oid, OID *oid, int op_type, btree_unique_stats *unique_stat_info, int *unique, MVCC_REC_HEADER *p_mvcc_rec_header)
Definition: btree.c:26021
static bool btree_is_insert_data_purpose(BTREE_OP_PURPOSE purpose)
Definition: btree.c:34804
#define OR_GET_SHORT(ptr)
DB_VALUE_COMPARE_RESULT pr_midxkey_compare(DB_MIDXKEY *mul1, DB_MIDXKEY *mul2, int do_coercion, int total_order, int num_index_term, int *start_colp, int *result_size1, int *result_size2, int *diff_column, bool *dom_is_desc, bool *next_dom_is_desc)
int spage_insert_at(THREAD_ENTRY *thread_p, PAGE_PTR page_p, PGSLOTID slot_id, RECDES *record_descriptor_p)
#define VACUUM_ER_LOG_BTREE
Definition: vacuum.h:49
#define BTREE_LEAF_RECORD_OVERFLOW_KEY
Definition: btree.c:108
int btree_packed_mvccinfo_size(BTREE_MVCC_INFO *mvcc_info)
Definition: btree.c:21394
#define HEAP_ISVALID_OID(thread_p, oid)
Definition: heap_file.h:77
int get_disk_size_of_value(const DB_VALUE *value) const
#define BTREE_SET_UNIQUE_VIOLATION_ERROR(THREAD, KEY, OID, C_OID, BTID, BTNM)
Definition: btree.h:96
#define BTREE_INIT_SCAN(bts)
Definition: btree.h:253
static int btree_compare_oid(const void *oid_mem1, const void *oid_mem2)
Definition: btree.c:19458
const VPID * overflow_delete(THREAD_ENTRY *thread_p, const VFID *ovf_vfid, const VPID *ovf_vpid)
int log_rv_undoredo_record_partial_changes(THREAD_ENTRY *thread_p, char *rcv_data, int rcv_data_length, RECDES *record, bool is_undo)
PGNSLOTS num_slots
Definition: slotted_page.h:63
static int btree_leaf_is_key_between_min_max(THREAD_ENTRY *thread_p, BTID_INT *btid_int, PAGE_PTR leaf, DB_VALUE *key, BTREE_SEARCH_KEY_HELPER *search_key)
Definition: btree.c:5175
bool tf_is_catalog_class(OID *class_oid)
Definition: transform.c:523
DB_VALUE * m_curr_key
Definition: btree.h:580
static void btree_remove_mvccid(RECDES *record, int oid_offset, int mvccid_offset, short flag, char **rv_undo_data_ptr, char **rv_redo_data_ptr)
Definition: btree.c:3727
bool is_first_search
Definition: btree.c:828
SCAN_CODE heap_get_class_record(THREAD_ENTRY *thread_p, const OID *class_oid, RECDES *recdes_p, HEAP_SCANCACHE *scan_cache, int ispeeking)
Definition: heap_file.c:24780
#define ER_REPL_MULTI_UPDATE_UNIQUE_VIOLATION
Definition: error_code.h:1217