CUBRID Engine  latest
page_buffer.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * page_buffer.c - Page buffer management module (at the server)
21  */
22 
23 #ident "$Id$"
24 
25 #include "config.h"
26 
27 #include <stdlib.h>
28 #include <stddef.h>
29 #include <string.h>
30 #include <assert.h>
31 
32 #include "page_buffer.h"
33 
34 #include "storage_common.h"
35 #include "memory_alloc.h"
36 #include "system_parameter.h"
37 #include "error_manager.h"
38 #include "file_io.h"
40 #include "log_append.hpp"
41 #include "log_manager.h"
42 #include "log_impl.h"
43 #include "log_volids.hpp"
44 #include "transaction_sr.h"
45 #include "memory_hash.h"
46 #include "critical_section.h"
47 #include "perf_monitor.h"
48 #include "porting_inline.hpp"
49 #include "environment_variable.h"
50 #include "thread_daemon.hpp"
51 #include "thread_entry_task.hpp"
52 #include "thread_manager.hpp"
53 #include "list_file.h"
54 #include "tsc_timer.h"
55 #include "query_manager.h"
56 #include "xserver_interface.h"
57 #include "btree_load.h"
58 #include "boot_sr.h"
59 #include "double_write_buffer.h"
60 #include "resource_tracker.hpp"
61 #include "tde.h"
62 #include "show_scan.h"
63 #include "numeric_opfunc.h"
64 #include "dbtype.h"
65 
66 #if defined(SERVER_MODE)
67 #include "connection_error.h"
68 #endif /* SERVER_MODE */
69 #if defined(ENABLE_SYSTEMTAP)
70 #include "probes.h"
71 #endif /* ENABLE_SYSTEMTAP */
72 #include "thread_entry.hpp"
73 
75 
76 /* minimum number of buffers */
77 #define PGBUF_MINIMUM_BUFFERS (MAX_NTRANS * 10)
78 
79 /* BCB holder list related constants */
80 
81 /* Each thread has its own free BCB holder list.
82  The list has PGBUF_DEFAULT_FIX_COUNT entries by default. */
83 #define PGBUF_DEFAULT_FIX_COUNT 7
84 
85 /* Each BCB holder array, that is allocated from OS,
86  has PGBUF_NUM_ALLOC_HOLDER elements(BCB holder entries). */
87 #define PGBUF_NUM_ALLOC_HOLDER 10
88 
89 #if !defined(SERVER_MODE)
90 /* TODO: do we need to do this? */
91 #define pthread_mutex_init(a, b)
92 #define pthread_mutex_destroy(a)
93 #define pthread_mutex_lock(a) 0
94 #define pthread_mutex_unlock(a)
95 static int rv;
96 #endif /* !SERVER_MODE */
97 
98 /* default timeout seconds for infinite wait */
99 #define PGBUF_TIMEOUT 300 /* timeout seconds */
100 #define PGBUF_FIX_COUNT_THRESHOLD 64 /* fix count threshold. used as indicator for hot pages. */
101 
102 /* size of io page */
103 #if defined(CUBRID_DEBUG)
104 #define SIZEOF_IOPAGE_PAGESIZE_AND_GUARD() (IO_PAGESIZE + sizeof (pgbuf_Guard))
105 #else /* CUBRID_DEBUG */
106 #define SIZEOF_IOPAGE_PAGESIZE_AND_GUARD() (IO_PAGESIZE)
107 #endif /* CUBRID_DEBUG */
108 
109 /* size of one buffer page <BCB, page> */
110 #define PGBUF_BCB_SIZEOF (sizeof (PGBUF_BCB))
111 #define PGBUF_IOPAGE_BUFFER_SIZE \
112  ((size_t)(offsetof (PGBUF_IOPAGE_BUFFER, iopage) + \
113  SIZEOF_IOPAGE_PAGESIZE_AND_GUARD()))
114 /* size of buffer hash entry */
115 #define PGBUF_BUFFER_HASH_SIZEOF (sizeof (PGBUF_BUFFER_HASH))
116 /* size of buffer lock record */
117 #define PGBUF_BUFFER_LOCK_SIZEOF (sizeof (PGBUF_BUFFER_LOCK))
118 /* size of one LRU list structure */
119 #define PGBUF_LRU_LIST_SIZEOF (sizeof (PGBUF_LRU_LIST))
120 /* size of BCB holder entry */
121 #define PGBUF_HOLDER_SIZEOF (sizeof (PGBUF_HOLDER))
122 /* size of BCB holder array that is allocated in one time */
123 #define PGBUF_HOLDER_SET_SIZEOF (sizeof (PGBUF_HOLDER_SET))
124 /* size of BCB holder anchor */
125 #define PGBUF_HOLDER_ANCHOR_SIZEOF (sizeof (PGBUF_HOLDER_ANCHOR))
126 
127 /* get memory address(pointer) */
128 #define PGBUF_FIND_BCB_PTR(i) \
129  ((PGBUF_BCB *) ((char *) &(pgbuf_Pool.BCB_table[0]) + (PGBUF_BCB_SIZEOF * (i))))
130 
131 #define PGBUF_FIND_IOPAGE_PTR(i) \
132  ((PGBUF_IOPAGE_BUFFER *) ((char *) &(pgbuf_Pool.iopage_table[0]) + (PGBUF_IOPAGE_BUFFER_SIZE * (i))))
133 
134 #define PGBUF_FIND_BUFFER_GUARD(bufptr) \
135  (&bufptr->iopage_buffer->iopage.page[DB_PAGESIZE])
136 
137 /* macros for casting pointers */
138 #define CAST_PGPTR_TO_BFPTR(bufptr, pgptr) \
139  do { \
140  (bufptr) = ((PGBUF_BCB *) ((PGBUF_IOPAGE_BUFFER *) \
141  ((char *) pgptr - offsetof (PGBUF_IOPAGE_BUFFER, iopage.page)))->bcb); \
142  assert ((bufptr) == (bufptr)->iopage_buffer->bcb); \
143  } while (0)
144 
145 #define CAST_PGPTR_TO_IOPGPTR(io_pgptr, pgptr) \
146  do { \
147  (io_pgptr) = (FILEIO_PAGE *) ((char *) pgptr - offsetof (FILEIO_PAGE, page)); \
148  } while (0)
149 
150 #define CAST_IOPGPTR_TO_PGPTR(pgptr, io_pgptr) \
151  do { \
152  (pgptr) = (PAGE_PTR) ((char *) (io_pgptr)->page); \
153  } while (0)
154 
155 #define CAST_BFPTR_TO_PGPTR(pgptr, bufptr) \
156  do { \
157  assert ((bufptr) == (bufptr)->iopage_buffer->bcb); \
158  (pgptr) = ((PAGE_PTR) ((char *) (bufptr->iopage_buffer) + offsetof (PGBUF_IOPAGE_BUFFER, iopage.page))); \
159  } while (0)
160 
161 /* check whether the given volume is auxiliary volume */
162 #define PGBUF_IS_AUXILIARY_VOLUME(volid) ((volid) < LOG_DBFIRST_VOLID ? true : false)
163 
164 /************************************************************************/
165 /* Page buffer zones section */
166 /************************************************************************/
167 
168 /* (bcb flags + zone = 2 bytes) + (lru index = 2 bytes); lru index values start from 0. */
169 /* if that changes, make the right updates here. */
170 #define PGBUF_LRU_NBITS 16
171 #define PGBUF_LRU_LIST_MAX_COUNT ((int) 1 << PGBUF_LRU_NBITS) /* 64k */
172 #define PGBUF_LRU_INDEX_MASK (PGBUF_LRU_LIST_MAX_COUNT - 1) /* 0x0000FFFF */
173 
174 /* PGBUF_ZONE - enumeration with all page buffer zones */
175 typedef enum
176 {
177  /* zone values start after reserved values for lru indexes */
178  /* LRU zones explained:
179  * 1. This is hottest zone and this is where most fixed/unfixed bcb's are found. We'd like to keep the page unfix
180  * complexity to a minimum, therefore no boost to top are done here. This zone's bcb's cannot be victimized.
181  * 2. This is a buffer between the hot lru 1 zone and the victimization lru 3 zone. The buffer zone gives bcb's that
182  * fall from first zone a chance to be boosted back to top (if they are still hot). Victimization is still not
183  * allowed.
184  * 3. Third zone is the victimization zone. BCB's can still be boosted if fixed/unfixed, but in aggressive victimizing
185  * systems, non-dirty bcb's rarely survive here.
186  */
190  /* make sure lru zone mask covers all lru zone values */
192 
193  /* other zone values must have a completely different mask than lru zone. so also skip the two bits used for
194  * PGBUF_LRU_ZONE_MASK */
195  PGBUF_INVALID_ZONE = 1 << (PGBUF_LRU_NBITS + 2), /* invalid zone */
196  PGBUF_VOID_ZONE = 2 << (PGBUF_LRU_NBITS + 2), /* void zone: temporary zone after reading bcb from disk until and
197  * until adding to a lru list, or after removing from lru list and
198  * until victimizing. */
199 
200  /* zone mask should cover all zone values */
202 } PGBUF_ZONE;
203 
204 #define PGBUF_MAKE_ZONE(list_id, zone) ((list_id) | (zone))
205 #define PGBUF_GET_ZONE(flags) ((PGBUF_ZONE) ((flags) & PGBUF_ZONE_MASK))
206 #define PGBUF_GET_LRU_INDEX(flags) ((flags) & PGBUF_LRU_INDEX_MASK)
207 
208 /************************************************************************/
209 /* Page buffer BCB section */
210 /************************************************************************/
211 
212 /* bcb flags */
213 /* dirty: false initially, is set to true when page is modified. set to false again when flushed to disk. */
214 #define PGBUF_BCB_DIRTY_FLAG ((int) 0x80000000)
215 /* is flushing: set to true when someone intends to flush the bcb to disk. dirty flag is usually set to false, but
216  * bcb cannot be yet victimized. flush must succeed first. */
217 #define PGBUF_BCB_FLUSHING_TO_DISK_FLAG ((int) 0x40000000)
218 /* flag to mark bcb was directly victimized. we can have certain situations when victimizations fail. the thread goes
219  * to sleep then and waits to be awaken by another thread, which also assigns it a bcb directly. there can be multiple
220  * providers of such bcb's.
221  * there is a small window of opportunity for active workers to fix this bcb. when fixing a direct victim, we need to
222  * replace the flag with PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG. there is not point of victimizing this bcb to fix it
223  * again. The thread waiting for the bcb will know it was fixed again and will request another bcb. */
224 #define PGBUF_BCB_VICTIM_DIRECT_FLAG ((int) 0x20000000)
225 #define PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG ((int) 0x10000000)
226 /* flag for unlatch bcb to move it to the bottom of lru when fix count is 0. usually set when page is deallocated */
227 #define PGBUF_BCB_MOVE_TO_LRU_BOTTOM_FLAG ((int) 0x08000000)
228 /* flag for pages that should be vacuumed. */
229 #define PGBUF_BCB_TO_VACUUM_FLAG ((int) 0x04000000)
230 /* flag for asynchronous flush request */
231 #define PGBUF_BCB_ASYNC_FLUSH_REQ ((int) 0x02000000)
232 
233 /* add all flags here */
234 #define PGBUF_BCB_FLAGS_MASK \
235  (PGBUF_BCB_DIRTY_FLAG \
236  | PGBUF_BCB_FLUSHING_TO_DISK_FLAG \
237  | PGBUF_BCB_VICTIM_DIRECT_FLAG \
238  | PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG \
239  | PGBUF_BCB_MOVE_TO_LRU_BOTTOM_FLAG \
240  | PGBUF_BCB_TO_VACUUM_FLAG \
241  | PGBUF_BCB_ASYNC_FLUSH_REQ)
242 
243 /* add flags that invalidate a victim candidate here */
244 /* 1. dirty bcb's cannot be victimized.
245  * 2. bcb's that are in the process of being flushed cannot be victimized. flush must succeed!
246  * 3. bcb's that are already assigned as victims are not valid victim candidates.
247  */
248 #define PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK \
249  (PGBUF_BCB_DIRTY_FLAG \
250  | PGBUF_BCB_FLUSHING_TO_DISK_FLAG \
251  | PGBUF_BCB_VICTIM_DIRECT_FLAG \
252  | PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG)
253 
254 /* bcb has no flag initially and is in invalid zone */
255 #define PGBUF_BCB_INIT_FLAGS PGBUF_INVALID_ZONE
256 
257 /* fix & avoid dealloc counter... we have one integer and each uses two bytes. fix counter is offset by two bytes. */
258 #define PGBUF_BCB_COUNT_FIX_SHIFT_BITS 16
259 #define PGBUF_BCB_AVOID_DEALLOC_MASK ((int) 0x0000FFFF)
260 
261 /* Activity on each LRU is probed and cumulated;
262  * to avoid long history cumulation effect, the activity indicator is limited (PGBUF_TRAN_MAX_ACTIVITY);
263  * Inactivity threshold is defined: private LRU dropping beneath this threshold are destroyed and its BCBs will be
264  * victimized.
265  */
266 #define PGBUF_TRAN_THRESHOLD_ACTIVITY (pgbuf_Pool.num_buffers / 4)
267 #define PGBUF_TRAN_MAX_ACTIVITY (10 * PGBUF_TRAN_THRESHOLD_ACTIVITY)
268 
269 #define PGBUF_AOUT_NOT_FOUND -2
270 
271 #if defined (SERVER_MODE)
272 /* vacuum workers and checkpoint thread should not contribute to promoting a bcb as active/hot */
273 #define PGBUF_THREAD_SHOULD_IGNORE_UNFIX(th) VACUUM_IS_THREAD_VACUUM_WORKER (th)
274 #else
275 #define PGBUF_THREAD_SHOULD_IGNORE_UNFIX(th) false
276 #endif
277 
278 #define HASH_SIZE_BITS 20
279 #define PGBUF_HASH_SIZE (1 << HASH_SIZE_BITS)
280 
281 #define PGBUF_HASH_VALUE(vpid) pgbuf_hash_func_mirror(vpid)
282 
283 /* Maximum overboost flush multiplier: controls the maximum factor to apply to configured flush ratio,
284  * when the miss rate (victim_request/fix_request) increases.
285  */
286 #define PGBUF_FLUSH_VICTIM_BOOST_MULT 10
287 
288 #define PGBUF_NEIGHBOR_FLUSH_NONDIRTY \
289  (prm_get_bool_value (PRM_ID_PB_NEIGHBOR_FLUSH_NONDIRTY))
290 
291 #define PGBUF_MAX_NEIGHBOR_PAGES 32
292 #define PGBUF_NEIGHBOR_PAGES \
293  (prm_get_integer_value (PRM_ID_PB_NEIGHBOR_FLUSH_PAGES))
294 
295 #define PGBUF_NEIGHBOR_POS(idx) (PGBUF_NEIGHBOR_PAGES - 1 + (idx))
296 
297 /* maximum number of simultaneous fixes a thread may have on the same page */
298 #define PGBUF_MAX_PAGE_WATCHERS 64
299 /* maximum number of simultaneous fixed pages from a single thread */
300 #define PGBUF_MAX_PAGE_FIXED_BY_TRAN 64
301 
302 /* max and min flush rate in pages/sec during checkpoint */
303 #define PGBUF_CHKPT_MAX_FLUSH_RATE 1200
304 #define PGBUF_CHKPT_MIN_FLUSH_RATE 50
305 
306 /* default pages to flush in each interval during log checkpoint */
307 #define PGBUF_CHKPT_BURST_PAGES 16
308 
309 #define INIT_HOLDER_STAT(perf_stat) \
310  do \
311  { \
312  (perf_stat)->dirty_before_hold = 0; \
313  (perf_stat)->dirtied_by_holder = 0; \
314  (perf_stat)->hold_has_write_latch = 0; \
315  (perf_stat)->hold_has_read_latch = 0; \
316  } \
317  while (0)
318 
319 /* use define PGBUF_ORDERED_DEBUG to enable extended debug for ordered fix */
320 // todo - is it better to replace with a system parameter?
321 #undef PGBUF_ORDERED_DEBUG
322 
323 #define PGBUF_LRU_ZONE_MIN_RATIO 0.05f
324 #define PGBUF_LRU_ZONE_MAX_RATIO 0.90f
325 
326 /* buffer lock return value */
327 enum
328 {
330 };
331 
332 /* constants to indicate the content state of buffers */
333 enum
334 {
335  PGBUF_CONTENT_BAD = 0, /* A bug in the system */
336  PGBUF_CONTENT_GOOD, /* Content is consistent */
337  PGBUF_CONTENT_LIKELY_BAD, /* Maybe a bug in the system */
338  PGBUF_CONTENT_ERROR /* Some kind of error */
339 };
340 
341 typedef struct pgbuf_holder PGBUF_HOLDER;
344 
345 typedef struct pgbuf_bcb PGBUF_BCB;
348 
351 
355 
358 
360 
362 
364 
365 typedef struct pgbuf_status PGBUF_STATUS;
368 
370 {
371  unsigned long long num_hit;
372  unsigned long long num_page_request;
373  unsigned long long num_pages_created;
374  unsigned long long num_pages_written;
375  unsigned long long num_pages_read;
377  unsigned int dummy;
378 };
379 
381 {
382  unsigned int free_pages;
384  unsigned int clean_pages;
385  unsigned int dirty_pages;
386  unsigned int num_index_pages;
387  unsigned int num_data_pages;
388  unsigned int num_system_pages;
389  unsigned int num_temp_pages;
390 };
391 
393 {
394  unsigned long long num_hit;
395  unsigned long long num_page_request;
396  unsigned long long num_pages_created;
397  unsigned long long num_pages_written;
398  unsigned long long num_pages_read;
400 };
401 
403 {
404  VPID vpid; /* page to which holder refers */
405  PGBUF_ORDERED_GROUP group_id; /* group (VPID of heap header ) of the page */
406  int rank; /* rank of page (PGBUF_ORDERED_RANK) */
407  int watch_count; /* number of watchers on this holder */
408  PGBUF_WATCHER *watcher[PGBUF_MAX_PAGE_WATCHERS]; /* pointers to all watchers to this holder */
409  PGBUF_LATCH_MODE latch_mode; /* aggregate latch mode of all watchers */
410  PAGE_TYPE ptype; /* page type (should be HEAP or OVERFLOW) */
411  bool prevent_dealloc; /* page is prevented from being deallocated. */
412 };
413 
415 
416 /* Holder flags used by perf module */
418 {
419  unsigned dirty_before_hold:1; /* page was dirty before holder was acquired */
420  unsigned dirtied_by_holder:1; /* page was dirtied by holder */
421  unsigned hold_has_write_latch:1; /* page has/had write latch */
422  unsigned hold_has_read_latch:1; /* page has/had read latch */
423 };
424 
426 
428 {
429  int npages;
432  PGBUF_BCB *pages_bufptr[2 * PGBUF_MAX_NEIGHBOR_PAGES - 1];
434 };
435 
436 /* BCB holder entry */
438 {
439  int fix_count; /* the count of fix by the holder */
440  PGBUF_BCB *bufptr; /* pointer to BCB */
441  PGBUF_HOLDER *thrd_link; /* the next BCB holder entry in the BCB holder list of thread */
442  PGBUF_HOLDER *next_holder; /* free BCB holder list of thread */
444 #if !defined(NDEBUG)
445  char fixed_at[64 * 1024];
447 #endif /* NDEBUG */
448 
452 };
453 
454 /* thread related BCB holder list (it is owned by each thread) */
456 {
457  int num_free_cnt; /* # of free BCB holder entries */
458  int num_hold_cnt; /* # of used BCB holder entries */
459  PGBUF_HOLDER *thrd_free_list; /* free BCB holder list */
460  PGBUF_HOLDER *thrd_hold_list; /* used(or hold) BCB holder list */
461 };
462 
463 /* the entry(array structure) of free BCB holder list shared by threads */
465 {
466  PGBUF_HOLDER element[PGBUF_NUM_ALLOC_HOLDER]; /* BCB holder array */
467  PGBUF_HOLDER_SET *next_set; /* next array */
468 };
469 
470 /* BCB structure */
471 struct pgbuf_bcb
472 {
473 #if defined(SERVER_MODE)
474  pthread_mutex_t mutex; /* BCB mutex */
475  int owner_mutex; /* mutex owner */
476 #endif /* SERVER_MODE */
477  VPID vpid; /* Volume and page identifier of resident page */
478  int fcnt; /* Fix count */
479  PGBUF_LATCH_MODE latch_mode; /* page latch mode */
480  volatile int flags;
481 #if defined(SERVER_MODE)
482  THREAD_ENTRY *next_wait_thrd; /* BCB waiting queue */
483 #endif /* SERVER_MODE */
484  PGBUF_BCB *hash_next; /* next hash chain */
485  PGBUF_BCB *prev_BCB; /* prev LRU chain */
486  PGBUF_BCB *next_BCB; /* next LRU or Invalid(Free) chain */
487  int tick_lru_list; /* age of lru list when this BCB was inserted into. used to decide when bcb has aged
488  * enough to boost to top. */
489  int tick_lru3; /* position in lru zone 3. small numbers are at the bottom. used to update LRU victim
490  * hint. */
491  volatile int count_fix_and_avoid_dealloc; /* two-purpose field:
492  * 1. count fixes up to a threshold (to detect hot pages).
493  * 2. avoid deallocation count.
494  * we don't use two separate shorts because avoid deallocation needs to
495  * be changed atomically... 2-byte sized atomic operations are not
496  * common. */
497  int hit_age; /* age of last hit (used to compute activities and quotas) */
498 
499  LOG_LSA oldest_unflush_lsa; /* The oldest LSA record of the page that has not been written to disk */
500  PGBUF_IOPAGE_BUFFER *iopage_buffer; /* pointer to iopage buffer structure */
501 };
502 
503 /* iopage buffer structure */
505 {
506  PGBUF_BCB *bcb; /* pointer to BCB structure */
507 #if (__WORDSIZE == 32)
508  int dummy; /* for 8byte align of iopage */
509 #elif !defined(LINUX) && !defined(WINDOWS) && !defined(AIX)
510 #error "you must check that iopage is aligned by 8byte !!"
511 #endif
512  FILEIO_PAGE iopage; /* The actual buffered io page */
513 };
514 
515 /* buffer lock record (or entry) structure
516  *
517  * buffer lock table is the array of buffer lock records
518  * # of buffer lock records is fixed as the total # of threads.
519  */
521 {
522  VPID vpid; /* buffer-locked page id */
523  PGBUF_BUFFER_LOCK *lock_next; /* next buffer lock record */
524 #if defined(SERVER_MODE)
525  THREAD_ENTRY *next_wait_thrd; /* buffer-lock waiting queue */
526 #endif /* SERVER_MODE */
527 };
528 
529 /* buffer hash entry structure
530  *
531  * buffer hash table is the array of buffer hash entries.
532  */
534 {
535 #if defined(SERVER_MODE)
536  pthread_mutex_t hash_mutex; /* hash mutex for the integrity of buffer hash chain and buffer lock chain. */
537 #endif /* SERVER_MODE */
538  PGBUF_BCB *hash_next; /* the anchor of buffer hash chain */
539  PGBUF_BUFFER_LOCK *lock_next; /* the anchor of buffer lock chain */
540 };
541 
542 /* buffer LRU list structure : double linked list */
544 {
545 #if defined(SERVER_MODE)
546  pthread_mutex_t mutex; /* LRU mutex for the integrity of LRU list. */
547 #endif /* SERVER_MODE */
548  PGBUF_BCB *top; /* top of the LRU list */
549  PGBUF_BCB *bottom; /* bottom of the LRU list */
550  PGBUF_BCB *bottom_1; /* the last of LRU_1_Zone. NULL if lru1 zone is empty */
551  PGBUF_BCB *bottom_2; /* the last of LRU_2_Zone. NULL if lru2 zone is empty */
552  PGBUF_BCB *volatile victim_hint; /* hint to start searching for victims in lru list. everything below the hint
553  * should be dirty, but the hint is not always the first bcb that can be
554  * victimized. */
555  /* TODO: I have noticed while investigating core files from TPCC that hint is
556  * sometimes before first bcb that can be victimized. this means there is
557  * a logic error somewhere. I don't know where, but there must be. */
558 
559  /* zone counters */
563 
564  /* victim candidate counter */
566 
567  /* zone thresholds. we only need for zones one and two */
570 
571  /* quota (private lists only) */
572  int quota;
573 
574  /* list tick. incremented when new bcb's are added to the list or when bcb's are boosted to top */
575  int tick_list; /* tick incremented whenever bcb is added or moved in list */
576  int tick_lru3; /* tick incremented whenever bcb's fall to zone three */
577 
578  volatile int flags; /* LRU list flags */
579 
580  int index; /* LRU list index */
581 };
582 
583 /* buffer invalid BCB list : single linked list */
585 {
586 #if defined(SERVER_MODE)
587  pthread_mutex_t invalid_mutex; /* invalid mutex for the integrity of invalid BCB list. */
588 #endif /* SERVER_MODE */
589  PGBUF_BCB *invalid_top; /* top of the invalid BCB list */
590  int invalid_cnt; /* # of entries in invalid BCB list */
591 };
592 
593 /* The page replacement algorithm is LRU + Aout of 2Q. This algorithm uses two linked lists as follows:
594  * - LRU list: this is a list of BCBs managed as a Least Recently Used queue
595  * - Aout list: this is a list on VPIDs managed as a FIFO queue
596  * The LRU list manages the "hot" pages, Aout list holds a short term history of pages which have been victimized.
597  */
598 /* Aout list node */
600 {
601  VPID vpid; /* page VPID */
602  int lru_idx;
603  PGBUF_AOUT_BUF *next; /* next element in list */
604  PGBUF_AOUT_BUF *prev; /* prev element in list */
605 };
606 
607 /* Aout list */
609 {
610 #if defined(SERVER_MODE)
611  pthread_mutex_t Aout_mutex; /* Aout mutex for the integrity of Aout list. */
612 #endif /* SERVER_MODE */
613  PGBUF_AOUT_BUF *Aout_top; /* top of the queue */
614  PGBUF_AOUT_BUF *Aout_bottom; /* bottom of the queue */
615 
616  PGBUF_AOUT_BUF *Aout_free; /* a free list of Aout nodes */
617 
618  PGBUF_AOUT_BUF *bufarray; /* Array holding all the nodes in the list. Since Aout has a predefined fixed size, it
619  * makes more sense to preallocate all the nodes */
620  int num_hashes; /* number of hash tables */
621  MHT_TABLE **aout_buf_ht; /* hash table for fast history lookup. */
622 
623  int max_count; /* maximum size of the Aout queue */
624 };
625 
626 /* Generic structure to manage sequential flush with flush rate control:
627  * Flush rate control is achieved by breaking each 1 second into intervals, and attempt to flush an equal number of
628  * pages in each interval.
629  * Compensation is applied across all intervals in one second to achieve overall flush rate.
630  * In each interval, the pages are flushed either in burst mode or equally time spread during the entire interval.
631  */
633 {
635  LOG_LSA flush_upto_lsa; /* newest of the oldest LSA record of the pages which will be written to disk */
636 
637  int control_intervals_cnt; /* intervals passed */
638  int control_flushed; /* number of pages flushed since the 1 second super-interval started */
639 
640  int interval_msec; /* duration of one interval */
641  int flush_max_size; /* max size of elements, set only on init */
642  int flush_cnt; /* current count of elements in flush_list */
643  int flush_idx; /* index of current element to flush */
644  int flushed_pages; /* cnt of flushed pages (return parameter) */
645  float flush_rate; /* maximum rate of flushing (negative if none should be used) */
646 
647  bool burst_mode; /* config : flush in burst or flush one page and wait */
648 };
649 
652 {
653  INT64 dirties_cnt; /* Number of dirty buffers. */
654 
655  int *lru_hits; /* Current hits in LRU1 per LRU */
656  int *lru_activity; /* Activity level per LRU */
657 
658  /* Overall counters */
659  volatile int lru_shared_pgs_cnt; /* count of BCBs in all shared LRUs */
660  int pg_unfix_cnt; /* Count of page unfixes; used for refreshing quota adjustment */
661  int lru_victim_req_cnt; /* number of victim requests from all LRUs */
662  int fix_req_cnt; /* number of fix requests */
663 
664 #if defined (SERVER_MODE)
665  PGBUF_MONITOR_BCB_MUTEX *bcb_locks; /* track bcb mutex usage. */
666 #endif /* SERVER_MODE */
667 
668  bool victim_rich; /* true if page buffer pool has many victims. pgbuf_adjust_quotas will update this
669  * value. */
670 };
671 
674 {
675  int num_private_LRU_list; /* number of private LRU lists */
676 
677  /* Real-time tunning: */
678  float *lru_victim_flush_priority_per_lru; /* priority to flush from this LRU */
679 
680  int *private_lru_session_cnt; /* Number of active session for each private LRU: Contains only private lists ! */
681  float private_pages_ratio; /* Ratio of all private BCBs among total BCBs */
682 
683  /* TODO: remove me --> */
684  unsigned int add_shared_lru_idx; /* circular index of shared LRU for relocating to shared */
685  int avoid_shared_lru_idx; /* index of shared LRU to avoid when relocating to shared;
686  * this is usually the index of shared LRU with maximum number of BCBs;
687  * transaction will avoid this list when relocating to shared LRU (like when moving from
688  * a garbage LRU); such LRU list returns to normal size through victimization */
689 
691  INT32 adjust_age;
693 };
694 
695 #if defined (SERVER_MODE)
696 /* PGBUF_DIRECT_VICTIM - system used to optimize the victim assignment without searching and burning CPU uselessly.
697  * threads are waiting to be assigned a victim directly and woken up.
698  */
699 typedef struct pgbuf_direct_victim PGBUF_DIRECT_VICTIM;
700 struct pgbuf_direct_victim
701 {
702  PGBUF_BCB **bcb_victims;
703  /* *INDENT-OFF* */
704  lockfree::circular_queue<THREAD_ENTRY *> *waiter_threads_high_priority;
705  lockfree::circular_queue<THREAD_ENTRY *> *waiter_threads_low_priority;
706  /* *INDENT-ON* */
707 };
708 #define PGBUF_FLUSHED_BCBS_BUFFER_SIZE (8 * 1024) /* 8k */
709 #endif /* SERVER_MODE */
710 
711 /* The buffer Pool */
713 {
714  /* total # of buffer frames on the buffer (fixed value: 10 * num_trans) */
716 
717  /* buffer related tables and lists (the essential structures) */
718 
719  PGBUF_BCB *BCB_table; /* BCB table */
720  PGBUF_BUFFER_HASH *buf_hash_table; /* buffer hash table */
721  PGBUF_BUFFER_LOCK *buf_lock_table; /* buffer lock table */
722  PGBUF_IOPAGE_BUFFER *iopage_table; /* IO page table */
723  int num_LRU_list; /* number of shared LRU lists */
724  float ratio_lru1; /* ratio for lru 1 zone */
725  float ratio_lru2; /* ratio for lru 2 zone */
726  PGBUF_LRU_LIST *buf_LRU_list; /* LRU lists. When Page quota is enabled, first 'num_LRU_list' store shared pages;
727  * the next 'num_garbage_LRU_list' lists store shared garbage pages;
728  * the last 'num_private_LRU_list' are private lists.
729  * When page quota is disabled only shared lists are used */
731  PGBUF_INVALID_LIST buf_invalid_list; /* buffer invalid BCB list */
732 
735 
738 
739  /*
740  * the structures for maintaining information on BCB holders.
741  * 'thrd_holder_info' has entries as many as the # of threads and
742  * each entry maintains free BCB holder list and used BCB holder list
743  * of the corresponding thread.
744  * 'thrd_reserved_holder' has memory space for all BCB holder entries.
745  */
748 
749  /*
750  * free BCB holder list shared by all the threads.
751  * When a thread needs more free BCB holder entries,
752  * the thread allocates them one by one from this list.
753  * However, the thread never return the entries into this list.
754  * The structure is a list of the arrays of BCB holder entries.
755  * 'free_holder_set' points to the first array that has free entries
756  * and 'free_index' indicates the first free entry in the array.
757  */
758 #if defined(SERVER_MODE)
759  pthread_mutex_t free_holder_set_mutex;
760 #endif /* SERVER_MODE */
763 
764  /* 'check_for_interrupt' is set true when interrupts must be checked. Log manager set and clears this value while
765  * holding TR_TABLE_CS. */
767 
768 #if defined(SERVER_MODE)
769  bool is_flushing_victims; /* flag set true when pgbuf flush thread is flushing victim candidates */
770  bool is_checkpoint; /* flag set true when checkpoint is running */
771 #endif /* SERVER_MODE */
772 
773  /* *INDENT-OFF* */
774 #if defined (SERVER_MODE)
775  PGBUF_DIRECT_VICTIM direct_victims; /* direct victim assignment */
776  lockfree::circular_queue<PGBUF_BCB *> *flushed_bcbs; /* post-flush processing */
777 #endif /* SERVER_MODE */
781  /* *INDENT-ON* */
782 
786 #if defined (SERVER_MODE)
787  pthread_mutex_t show_status_mutex;
788 #endif
789 };
790 
791 /* victim candidate list */
792 /* One daemon thread performs flush task for victim candidates.
793  * The daemon finds and saves victim candidates using following list.
794  * And then, based on the list, the daemon performs actual flush task.
795  */
797 {
798  PGBUF_BCB *bufptr; /* selected BCB as victim candidate */
799  VPID vpid; /* page id of the page managed by the BCB */
800 };
801 
802 static PGBUF_BUFFER_POOL pgbuf_Pool; /* The buffer Pool */
804 
806 
807 #if defined(CUBRID_DEBUG)
808 /* A buffer guard to detect over runs .. */
809 static char pgbuf_Guard[8] = { MEM_REGION_GUARD_MARK, MEM_REGION_GUARD_MARK, MEM_REGION_GUARD_MARK,
812  MEM_REGION_GUARD_MARK
813 };
814 #endif /* CUBRID_DEBUG */
815 
816 #define AOUT_HASH_DIVIDE_RATIO 1000
817 #define AOUT_HASH_IDX(vpid, list) ((vpid)->pageid % list->num_hashes)
818 
819 /* pgbuf_monitor_bcb_mutex - debug tool to monitor bcb mutex usage (and leaks). */
821 {
824  int line;
826 };
827 #if defined (SERVER_MODE)
828 static bool pgbuf_Monitor_locks = false;
829 #endif /* SERVER_MODE */
830 
831 #if defined (SERVER_MODE)
832 #define PGBUF_BCB_LOCK(bcb) \
833  (pgbuf_Monitor_locks ? pgbuf_bcbmon_lock (bcb, __LINE__) : (void) pthread_mutex_lock (&(bcb)->mutex))
834 #define PGBUF_BCB_TRYLOCK(bcb) \
835  (pgbuf_Monitor_locks ? pgbuf_bcbmon_trylock (bcb, __LINE__) : pthread_mutex_trylock (&(bcb)->mutex))
836 #define PGBUF_BCB_UNLOCK(bcb) \
837  (pgbuf_Monitor_locks ? pgbuf_bcbmon_unlock (bcb) : (void) pthread_mutex_unlock (&(bcb)->mutex))
838 #define PGBUF_BCB_CHECK_OWN(bcb) if (pgbuf_Monitor_locks) pgbuf_bcbmon_check_own (bcb)
839 #define PGBUF_BCB_CHECK_MUTEX_LEAKS() if (pgbuf_Monitor_locks) pgbuf_bcbmon_check_mutex_leaks ()
840 #else /* !SERVER_MODE */ /* SA_MODE */
841 /* single-threaded does not require mutexes, nor does it need to check them */
842 #define PGBUF_BCB_LOCK(bcb)
843 #define PGBUF_BCB_TRYLOCK(bcb) (0)
844 #define PGBUF_BCB_UNLOCK(bcb)
845 #define PGBUF_BCB_CHECK_OWN(bcb) (true)
846 #define PGBUF_BCB_CHECK_MUTEX_LEAKS()
847 #endif /* SA_MODE */
848 
849 /* helper to collect performance in page fix functions */
852 {
865 };
866 
867 /* in FILEIO_PAGE_RESERVED */
870 {
871  INT32 pageid; /* Page identifier */
872  INT16 volid; /* Volume identifier where the page reside */
873  unsigned char ptype; /* Page type */
874  unsigned char pflag;
875 };
876 
877 /************************************************************************/
878 /* Page buffer LRU section */
879 /************************************************************************/
880 #define PGBUF_GET_LRU_LIST(lru_idx) (&pgbuf_Pool.buf_LRU_list[lru_idx])
881 
882 #define PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE(bcb) (pgbuf_bcb_get_zone (bcb) == PGBUF_LRU_3_ZONE)
883 #define PGBUF_IS_BCB_IN_LRU(bcb) ((pgbuf_bcb_get_zone (bcb) & PGBUF_LRU_ZONE_MASK) != 0)
884 
885 /* How old is a BCB (bcb_age) related to age of list to which it belongs */
886 #define PGBUF_AGE_DIFF(bcb_age,list_age) \
887  (((list_age) >= (bcb_age)) ? ((list_age) - (bcb_age)) : (DB_INT32_MAX - ((bcb_age) - (list_age))))
888 /* is bcb old enough. we use it as indicator of the buffer lru zone. when bcb falls more than half of this buffer zone,
889  * it is considered old */
890 #define PGBUF_IS_BCB_OLD_ENOUGH(bcb, lru_list) \
891  (PGBUF_AGE_DIFF ((bcb)->tick_lru_list, (lru_list)->tick_list) >= ((lru_list)->count_lru2 / 2))
892 /* zone counts & thresholds */
893 #define PGBUF_LRU_ZONE_ONE_TWO_COUNT(list) ((list)->count_lru1 + (list)->count_lru2)
894 #define PGBUF_LRU_LIST_COUNT(list) (PGBUF_LRU_ZONE_ONE_TWO_COUNT(list) + (list)->count_lru3)
895 #define PGBUF_LRU_VICTIM_ZONE_COUNT(list) ((list)->count_lru3)
896 
897 #define PGBUF_LRU_IS_ZONE_ONE_OVER_THRESHOLD(list) ((list)->threshold_lru1 < (list)->count_lru1)
898 #define PGBUF_LRU_IS_ZONE_TWO_OVER_THRESHOLD(list) ((list)->threshold_lru2 < (list)->count_lru2)
899 #define PGBUF_LRU_ARE_ZONES_ONE_TWO_OVER_THRESHOLD(list) \
900  ((list)->threshold_lru1 + (list)->threshold_lru2 < PGBUF_LRU_ZONE_ONE_TWO_COUNT(list))
901 
902 /* macros for retrieving info on shared and private LRUs */
903 
904 /* Limits for private chains */
905 #define PGBUF_PRIVATE_LRU_MIN_COUNT 4
906 #define PGBUF_PRIVATE_LRU_MAX_HARD_QUOTA 5000
907 
908 /* Lower limit for number of pages in shared LRUs: used to compute number of private lists and number of shared lists */
909 #define PGBUF_MIN_PAGES_IN_SHARED_LIST 1000
910 #define PGBUF_MIN_SHARED_LIST_ADJUST_SIZE 50
911 
912 #define PGBUF_PAGE_QUOTA_IS_ENABLED (pgbuf_Pool.quota.num_private_LRU_list > 0)
913 
914 /* macros for retrieving id of private chains of thread (to use actual LRU index use PGBUF_LRU_INDEX_FROM_PRIVATE on
915  * this result.
916  */
917 #if defined (SERVER_MODE)
918 #define PGBUF_PRIVATE_LRU_FROM_THREAD(thread_p) \
919  ((thread_p) != NULL) ? ((thread_p)->private_lru_index) : (0)
920 static bool
922 {
923  return PGBUF_PAGE_QUOTA_IS_ENABLED && (thread_p) != NULL && (thread_p)->private_lru_index != -1;
924 }
925 #else
926 #define PGBUF_PRIVATE_LRU_FROM_THREAD(thread_p) 0
927 #define PGBUF_THREAD_HAS_PRIVATE_LRU(thread_p) false
928 #endif
929 
930 #define PGBUF_SHARED_LRU_COUNT (pgbuf_Pool.num_LRU_list)
931 #define PGBUF_PRIVATE_LRU_COUNT (pgbuf_Pool.quota.num_private_LRU_list)
932 #define PGBUF_TOTAL_LRU_COUNT (PGBUF_SHARED_LRU_COUNT + PGBUF_PRIVATE_LRU_COUNT)
933 
934 #define PGBUF_PRIVATE_LIST_FROM_LRU_INDEX(i) ((i) - PGBUF_SHARED_LRU_COUNT)
935 #define PGBUF_LRU_INDEX_FROM_PRIVATE(private_id) (PGBUF_SHARED_LRU_COUNT + (private_id))
936 
937 #define PGBUF_IS_SHARED_LRU_INDEX(lru_idx) ((lru_idx) < PGBUF_SHARED_LRU_COUNT)
938 #define PGBUF_IS_PRIVATE_LRU_INDEX(lru_idx) ((lru_idx) >= PGBUF_SHARED_LRU_COUNT)
939 
940 #define PGBUF_LRU_LIST_IS_OVER_QUOTA(list) (PGBUF_LRU_LIST_COUNT (list) > (list)->quota)
941 #define PGBUF_LRU_LIST_IS_ONE_TWO_OVER_QUOTA(list) ((PGBUF_LRU_ZONE_ONE_TWO_COUNT (list) > (list)->quota))
942 #define PGBUF_LRU_LIST_OVER_QUOTA_COUNT(list) (PGBUF_LRU_LIST_COUNT (list) - (list)->quota)
943 
944 #define PGBUF_IS_PRIVATE_LRU_OVER_QUOTA(lru_idx) \
945  (PGBUF_IS_PRIVATE_LRU_INDEX (lru_idx) && PGBUF_LRU_LIST_IS_OVER_QUOTA (PGBUF_GET_LRU_LIST (lru_idx)))
946 #define PGBUF_IS_PRIVATE_LRU_ONE_TWO_OVER_QUOTA(lru_idx) \
947  (PGBUF_IS_PRIVATE_LRU_INDEX (lru_idx) && PGBUF_LRU_LIST_IS_ONE_TWO_OVER_QUOTA (PGBUF_GET_LRU_LIST (lru_idx)))
948 
949 #define PGBUF_OVER_QUOTA_BUFFER(quota) MAX (10, (int) (quota * 0.01f))
950 #define PGBUF_LRU_LIST_IS_OVER_QUOTA_WITH_BUFFER(list) \
951  (PGBUF_LRU_LIST_COUNT (list) > (list)->quota + PGBUF_OVER_QUOTA_BUFFER ((list)->quota))
952 
953 #define PBGUF_BIG_PRIVATE_MIN_SIZE 100
954 
955 /* LRU flags */
956 #define PGBUF_LRU_VICTIM_LFCQ_FLAG ((int) 0x80000000)
957 
958 #if defined (NDEBUG)
959 /* note: release bugs can be hard to debug due to compile optimization. the crash call-stack may point to a completely
960  * different code than the one that caused the crash. my workaround is to save the line of code in this global
961  * variable pgbuf_Abort_release_line.
962  *
963  * careful about overusing this. the code may not be fully optimized when using it. */
964 static int pgbuf_Abort_release_line = 0;
965 #define PGBUF_ABORT_RELEASE() do { pgbuf_Abort_release_line = __LINE__; abort (); } while (false)
966 #else /* DEBUG */
967 #define PGBUF_ABORT_RELEASE() assert (false)
968 #endif /* DEBUG */
969 
970 static INLINE unsigned int pgbuf_hash_func_mirror (const VPID * vpid) __attribute__ ((ALWAYS_INLINE));
971 
973 static int pgbuf_initialize_bcb_table (void);
974 static int pgbuf_initialize_hash_table (void);
975 static int pgbuf_initialize_lock_table (void);
976 static int pgbuf_initialize_lru_list (void);
977 static int pgbuf_initialize_aout_list (void);
978 static int pgbuf_initialize_invalid_list (void);
980 static int pgbuf_initialize_page_quota (void);
981 static int pgbuf_initialize_page_monitor (void);
982 static int pgbuf_initialize_thrd_holder (void);
989  PGBUF_HOLDER_STAT * holder_perf_stat_p) __attribute__ ((ALWAYS_INLINE));
990 STATIC_INLINE int pgbuf_unlatch_bcb_upon_unfix (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, int holder_status)
992 static void pgbuf_unlatch_void_zone_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int thread_private_lru_index);
994  int thread_private_lru_index) __attribute__ ((ALWAYS_INLINE));
995 static int pgbuf_block_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LATCH_MODE request_mode,
996  int request_fcnt, bool as_promote);
997 STATIC_INLINE int pgbuf_latch_bcb_upon_fix (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LATCH_MODE request_mode,
998  int buf_lock_acquired, PGBUF_LATCH_CONDITION condition,
999  bool * is_latch_wait) __attribute__ ((ALWAYS_INLINE));
1000 static int pgbuf_latch_idle_page (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LATCH_MODE request_mode);
1001 
1003  const VPID * vpid) __attribute__ ((ALWAYS_INLINE));
1005  PGBUF_BCB * bufptr) __attribute__ ((ALWAYS_INLINE));
1008 static int pgbuf_lock_page (THREAD_ENTRY * thread_p, PGBUF_BUFFER_HASH * hash_anchor, const VPID * vpid);
1009 static int pgbuf_unlock_page (THREAD_ENTRY * thread_p, PGBUF_BUFFER_HASH * hash_anchor, const VPID * vpid,
1010  int need_hash_mutex);
1011 static PGBUF_BCB *pgbuf_allocate_bcb (THREAD_ENTRY * thread_p, const VPID * src_vpid);
1013  PGBUF_BUFFER_HASH * hash_anchor, PGBUF_FIX_PERF * perf, bool * try_again);
1014 static int pgbuf_victimize_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr);
1015 static int pgbuf_bcb_safe_flush_internal (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous, bool * locked);
1016 static int pgbuf_invalidate_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr);
1017 static int pgbuf_bcb_safe_flush_force_lock (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous);
1018 static int pgbuf_bcb_safe_flush_force_unlock (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous);
1020 static int pgbuf_put_bcb_into_invalid_list (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr);
1021 
1023 static int pgbuf_get_victim_candidates_from_lru (THREAD_ENTRY * thread_p, int check_count,
1024  float lru_sum_flush_priority, bool * assigned_directly);
1025 static PGBUF_BCB *pgbuf_get_victim (THREAD_ENTRY * thread_p);
1026 static PGBUF_BCB *pgbuf_get_victim_from_lru_list (THREAD_ENTRY * thread_p, const int lru_idx);
1027 #if defined (SERVER_MODE)
1028 static int pgbuf_panic_assign_direct_victims_from_lru (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list,
1029  PGBUF_BCB * bcb_start);
1030 STATIC_INLINE void pgbuf_lfcq_assign_direct_victims (THREAD_ENTRY * thread_p, int lru_idx, int *nassign_inout)
1031  __attribute__ ((ALWAYS_INLINE));
1032 #endif /* SERVER_MODE */
1033 STATIC_INLINE void pgbuf_add_vpid_to_aout_list (THREAD_ENTRY * thread_p, const VPID * vpid, const int lru_idx)
1034  __attribute__ ((ALWAYS_INLINE));
1035 static int pgbuf_remove_vpid_from_aout_list (THREAD_ENTRY * thread_p, const VPID * vpid);
1036 static int pgbuf_remove_private_from_aout_list (const int lru_idx);
1037 STATIC_INLINE void pgbuf_remove_from_lru_list (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LRU_LIST * lru_list)
1038  __attribute__ ((ALWAYS_INLINE));
1039 
1040 STATIC_INLINE void pgbuf_lru_add_bcb_to_top (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, PGBUF_LRU_LIST * lru_list)
1041  __attribute__ ((ALWAYS_INLINE));
1043  __attribute__ ((ALWAYS_INLINE));
1045  __attribute__ ((ALWAYS_INLINE));
1046 STATIC_INLINE void pgbuf_lru_adjust_zone1 (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
1047  __attribute__ ((ALWAYS_INLINE));
1048 STATIC_INLINE void pgbuf_lru_adjust_zone2 (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
1049  __attribute__ ((ALWAYS_INLINE));
1050 STATIC_INLINE void pgbuf_lru_adjust_zones (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
1051  __attribute__ ((ALWAYS_INLINE));
1053  __attribute__ ((ALWAYS_INLINE));
1054 static void pgbuf_lru_boost_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb);
1055 STATIC_INLINE void pgbuf_lru_add_new_bcb_to_top (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int lru_idx)
1056  __attribute__ ((ALWAYS_INLINE));
1057 STATIC_INLINE void pgbuf_lru_add_new_bcb_to_middle (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int lru_idx)
1058  __attribute__ ((ALWAYS_INLINE));
1059 STATIC_INLINE void pgbuf_lru_add_new_bcb_to_bottom (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int lru_idx)
1060  __attribute__ ((ALWAYS_INLINE));
1061 STATIC_INLINE void pgbuf_lru_remove_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1062 static void pgbuf_lru_move_from_private_to_shared (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb);
1063 static void pgbuf_move_bcb_to_bottom_lru (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb);
1064 
1065 STATIC_INLINE int pgbuf_bcb_flush_with_wal (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool is_page_flush_thread,
1066  bool * is_bcb_locked) __attribute__ ((ALWAYS_INLINE));
1067 static void pgbuf_wake_flush_waiters (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb);
1069 static int pgbuf_flush_all_helper (THREAD_ENTRY * thread_p, VOLID volid, bool is_only_fixed, bool is_set_lsa_as_null);
1070 
1071 #if defined(SERVER_MODE)
1072 static int pgbuf_timed_sleep_error_handling (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, THREAD_ENTRY * thrd_entry);
1073 static int pgbuf_timed_sleep (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, THREAD_ENTRY * thrd_entry);
1074 STATIC_INLINE void pgbuf_wakeup_reader_writer (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr)
1075  __attribute__ ((ALWAYS_INLINE));
1076 #endif /* SERVER_MODE */
1077 
1078 STATIC_INLINE bool pgbuf_get_check_page_validation_level (int page_validation_level) __attribute__ ((ALWAYS_INLINE));
1079 static bool pgbuf_is_valid_page_ptr (const PAGE_PTR pgptr);
1080 STATIC_INLINE void pgbuf_set_bcb_page_vpid (PGBUF_BCB * bufptr, bool force_set_vpid) __attribute__ ((ALWAYS_INLINE));
1081 STATIC_INLINE bool pgbuf_check_bcb_page_vpid (PGBUF_BCB * bufptr, bool maybe_deallocated)
1082  __attribute__ ((ALWAYS_INLINE));
1083 
1084 #if defined(CUBRID_DEBUG)
1085 static void pgbuf_scramble (FILEIO_PAGE * iopage);
1086 static void pgbuf_dump (void);
1087 static int pgbuf_is_consistent (const PGBUF_BCB * bufptr, int likely_bad_after_fixcnt);
1088 #endif /* CUBRID_DEBUG */
1089 
1090 #if !defined(NDEBUG)
1091 static void pgbuf_add_fixed_at (PGBUF_HOLDER * holder, const char *caller_file, int caller_line, bool reset);
1092 #endif
1093 
1094 #if defined(SERVER_MODE)
1095 static void pgbuf_sleep (THREAD_ENTRY * thread_p, pthread_mutex_t * mutex_p);
1096 STATIC_INLINE int pgbuf_wakeup (THREAD_ENTRY * thread_p) __attribute__ ((ALWAYS_INLINE));
1097 STATIC_INLINE int pgbuf_wakeup_uncond (THREAD_ENTRY * thread_p) __attribute__ ((ALWAYS_INLINE));
1098 #endif /* SERVER_MODE */
1100  __attribute__ ((ALWAYS_INLINE));
1101 static int pgbuf_compare_victim_list (const void *p1, const void *p2);
1102 static void pgbuf_wakeup_page_flush_daemon (THREAD_ENTRY * thread_p);
1103 STATIC_INLINE bool pgbuf_check_page_ptype_internal (PAGE_PTR pgptr, PAGE_TYPE ptype, bool no_error)
1104  __attribute__ ((ALWAYS_INLINE));
1105 #if defined (SERVER_MODE)
1106 static bool pgbuf_is_thread_high_priority (THREAD_ENTRY * thread_p);
1107 #endif /* SERVER_MODE */
1108 static int pgbuf_flush_page_and_neighbors_fb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, int *flushed_pages);
1109 STATIC_INLINE void pgbuf_add_bufptr_to_batch (PGBUF_BCB * bufptr, int idx) __attribute__ ((ALWAYS_INLINE));
1110 STATIC_INLINE int pgbuf_flush_neighbor_safe (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, VPID * expected_vpid,
1111  bool * flushed) __attribute__ ((ALWAYS_INLINE));
1112 
1113 static int pgbuf_get_groupid_and_unfix (THREAD_ENTRY * thread_p, const VPID * req_vpid, PAGE_PTR * pgptr,
1114  VPID * groupid, bool do_unfix);
1115 #if !defined(NDEBUG)
1117  const PGBUF_LATCH_MODE latch_mode, const bool clear_unfix_flag,
1118  const char *caller_file, const int caller_line)
1119  __attribute__ ((ALWAYS_INLINE));
1120 #else
1122  const PGBUF_LATCH_MODE latch_mode, const bool clear_unfix_flag)
1123  __attribute__ ((ALWAYS_INLINE));
1124 #endif
1125 static PGBUF_HOLDER *pgbuf_get_holder (THREAD_ENTRY * thread_p, PAGE_PTR pgptr);
1126 static void pgbuf_remove_watcher (PGBUF_HOLDER * holder, PGBUF_WATCHER * watcher_object);
1127 static int pgbuf_flush_chkpt_seq_list (THREAD_ENTRY * thread_p, PGBUF_SEQ_FLUSHER * seq_flusher,
1128  const LOG_LSA * prev_chkpt_redo_lsa, LOG_LSA * chkpt_smallest_lsa);
1129 static int pgbuf_flush_seq_list (THREAD_ENTRY * thread_p, PGBUF_SEQ_FLUSHER * seq_flusher, struct timeval *limit_time,
1130  const LOG_LSA * prev_chkpt_redo_lsa, LOG_LSA * chkpt_smallest_lsa, int *time_rem);
1132  const int cnt);
1133 static const char *pgbuf_latch_mode_str (PGBUF_LATCH_MODE latch_mode);
1134 static const char *pgbuf_zone_str (PGBUF_ZONE zone);
1135 static const char *pgbuf_consistent_str (int consistent);
1136 
1137 static void pgbuf_compute_lru_vict_target (float *lru_sum_flush_priority);
1138 
1139 STATIC_INLINE bool pgbuf_is_bcb_victimizable (PGBUF_BCB * bcb, bool has_mutex_lock) __attribute__ ((ALWAYS_INLINE));
1140 STATIC_INLINE bool pgbuf_is_bcb_fixed_by_any (PGBUF_BCB * bcb, bool has_mutex_lock) __attribute__ ((ALWAYS_INLINE));
1141 
1143  __attribute__ ((ALWAYS_INLINE));
1144 #if defined (SERVER_MODE)
1145 STATIC_INLINE bool pgbuf_get_thread_waiting_for_direct_victim (REFPTR (THREAD_ENTRY, waiting_thread_out))
1146  __attribute__ ((ALWAYS_INLINE));
1147 STATIC_INLINE PGBUF_BCB *pgbuf_get_direct_victim (THREAD_ENTRY * thread_p) __attribute__ ((ALWAYS_INLINE));
1148 STATIC_INLINE bool pgbuf_is_any_thread_waiting_for_direct_victim (void) __attribute__ ((ALWAYS_INLINE));
1149 #endif /* SERVER_MODE */
1150 
1152  __attribute__ ((ALWAYS_INLINE));
1154  PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1156  PGBUF_BCB * bcb_prev_hint, PGBUF_BCB * bcb_new_hint,
1157  bool was_vict_count_updated) __attribute__ ((ALWAYS_INLINE));
1160 
1161 STATIC_INLINE void pgbuf_bcb_update_flags (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int set_flags, int clear_flags)
1162  __attribute__ ((ALWAYS_INLINE));
1163 STATIC_INLINE void pgbuf_bcb_change_zone (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int lru_idx, PGBUF_ZONE zone)
1164  __attribute__ ((ALWAYS_INLINE));
1165 STATIC_INLINE PGBUF_ZONE pgbuf_bcb_get_zone (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1166 STATIC_INLINE int pgbuf_bcb_get_lru_index (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1167 STATIC_INLINE int pgbuf_bcb_get_pool_index (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1168 STATIC_INLINE bool pgbuf_bcb_is_dirty (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1170  __attribute__ ((ALWAYS_INLINE));
1171 STATIC_INLINE bool pgbuf_bcb_is_flushing (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1172 STATIC_INLINE bool pgbuf_bcb_is_direct_victim (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1173 STATIC_INLINE bool pgbuf_bcb_is_invalid_direct_victim (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1174 STATIC_INLINE bool pgbuf_bcb_is_async_flush_request (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1175 STATIC_INLINE bool pgbuf_bcb_is_to_vacuum (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1177 STATIC_INLINE bool pgbuf_bcb_avoid_victim (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1178 STATIC_INLINE void pgbuf_bcb_set_dirty (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1179 STATIC_INLINE void pgbuf_bcb_clear_dirty (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1181  __attribute__ ((ALWAYS_INLINE));
1182 STATIC_INLINE void pgbuf_bcb_mark_was_not_flushed (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, bool mark_dirty)
1183  __attribute__ ((ALWAYS_INLINE));
1186 STATIC_INLINE bool pgbuf_bcb_should_avoid_deallocation (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1187 STATIC_INLINE void pgbuf_bcb_check_and_reset_fix_and_avoid_dealloc (PGBUF_BCB * bcb, const char *file, int line)
1188  __attribute__ ((ALWAYS_INLINE));
1189 STATIC_INLINE void pgbuf_bcb_register_fix (PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1190 STATIC_INLINE bool pgbuf_bcb_is_hot (const PGBUF_BCB * bcb) __attribute__ ((ALWAYS_INLINE));
1191 
1192 #if defined (SERVER_MODE)
1193 static void pgbuf_bcbmon_lock (PGBUF_BCB * bcb, int caller_line);
1194 static int pgbuf_bcbmon_trylock (PGBUF_BCB * bcb, int caller_line);
1195 static void pgbuf_bcbmon_unlock (PGBUF_BCB * bcb);
1196 static void pgbuf_bcbmon_check_own (PGBUF_BCB * bcb);
1197 static void pgbuf_bcbmon_check_mutex_leaks (void);
1198 #endif /* SERVER_MODE */
1199 
1201 static PGBUF_BCB *pgbuf_lfcq_get_victim_from_private_lru (THREAD_ENTRY * thread_p, bool restricted);
1202 static PGBUF_BCB *pgbuf_lfcq_get_victim_from_shared_lru (THREAD_ENTRY * thread_p, bool multi_threaded);
1203 
1205 
1206 static void pgbuf_flags_mask_sanity_check (void);
1207 static void pgbuf_lru_sanity_check (const PGBUF_LRU_LIST * lru);
1208 
1209 // TODO: find a better place for this, but not log_impl.h
1210 STATIC_INLINE int pgbuf_find_current_wait_msecs (THREAD_ENTRY * thread_p) __attribute__ ((ALWAYS_INLINE));
1211 
1212 static bool pgbuf_is_temp_lsa (const log_lsa & lsa);
1213 static void pgbuf_init_temp_page_lsa (FILEIO_PAGE * io_page, PGLENGTH page_size);
1214 
1215 static void pgbuf_scan_bcb_table (THREAD_ENTRY * thread_p);
1216 
1217 #if defined (SERVER_MODE)
1218 // *INDENT-OFF*
1219 static cubthread::daemon *pgbuf_Page_maintenance_daemon = NULL;
1220 static cubthread::daemon *pgbuf_Page_flush_daemon = NULL;
1221 static cubthread::daemon *pgbuf_Page_post_flush_daemon = NULL;
1222 static cubthread::daemon *pgbuf_Flush_control_daemon = NULL;
1223 // *INDENT-ON*
1224 #endif /* SERVER_MODE */
1225 
1227 
1228 /*
1229  * pgbuf_hash_func_mirror () - Hash VPID into hash anchor
1230  * return: hash value
1231  * key_vpid(in): VPID to hash
1232  */
1233 STATIC_INLINE unsigned int
1235 {
1236 #define VOLID_LSB_BITS 8
1237  int i;
1238  unsigned int hash_val;
1239  unsigned int volid_lsb;
1240  unsigned int reversed_volid_lsb = 0;
1241  unsigned int lsb_mask;
1242  unsigned int reverse_mask;
1243 
1244  volid_lsb = vpid->volid;
1245 
1246  lsb_mask = 1;
1247  reverse_mask = 1 << (HASH_SIZE_BITS - 1);
1248 
1249  for (i = VOLID_LSB_BITS; i > 0; i--)
1250  {
1251  if (volid_lsb & lsb_mask)
1252  {
1253  reversed_volid_lsb |= reverse_mask;
1254  }
1255  reverse_mask = reverse_mask >> 1;
1256  lsb_mask = lsb_mask << 1;
1257  }
1258 
1259  hash_val = vpid->pageid ^ reversed_volid_lsb;
1260  hash_val = hash_val & ((1 << HASH_SIZE_BITS) - 1);
1261 
1262  return hash_val;
1263 #undef VOLID_LSB_BITS
1264 }
1265 
1266 /*
1267  * pgbuf_hash_vpid () - Hash a volume_page identifier
1268  * return: hash value
1269  * key_vpid(in): VPID to hash
1270  * htsize(in): Size of hash table
1271  */
1272 unsigned int
1273 pgbuf_hash_vpid (const void *key_vpid, unsigned int htsize)
1274 {
1275  const VPID *vpid = (VPID *) key_vpid;
1276 
1277  return ((vpid->pageid | ((unsigned int) vpid->volid) << 24) % htsize);
1278 }
1279 
1280 /*
1281  * pgbuf_compare_vpid () - Compare two vpids keys for hashing
1282  * return: int (key_vpid1 == key_vpid2 ?)
1283  * key_vpid1(in): First key
1284  * key_vpid2(in): Second key
1285  */
1286 int
1287 pgbuf_compare_vpid (const void *key_vpid1, const void *key_vpid2)
1288 {
1289  const VPID *vpid1 = (VPID *) key_vpid1;
1290  const VPID *vpid2 = (VPID *) key_vpid2;
1291 
1292  if (vpid1->volid == vpid2->volid)
1293  {
1294  return vpid1->pageid - vpid2->pageid;
1295  }
1296  else
1297  {
1298  return vpid1->volid - vpid2->volid;
1299  }
1300 }
1301 
1302 /*
1303  * pgbuf_initialize () - Initialize the page buffer pool
1304  * return: NO_ERROR, or ER_code
1305  *
1306  * Note: Function invalidates any resident page, creates a hash table for easy
1307  * lookup of pages in the page buffer pool, and resets the clock tick for
1308  * the page replacement algorithm.
1309  */
1310 int
1312 {
1314 
1315  memset (&pgbuf_Pool, 0, sizeof (pgbuf_Pool));
1316 
1318  if (pgbuf_Pool.num_buffers < PGBUF_MINIMUM_BUFFERS)
1319  {
1320 #if defined(CUBRID_DEBUG)
1321  er_log_debug (ARG_FILE_LINE, "pgbuf_initialize: WARNING Num_buffers = %d is too small. %d was assumed",
1322  pgbuf_Pool.num_buffers, PGBUF_MINIMUM_BUFFERS);
1323 #endif /* CUBRID_DEBUG */
1324  pgbuf_Pool.num_buffers = PGBUF_MINIMUM_BUFFERS;
1325  }
1326 #if defined (SERVER_MODE)
1327 #if defined (NDEBUG)
1328  pgbuf_Monitor_locks = prm_get_bool_value (PRM_ID_PB_MONITOR_LOCKS);
1329 #else /* !NDEBUG */
1330  pgbuf_Monitor_locks = true;
1331 #endif /* !NDEBUG */
1332 #endif /* SERVER_MODE */
1333 
1334  /* set ratios for lru zones */
1337  pgbuf_Pool.ratio_lru1 = MAX (pgbuf_Pool.ratio_lru1, PGBUF_LRU_ZONE_MIN_RATIO);
1338  pgbuf_Pool.ratio_lru1 = MIN (pgbuf_Pool.ratio_lru1, PGBUF_LRU_ZONE_MAX_RATIO);
1339  pgbuf_Pool.ratio_lru2 = MAX (pgbuf_Pool.ratio_lru2, PGBUF_LRU_ZONE_MIN_RATIO);
1340  pgbuf_Pool.ratio_lru2 = MIN (pgbuf_Pool.ratio_lru2, 1.0f - PGBUF_LRU_ZONE_MIN_RATIO - pgbuf_Pool.ratio_lru1);
1342  assert ((pgbuf_Pool.ratio_lru1 + pgbuf_Pool.ratio_lru2) >= 0.099f
1343  && (pgbuf_Pool.ratio_lru1 + pgbuf_Pool.ratio_lru2) <= 0.951f);
1344 
1345  /* keep page quota parameter initializer first */
1347  {
1348  goto error;
1349  }
1350 
1352  {
1353  goto error;
1354  }
1355 
1357  {
1358  goto error;
1359  }
1360 
1362  {
1363  goto error;
1364  }
1365 
1367  {
1368  goto error;
1369  }
1370 
1372  {
1373  goto error;
1374  }
1375 
1377  {
1378  goto error;
1379  }
1380 
1382  {
1383  goto error;
1384  }
1385 
1386  /* keep page quota initializer first */
1388  {
1389  goto error;
1390  }
1391 
1393  {
1394  goto error;
1395  }
1396 
1397  pgbuf_Pool.check_for_interrupts = false;
1398 
1399  pgbuf_Pool.victim_cand_list =
1400  ((PGBUF_VICTIM_CANDIDATE_LIST *) malloc (pgbuf_Pool.num_buffers * sizeof (PGBUF_VICTIM_CANDIDATE_LIST)));
1401  if (pgbuf_Pool.victim_cand_list == NULL)
1402  {
1404  (pgbuf_Pool.num_buffers * sizeof (PGBUF_VICTIM_CANDIDATE_LIST)));
1405  goto error;
1406  }
1407 
1408 #if defined (SERVER_MODE)
1409  pgbuf_Pool.is_flushing_victims = false;
1410  pgbuf_Pool.is_checkpoint = false;
1411 #endif
1412 
1413  {
1414  int cnt;
1415  cnt = (int) (0.25f * pgbuf_Pool.num_buffers);
1416  cnt = MIN (cnt, 65536);
1417 
1418  if (pgbuf_initialize_seq_flusher (&(pgbuf_Pool.seq_chkpt_flusher), NULL, cnt) != NO_ERROR)
1419  {
1420  goto error;
1421  }
1422  }
1423 
1424  /* TODO[arnia] : not required, if done in monitor initialization */
1425  pgbuf_Pool.monitor.dirties_cnt = 0;
1426 
1427 #if defined (SERVER_MODE)
1428  pgbuf_Pool.direct_victims.bcb_victims = (PGBUF_BCB **) malloc (thread_num_total_threads () * sizeof (PGBUF_BCB *));
1429  if (pgbuf_Pool.direct_victims.bcb_victims == NULL)
1430  {
1432  thread_num_total_threads () * sizeof (PGBUF_BCB *));
1433  goto error;
1434  }
1435  memset (pgbuf_Pool.direct_victims.bcb_victims, 0, thread_num_total_threads () * sizeof (PGBUF_BCB *));
1436 
1437  /* *INDENT-OFF* */
1438  pgbuf_Pool.direct_victims.waiter_threads_high_priority =
1440  /* *INDENT-ON* */
1441  if (pgbuf_Pool.direct_victims.waiter_threads_high_priority == NULL)
1442  {
1443  ASSERT_ERROR ();
1444  goto error;
1445  }
1446 
1447  /* *INDENT-OFF* */
1448  pgbuf_Pool.direct_victims.waiter_threads_low_priority =
1450  /* *INDENT-ON* */
1451  if (pgbuf_Pool.direct_victims.waiter_threads_low_priority == NULL)
1452  {
1453  ASSERT_ERROR ();
1454  goto error;
1455  }
1456 
1457  /* *INDENT-OFF* */
1458  pgbuf_Pool.flushed_bcbs = new lockfree::circular_queue<PGBUF_BCB *> (PGBUF_FLUSHED_BCBS_BUFFER_SIZE);
1459  /* *INDENT-ON* */
1460  if (pgbuf_Pool.flushed_bcbs == NULL)
1461  {
1462  ASSERT_ERROR ();
1463  goto error;
1464  }
1465 #endif /* SERVER_MODE */
1466 
1468  {
1469  /* *INDENT-OFF* */
1471  /* *INDENT-ON* */
1472  if (pgbuf_Pool.private_lrus_with_victims == NULL)
1473  {
1474  ASSERT_ERROR ();
1475  goto error;
1476  }
1477 
1478  /* *INDENT-OFF* */
1480  /* *INDENT-ON* */
1481  if (pgbuf_Pool.big_private_lrus_with_victims == NULL)
1482  {
1483  ASSERT_ERROR ();
1484  goto error;
1485  }
1486  }
1487 
1488  /* *INDENT-OFF* */
1490  /* *INDENT-ON* */
1491  if (pgbuf_Pool.shared_lrus_with_victims == NULL)
1492  {
1493  ASSERT_ERROR ();
1494  goto error;
1495  }
1496 
1497  pgbuf_Pool.show_status = (PGBUF_STATUS *) malloc (sizeof (PGBUF_STATUS) * (MAX_NTRANS + 1));
1498  if (pgbuf_Pool.show_status == NULL)
1499  {
1500  ASSERT_ERROR ();
1501  goto error;
1502  }
1503 
1504  memset (pgbuf_Pool.show_status, 0, sizeof (PGBUF_STATUS) * (MAX_NTRANS + 1));
1505 
1506  pgbuf_Pool.show_status_old.print_out_time = time (NULL);
1507 
1508 #if defined(SERVER_MODE)
1509  pthread_mutex_init (&pgbuf_Pool.show_status_mutex, NULL);
1510 #endif
1511 
1512  return NO_ERROR;
1513 
1514 error:
1515  /* destroy mutexes and deallocate all the allocated memory */
1516  pgbuf_finalize ();
1517  return ER_FAILED;
1518 }
1519 
1520 /*
1521  * pgbuf_finalize () - Terminate the page buffer manager
1522  * return: void
1523  *
1524  * Note: Function invalidates any resident page, destroys the hash table used
1525  * for lookup of pages in the page buffer pool.
1526  */
1527 void
1529 {
1530  PGBUF_BCB *bufptr;
1531  PGBUF_HOLDER_SET *holder_set;
1532  int i;
1533  size_t hash_size, j;
1534 
1535 #if defined(CUBRID_DEBUG)
1536  pgbuf_dump_if_any_fixed ();
1537 #endif /* CUBRID_DEBUG */
1538 
1539  /* final task for buffer hash table */
1540  if (pgbuf_Pool.buf_hash_table != NULL)
1541  {
1542  hash_size = PGBUF_HASH_SIZE;
1543  for (j = 0; j < hash_size; j++)
1544  {
1545  pthread_mutex_destroy (&pgbuf_Pool.buf_hash_table[j].hash_mutex);
1546  }
1547  free_and_init (pgbuf_Pool.buf_hash_table);
1548  }
1549 
1550  /* final task for buffer lock table */
1551  if (pgbuf_Pool.buf_lock_table != NULL)
1552  {
1553  free_and_init (pgbuf_Pool.buf_lock_table);
1554  }
1555 
1556  /* final task for BCB table */
1557  if (pgbuf_Pool.BCB_table != NULL)
1558  {
1559  for (i = 0; i < pgbuf_Pool.num_buffers; i++)
1560  {
1561  bufptr = PGBUF_FIND_BCB_PTR (i);
1562  pthread_mutex_destroy (&bufptr->mutex);
1563  }
1564  free_and_init (pgbuf_Pool.BCB_table);
1565  pgbuf_Pool.num_buffers = 0;
1566  }
1567 
1568  if (pgbuf_Pool.iopage_table != NULL)
1569  {
1570  free_and_init (pgbuf_Pool.iopage_table);
1571  }
1572 
1573  /* final task for LRU list */
1574  if (pgbuf_Pool.buf_LRU_list != NULL)
1575  {
1576  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
1577  {
1578  pthread_mutex_destroy (&pgbuf_Pool.buf_LRU_list[i].mutex);
1579  }
1580  free_and_init (pgbuf_Pool.buf_LRU_list);
1581  }
1582 
1583  /* final task for invalid BCB list */
1584  pthread_mutex_destroy (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
1585 
1586  /* final task for thrd_holder_info */
1587  if (pgbuf_Pool.thrd_holder_info != NULL)
1588  {
1589  free_and_init (pgbuf_Pool.thrd_holder_info);
1590  }
1591 
1592  if (pgbuf_Pool.thrd_reserved_holder != NULL)
1593  {
1594  free_and_init (pgbuf_Pool.thrd_reserved_holder);
1595  }
1596 
1597  /* final task for free holder set */
1598  pthread_mutex_destroy (&pgbuf_Pool.free_holder_set_mutex);
1599  while (pgbuf_Pool.free_holder_set != NULL)
1600  {
1601  holder_set = pgbuf_Pool.free_holder_set;
1602  pgbuf_Pool.free_holder_set = holder_set->next_set;
1603  free_and_init (holder_set);
1604  }
1605 
1606  if (pgbuf_Pool.victim_cand_list != NULL)
1607  {
1608  free_and_init (pgbuf_Pool.victim_cand_list);
1609  }
1610 
1611  if (pgbuf_Pool.buf_AOUT_list.bufarray != NULL)
1612  {
1613  free_and_init (pgbuf_Pool.buf_AOUT_list.bufarray);
1614  }
1615 
1616  if (pgbuf_Pool.buf_AOUT_list.aout_buf_ht != NULL)
1617  {
1618  for (i = 0; i < pgbuf_Pool.buf_AOUT_list.num_hashes; i++)
1619  {
1620  mht_destroy (pgbuf_Pool.buf_AOUT_list.aout_buf_ht[i]);
1621  }
1623 
1624  pgbuf_Pool.buf_AOUT_list.num_hashes = 0;
1625  }
1626 
1627  pthread_mutex_destroy (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
1628 
1629  pgbuf_Pool.buf_AOUT_list.aout_buf_ht = NULL;
1630  pgbuf_Pool.buf_AOUT_list.Aout_bottom = NULL;
1631  pgbuf_Pool.buf_AOUT_list.Aout_top = NULL;
1632  pgbuf_Pool.buf_AOUT_list.Aout_free = NULL;
1633  pgbuf_Pool.buf_AOUT_list.max_count = 0;
1634 
1635  if (pgbuf_Pool.seq_chkpt_flusher.flush_list != NULL)
1636  {
1638  }
1639 
1640  /* Free quota structure data */
1641  if (pgbuf_Pool.quota.lru_victim_flush_priority_per_lru != NULL)
1642  {
1644  }
1645  if (pgbuf_Pool.quota.private_lru_session_cnt != NULL)
1646  {
1648  }
1649 
1650  /* Free monitor structure data */
1651  if (pgbuf_Pool.monitor.lru_hits != NULL)
1652  {
1653  free_and_init (pgbuf_Pool.monitor.lru_hits);
1654  }
1655  if (pgbuf_Pool.monitor.lru_activity != NULL)
1656  {
1657  free_and_init (pgbuf_Pool.monitor.lru_activity);
1658  }
1659 
1660 #if defined (SERVER_MODE)
1661  if (pgbuf_Pool.monitor.bcb_locks != NULL)
1662  {
1663  free_and_init (pgbuf_Pool.monitor.bcb_locks);
1664  }
1665 
1666  if (pgbuf_Pool.direct_victims.bcb_victims != NULL)
1667  {
1668  free_and_init (pgbuf_Pool.direct_victims.bcb_victims);
1669  }
1670  if (pgbuf_Pool.direct_victims.waiter_threads_high_priority != NULL)
1671  {
1672  delete pgbuf_Pool.direct_victims.waiter_threads_high_priority;
1673  pgbuf_Pool.direct_victims.waiter_threads_high_priority = NULL;
1674  }
1675  if (pgbuf_Pool.direct_victims.waiter_threads_low_priority != NULL)
1676  {
1677  delete pgbuf_Pool.direct_victims.waiter_threads_low_priority;
1678  pgbuf_Pool.direct_victims.waiter_threads_low_priority = NULL;
1679  }
1680  if (pgbuf_Pool.flushed_bcbs != NULL)
1681  {
1682  delete pgbuf_Pool.flushed_bcbs;
1683  pgbuf_Pool.flushed_bcbs = NULL;
1684  }
1685 #endif /* SERVER_MODE */
1686 
1687  if (pgbuf_Pool.private_lrus_with_victims != NULL)
1688  {
1689  delete pgbuf_Pool.private_lrus_with_victims;
1690  pgbuf_Pool.private_lrus_with_victims = NULL;
1691  }
1692  if (pgbuf_Pool.big_private_lrus_with_victims != NULL)
1693  {
1694  delete pgbuf_Pool.big_private_lrus_with_victims;
1695  pgbuf_Pool.big_private_lrus_with_victims = NULL;
1696  }
1697  if (pgbuf_Pool.shared_lrus_with_victims != NULL)
1698  {
1699  delete pgbuf_Pool.shared_lrus_with_victims;
1700  pgbuf_Pool.shared_lrus_with_victims = NULL;
1701  }
1702 
1703  if (pgbuf_Pool.show_status != NULL)
1704  {
1705  free (pgbuf_Pool.show_status);
1706  pgbuf_Pool.show_status = NULL;
1707  }
1708 
1709 #if defined(SERVER_MODE)
1710  pthread_mutex_destroy (&pgbuf_Pool.show_status_mutex);
1711 #endif
1712 }
1713 
1714 /*
1715  * pgbuf_fix_with_retry () -
1716  * return: Pointer to the page or NULL
1717  * vpid(in): Complete Page identifier
1718  * fetch_mode(in): Page fetch mode
1719  * request_mode(in): Lock request_mode
1720  * retry(in): Retry count
1721  */
1722 PAGE_PTR
1724  PGBUF_LATCH_MODE request_mode, int retry)
1725 {
1726  PAGE_PTR pgptr;
1727  int i = 0;
1728  bool noretry = false;
1729 
1730  while ((pgptr = pgbuf_fix (thread_p, vpid, fetch_mode, request_mode, PGBUF_UNCONDITIONAL_LATCH)) == NULL)
1731  {
1732  switch (er_errid ())
1733  {
1734  case NO_ERROR: /* interrupt */
1735  case ER_INTERRUPTED:
1736  break;
1737  case ER_LK_UNILATERALLY_ABORTED: /* timeout */
1738  case ER_LK_PAGE_TIMEOUT:
1740  i++;
1741  break;
1742  default:
1743  noretry = true;
1744  break;
1745  }
1746 
1747  if (noretry || i > retry)
1748  {
1750  break;
1751  }
1752  }
1753 
1754  return pgptr;
1755 }
1756 
1757 /*
1758  * below two functions are dummies for Windows build
1759  * (which defined at cubridsa.def)
1760  */
1761 #if defined(WINDOWS)
1762 #if !defined(NDEBUG)
1763 PAGE_PTR
1764 pgbuf_fix_release (THREAD_ENTRY * thread_p, const VPID * vpid, PAGE_FETCH_MODE fetch_mode,
1765  PGBUF_LATCH_MODE request_mode, PGBUF_LATCH_CONDITION condition)
1766 {
1767  return NULL;
1768 }
1769 #else
1770 PAGE_PTR
1771 pgbuf_fix_debug (THREAD_ENTRY * thread_p, const VPID * vpid, PAGE_FETCH_MODE fetch_mode, PGBUF_LATCH_MODE request_mode,
1772  PGBUF_LATCH_CONDITION condition, const char *caller_file, int caller_line)
1773 {
1774  return NULL;
1775 }
1776 #endif
1777 #endif
1778 
1779 /*
1780  * pgbuf_fix () -
1781  * return: Pointer to the page or NULL
1782  * vpid(in): Complete Page identifier
1783  * fetch_mode(in): Page fetch mode.
1784  * request_mode(in): Page latch mode.
1785  * condition(in): Page latch condition.
1786  */
1787 #if !defined(NDEBUG)
1788 PAGE_PTR
1789 pgbuf_fix_debug (THREAD_ENTRY * thread_p, const VPID * vpid, PAGE_FETCH_MODE fetch_mode, PGBUF_LATCH_MODE request_mode,
1790  PGBUF_LATCH_CONDITION condition, const char *caller_file, int caller_line)
1791 #else /* NDEBUG */
1792 PAGE_PTR
1793 pgbuf_fix_release (THREAD_ENTRY * thread_p, const VPID * vpid, PAGE_FETCH_MODE fetch_mode,
1794  PGBUF_LATCH_MODE request_mode, PGBUF_LATCH_CONDITION condition)
1795 #endif /* NDEBUG */
1796 {
1797  PGBUF_BUFFER_HASH *hash_anchor;
1798  PGBUF_BCB *bufptr;
1799  PAGE_PTR pgptr;
1800  int wait_msecs;
1801 #if defined(ENABLE_SYSTEMTAP)
1802  bool pgbuf_hit = false;
1803 #endif /* ENABLE_SYSTEMTAP */
1804  PGBUF_HOLDER *holder;
1805  PGBUF_WATCHER *watcher;
1806  bool buf_lock_acquired = false;
1807  bool is_latch_wait = false;
1808  bool retry = false;
1809 #if !defined (NDEBUG)
1810  bool had_holder = false;
1811 #endif /* !NDEBUG */
1812  PGBUF_FIX_PERF perf;
1813  bool maybe_deallocated, force_set_vpid;
1814  int tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
1815  PGBUF_STATUS *show_status = &pgbuf_Pool.show_status[tran_index];
1816 
1818 
1819  /* parameter validation */
1820  if (request_mode != PGBUF_LATCH_READ && request_mode != PGBUF_LATCH_WRITE)
1821  {
1822  assert_release (false);
1823  return NULL;
1824  }
1825  if (condition != PGBUF_UNCONDITIONAL_LATCH && condition != PGBUF_CONDITIONAL_LATCH)
1826  {
1827  assert_release (false);
1828  return NULL;
1829  }
1830 
1831  ATOMIC_INC_32 (&pgbuf_Pool.monitor.fix_req_cnt, 1);
1832 
1834  {
1835  /* Make sure that the page has been allocated (i.e., is a valid page) */
1836  /* Suppress errors if fetch mode is OLD_PAGE_IF_IN_BUFFER. */
1837  if (pgbuf_is_valid_page (thread_p, vpid, fetch_mode == OLD_PAGE_IF_IN_BUFFER, NULL, NULL) != DISK_VALID)
1838  {
1839  return NULL;
1840  }
1841  }
1842 
1843  /* Do a simple check in non debugging mode */
1844  if (vpid->pageid < 0)
1845  {
1848  return NULL;
1849  }
1850 
1851  if (condition == PGBUF_UNCONDITIONAL_LATCH)
1852  {
1853  /* Check the wait_msecs of current transaction. If the wait_msecs is zero wait that means no wait, change current
1854  * request as a conditional request. */
1855  wait_msecs = pgbuf_find_current_wait_msecs (thread_p);
1856 
1857  if (wait_msecs == LK_ZERO_WAIT || wait_msecs == LK_FORCE_ZERO_WAIT)
1858  {
1859  condition = PGBUF_CONDITIONAL_LATCH;
1860  }
1861  }
1862 
1863  perf.lock_wait_time = 0;
1865 
1866  if (perf.is_perf_tracking)
1867  {
1868  tsc_getticks (&perf.start_tick);
1869  }
1870 
1871 try_again:
1872 
1873  /* interrupt check */
1874  if (logtb_get_check_interrupt (thread_p) == true)
1875  {
1876  if (logtb_is_interrupted (thread_p, true, &pgbuf_Pool.check_for_interrupts) == true)
1877  {
1880  return NULL;
1881  }
1882  }
1883 
1884  /* Normal process */
1885  /* latch_mode = PGBUF_LATCH_READ/PGBUF_LATCH_WRITE */
1886  hash_anchor = &pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (vpid)];
1887 
1888  buf_lock_acquired = false;
1889  bufptr = pgbuf_search_hash_chain (thread_p, hash_anchor, vpid);
1890  if (bufptr != NULL && pgbuf_bcb_is_direct_victim (bufptr))
1891  {
1892  /* we need to notify the thread that is waiting for this bcb to victimize that it cannot use it. */
1894  }
1895  if (bufptr != NULL)
1896  {
1897 #if defined (ENABLE_SYSTEMTAP)
1898  CUBRID_PGBUF_HIT ();
1899  pgbuf_hit = true;
1900 #endif /* ENABLE_SYSTEMTAP */
1901 
1902  show_status->num_hit++;
1903 
1904  if (fetch_mode == NEW_PAGE)
1905  {
1906  /* Fix a page as NEW_PAGE, when oldest_unflush_lsa of the page is not NULL_LSA, it should be dirty. */
1907  assert (LSA_ISNULL (&bufptr->oldest_unflush_lsa) || pgbuf_bcb_is_dirty (bufptr));
1908 
1909  /* The page may be invalidated and has been remained in the buffer and it is going to be used again as a new
1910  * page. */
1911  }
1912  }
1913  else if (fetch_mode == OLD_PAGE_IF_IN_BUFFER)
1914  {
1915  /* we don't need to fix page */
1916  pthread_mutex_unlock (&hash_anchor->hash_mutex);
1917  return NULL;
1918  }
1919  else
1920  {
1921  bufptr = pgbuf_claim_bcb_for_fix (thread_p, vpid, fetch_mode, hash_anchor, &perf, &retry);
1922  if (bufptr == NULL)
1923  {
1924  if (retry)
1925  {
1926  retry = false;
1927  goto try_again;
1928  }
1929  ASSERT_ERROR ();
1930  return NULL;
1931  }
1932  buf_lock_acquired = true;
1933 
1934 #if defined(ENABLE_SYSTEMTAP)
1935  if (fetch_mode == NEW_PAGE && pgbuf_hit == false)
1936  {
1937  pgbuf_hit = true;
1938  }
1939  if (fetch_mode != NEW_PAGE)
1940  {
1941  CUBRID_PGBUF_MISS ();
1942  }
1943 #endif /* ENABLE_SYSTEMTAP */
1944  }
1945  assert (!pgbuf_bcb_is_direct_victim (bufptr));
1946 
1947  /* At this place, the caller is holding bufptr->mutex */
1948 
1949  pgbuf_bcb_register_fix (bufptr);
1950 
1951  /* Set Page identifier if needed */
1952  // Redo recovery may find an immature page which should be set.
1953  force_set_vpid = (fetch_mode == NEW_PAGE && log_is_in_crash_recovery_and_not_yet_completes_redo ());
1954  pgbuf_set_bcb_page_vpid (bufptr, force_set_vpid);
1955 
1956  maybe_deallocated = (fetch_mode == OLD_PAGE_MAYBE_DEALLOCATED);
1957  if (pgbuf_check_bcb_page_vpid (bufptr, maybe_deallocated) != true)
1958  {
1959  if (buf_lock_acquired)
1960  {
1961  /* bufptr->mutex will be released in the following function. */
1962  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
1963 
1964  /*
1965  * Now, caller is not holding any mutex.
1966  * the last argument of pgbuf_unlock_page () is true that
1967  * means hash_mutex must be held before unlocking page.
1968  */
1969  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, true);
1970  }
1971  else
1972  {
1973  PGBUF_BCB_UNLOCK (bufptr);
1974  }
1975 
1977  return NULL;
1978  }
1979 
1980  if (fetch_mode == OLD_PAGE_PREVENT_DEALLOC)
1981  {
1983  }
1984 
1985  /* At this place, the caller is holding bufptr->mutex */
1986  if (perf.is_perf_tracking)
1987  {
1989  }
1990 
1991  /* Latch Pass */
1992 #if !defined (NDEBUG)
1993  had_holder = pgbuf_find_thrd_holder (thread_p, bufptr) != NULL;
1994 #endif /* NDEBUG */
1995  if (pgbuf_latch_bcb_upon_fix (thread_p, bufptr, request_mode, buf_lock_acquired, condition, &is_latch_wait)
1996  != NO_ERROR)
1997  {
1998  /* bufptr->mutex has been released, error was set in the function, */
1999 
2000  if (buf_lock_acquired)
2001  {
2002  /* hold bufptr->mutex again */
2003  PGBUF_BCB_LOCK (bufptr);
2004 
2005  /* bufptr->mutex will be released in the following function. */
2006  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
2007 
2008  /*
2009  * Now, caller is not holding any mutex.
2010  * the last argument of pgbuf_unlock_page () is true that
2011  * means hash_mutex must be held before unlocking page.
2012  */
2013  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, true);
2014  }
2015 
2017  return NULL;
2018  }
2019 
2020 #if !defined (NDEBUG)
2021  pgbuf_add_fixed_at (pgbuf_find_thrd_holder (thread_p, bufptr), caller_file, caller_line, !had_holder);
2022 #endif /* NDEBUG */
2023 
2024  if (perf.is_perf_tracking && is_latch_wait)
2025  {
2026  tsc_getticks (&perf.end_tick);
2028  perf.holder_wait_time = perf.tv_diff.tv_sec * 1000000LL + perf.tv_diff.tv_usec;
2029  }
2030 
2031  assert (bufptr == bufptr->iopage_buffer->bcb);
2032 
2033  /* In case of NO_ERROR, bufptr->mutex has been released. */
2034 
2035  /* Dirty Pages Table Registration Pass */
2036 
2037  /* Currently, do nothing. Whenever the fixed page becomes dirty, oldest_unflush_lsa is set. */
2038 
2039  /* Hash Chain Connection Pass */
2040  if (buf_lock_acquired)
2041  {
2042  pgbuf_insert_into_hash_chain (thread_p, hash_anchor, bufptr);
2043 
2044  /*
2045  * the caller is holding hash_anchor->hash_mutex.
2046  * Therefore, the third argument of pgbuf_unlock_page () is false
2047  * that means hash mutex does not need to be held.
2048  */
2049  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, false);
2050  }
2051 
2052  CAST_BFPTR_TO_PGPTR (pgptr, bufptr);
2053 
2054 #if !defined (NDEBUG)
2055  assert (pgptr != NULL);
2056 
2057  holder = pgbuf_get_holder (thread_p, pgptr);
2058  assert (holder != NULL);
2059 
2060  watcher = holder->last_watcher;
2061  while (watcher != NULL)
2062  {
2063  assert (watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
2064  watcher = watcher->prev;
2065  }
2066 #endif
2067 
2068  if (fetch_mode == OLD_PAGE_PREVENT_DEALLOC)
2069  {
2070  /* latch is obtained, no need for avoidance of dealloc */
2072  }
2073 
2074 #if !defined (NDEBUG)
2075  thread_p->get_pgbuf_tracker ().increment (caller_file, caller_line, pgptr);
2076 #endif // !NDEBUG
2077 
2078  if (bufptr->iopage_buffer->iopage.prv.ptype == PAGE_UNKNOWN)
2079  {
2080  /* deallocated page */
2081  switch (fetch_mode)
2082  {
2083  case NEW_PAGE:
2084  case OLD_PAGE_DEALLOCATED:
2085  case OLD_PAGE_IF_IN_BUFFER:
2086  /* fixing deallocated page is expected. fall through to return it. */
2087  break;
2088  case OLD_PAGE:
2090  default:
2091  /* caller does not expect any deallocated pages. this is an invalid page. */
2092  assert (false);
2095  /* fall through to unfix */
2097  pgbuf_unfix (thread_p, pgptr);
2098  return NULL;
2100  /* OLD_PAGE_MAYBE_DEALLOCATED is called when deallocated page may be fixed. The caller wants the page only if
2101  * it is not deallocated. However, if it is deallocated, no error is required. */
2104  /* fall through to unfix */
2106  pgbuf_unfix (thread_p, pgptr);
2107  return NULL;
2108  }
2109 
2110  /* note: maybe we could check this in an earlier stage, but would have been a lot more complicated. the only
2111  * interesting case here is OLD_PAGE_MAYBE_DEALLOCATED. However, even this is used in cases where the vast
2112  * majority of pages will not be deallocated! So in terms of performance, the loss is insignificant.
2113  * However, it is safer and easier to treat the case here, where we have latch to prevent concurrent
2114  * deallocations. */
2115  }
2116  else
2117  {
2118  /* this cannot be a new page or a deallocated page.
2119  * note: temporary pages are not strictly handled in regard with their deallocation status. */
2120  assert (fetch_mode != NEW_PAGE || pgbuf_is_lsa_temporary (pgptr));
2121  }
2122 
2123  show_status->num_page_request++;
2124 
2125  /* Record number of fetches in statistics */
2126  if (perf.is_perf_tracking)
2127  {
2128  perf.perf_page_type = pgbuf_get_page_type_for_stat (thread_p, pgptr);
2129 
2131  if (request_mode == PGBUF_LATCH_READ)
2132  {
2134  }
2135  else
2136  {
2137  assert (request_mode == PGBUF_LATCH_WRITE);
2139  }
2140 
2141  if (condition == PGBUF_UNCONDITIONAL_LATCH)
2142  {
2143  if (is_latch_wait)
2144  {
2146  if (perf.holder_wait_time > 0)
2147  {
2148  perfmon_pbx_hold_acquire_time (thread_p, perf.perf_page_type, perf.perf_page_found,
2149  perf.perf_latch_mode, perf.holder_wait_time);
2150  }
2151  }
2152  else
2153  {
2155  }
2156  }
2157  else
2158  {
2160  }
2161 
2162  perfmon_pbx_fix (thread_p, perf.perf_page_type, perf.perf_page_found, perf.perf_latch_mode, perf.perf_cond_type);
2163  if (perf.lock_wait_time > 0)
2164  {
2165  perfmon_pbx_lock_acquire_time (thread_p, perf.perf_page_type, perf.perf_page_found, perf.perf_latch_mode,
2166  perf.perf_cond_type, perf.lock_wait_time);
2167  }
2168 
2169  tsc_getticks (&perf.end_tick);
2170  tsc_elapsed_time_usec (&perf.tv_diff, perf.end_tick, perf.start_tick);
2171  perf.fix_wait_time = perf.tv_diff.tv_sec * 1000000LL + perf.tv_diff.tv_usec;
2172 
2173  if (perf.fix_wait_time > 0)
2174  {
2175  perfmon_pbx_fix_acquire_time (thread_p, perf.perf_page_type, perf.perf_page_found, perf.perf_latch_mode,
2176  perf.perf_cond_type, perf.fix_wait_time);
2177  }
2178  }
2179 
2180  if (VACUUM_IS_THREAD_VACUUM_WORKER (thread_p))
2181  {
2182  pgbuf_bcb_update_flags (thread_p, bufptr, 0, PGBUF_BCB_TO_VACUUM_FLAG);
2183  }
2184 
2186 
2187  return pgptr;
2188 }
2189 
2190 /*
2191  * pgbuf_promote_read_latch () - Promote read latch to write latch
2192  * return: error code or NO_ERROR
2193  * pgptr(in/out): page pointer
2194  * condition(in): promotion condition (single reader holder/shared reader holder)
2195  */
2196 #if !defined (NDEBUG)
2197 int
2199  const char *caller_file, int caller_line)
2200 #else /* NDEBUG */
2201 int
2202 pgbuf_promote_read_latch_release (THREAD_ENTRY * thread_p, PAGE_PTR * pgptr_p, PGBUF_PROMOTE_CONDITION condition)
2203 #endif /* NDEBUG */
2204 {
2205  PGBUF_BCB *bufptr;
2206 #if defined(SERVER_MODE)
2207  PGBUF_HOLDER *holder;
2208  VPID vpid;
2209  TSC_TICKS start_tick, end_tick;
2210  TSCTIMEVAL tv_diff;
2211  UINT64 promote_wait_time;
2212  bool is_perf_tracking;
2213  PERF_PAGE_TYPE perf_page_type = PERF_PAGE_UNKNOWN;
2214  PERF_PROMOTE_CONDITION perf_promote_cond_type = PERF_PROMOTE_ONLY_READER;
2215  PERF_HOLDER_LATCH perf_holder_latch = PERF_HOLDER_LATCH_READ;
2216  int stat_success = 0;
2217  int rv = NO_ERROR;
2218 #endif /* SERVER_MODE */
2219 
2220 #if !defined (NDEBUG)
2221  assert (pgptr_p != NULL);
2222  assert (*pgptr_p != NULL);
2223 
2225  {
2226  if (pgbuf_is_valid_page_ptr (*pgptr_p) == false)
2227  {
2228  return ER_FAILED;
2229  }
2230  }
2231 #else /* !NDEBUG */
2232  if (*pgptr_p == NULL)
2233  {
2234  return ER_FAILED;
2235  }
2236 #endif /* !NDEBUG */
2237 
2238  /* fetch BCB from page pointer */
2239  CAST_PGPTR_TO_BFPTR (bufptr, *pgptr_p);
2240  assert (!VPID_ISNULL (&bufptr->vpid));
2241 
2242  /* check latch mode - no need for BCB mutex, page is already latched */
2243  if (bufptr->latch_mode == PGBUF_LATCH_WRITE)
2244  {
2245  /* this is a redundant call */
2246  return NO_ERROR;
2247  }
2248  else if (bufptr->latch_mode != PGBUF_LATCH_READ)
2249  {
2250  assert_release (false);
2251  return ER_FAILED;
2252  }
2253 
2254  /* check condition */
2255  if (condition != PGBUF_PROMOTE_ONLY_READER && condition != PGBUF_PROMOTE_SHARED_READER)
2256  {
2257  assert_release (false);
2258  return ER_FAILED;
2259  }
2260 
2261 #if defined(SERVER_MODE) /* SERVER_MODE */
2262  /* performance tracking - get start counter */
2263  is_perf_tracking = perfmon_is_perf_tracking ();
2264  if (is_perf_tracking)
2265  {
2266  tsc_getticks (&start_tick);
2267  }
2268 
2269  PGBUF_BCB_LOCK (bufptr);
2270 
2271  /* save info for performance tracking */
2272  vpid = bufptr->vpid;
2273  if (is_perf_tracking)
2274  {
2275  perf_page_type = pgbuf_get_page_type_for_stat (thread_p, *pgptr_p);
2276 
2277  /* promote condition */
2278  if (condition == PGBUF_PROMOTE_ONLY_READER)
2279  {
2280  perf_promote_cond_type = PERF_PROMOTE_ONLY_READER;
2281  }
2282  else
2283  {
2284  perf_promote_cond_type = PERF_PROMOTE_SHARED_READER;
2285  }
2286 
2287  /* latch mode - NOTE: MIX will be always zero */
2288  if (bufptr->latch_mode == PGBUF_LATCH_READ)
2289  {
2290  perf_holder_latch = PERF_HOLDER_LATCH_READ;
2291  }
2292  else
2293  {
2294  perf_holder_latch = PERF_HOLDER_LATCH_WRITE;
2295  }
2296  }
2297 
2298  /* check if we're the single read latch holder */
2299  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
2300  assert_release (holder != NULL);
2301  if (holder->fix_count == bufptr->fcnt)
2302  {
2303  assert (bufptr->latch_mode == PGBUF_LATCH_READ);
2304 
2305  /* check for waiters for promotion */
2306  if (bufptr->next_wait_thrd != NULL && bufptr->next_wait_thrd->wait_for_latch_promote)
2307  {
2308  PGBUF_BCB_UNLOCK (bufptr);
2310 #if !defined(NDEBUG)
2312 #endif
2313  goto end;
2314  }
2315 
2316  /* we're the single holder of the read latch, do an in-place promotion */
2317  bufptr->latch_mode = PGBUF_LATCH_WRITE;
2318  holder->perf_stat.hold_has_write_latch = 1;
2319  /* NOTE: no need to set the promoted flag as long as we don't wait */
2320  PGBUF_BCB_UNLOCK (bufptr);
2321  }
2322  else
2323  {
2324  if ((condition == PGBUF_PROMOTE_ONLY_READER)
2325  || (bufptr->next_wait_thrd != NULL && bufptr->next_wait_thrd->wait_for_latch_promote))
2326  {
2327  /*
2328  * CASE #1: first waiter is from a latch promotion - we can't
2329  * guarantee both will see the same page they initially fixed so
2330  * we'll abort the current promotion
2331  * CASE #2: PGBUF_PROMOTE_ONLY_READER condition, we're only allowed
2332  * to promote if we're the only reader; this is not the case
2333  */
2334  PGBUF_BCB_UNLOCK (bufptr);
2336 #if !defined(NDEBUG)
2338 #endif
2339  goto end;
2340  }
2341  else
2342  {
2343  int fix_count = holder->fix_count;
2344  PGBUF_HOLDER_STAT perf_stat = holder->perf_stat;
2345 
2346  bufptr->fcnt -= fix_count;
2347  holder->fix_count = 0;
2348  if (pgbuf_remove_thrd_holder (thread_p, holder) != NO_ERROR)
2349  {
2350  /* We unfixed the page, but failed to remove holder entry; consider the page as unfixed */
2351  *pgptr_p = NULL;
2352 
2353  /* shouldn't happen */
2354  PGBUF_BCB_UNLOCK (bufptr);
2355  assert_release (false);
2356  return ER_FAILED;
2357  }
2358  holder = NULL;
2359  /* NOTE: at this point the page is unfixed */
2360 
2361  /* flag this thread as promoter */
2362  thread_p->wait_for_latch_promote = true;
2363 
2364  /* register as first blocker */
2365  if (pgbuf_block_bcb (thread_p, bufptr, PGBUF_LATCH_WRITE, fix_count, true) != NO_ERROR)
2366  {
2367  *pgptr_p = NULL; /* we didn't get a new latch */
2368  thread_p->wait_for_latch_promote = false;
2369  return ER_FAILED;
2370  }
2371 
2372  /* NOTE: BCB mutex is no longer held at this point */
2373 
2374  /* remove promote flag */
2375  thread_p->wait_for_latch_promote = false;
2376 
2377  /* new holder entry */
2378  assert (pgbuf_find_thrd_holder (thread_p, bufptr) == NULL);
2379  holder = pgbuf_allocate_thrd_holder_entry (thread_p);
2380  if (holder == NULL)
2381  {
2382  /* We have new latch, but can't add a holder entry; consider the page as fixed */
2383  /* This situation must not be occurred. */
2384  assert_release (false);
2385  return ER_FAILED;
2386  }
2387  holder->fix_count = fix_count;
2388  holder->bufptr = bufptr;
2389  holder->perf_stat = perf_stat;
2390  if (bufptr->latch_mode == PGBUF_LATCH_WRITE)
2391  {
2392  holder->perf_stat.hold_has_write_latch = 1;
2393  }
2394  else if (bufptr->latch_mode == PGBUF_LATCH_READ)
2395  {
2396  holder->perf_stat.hold_has_read_latch = 1;
2397  }
2398 #if !defined(NDEBUG)
2399  pgbuf_add_fixed_at (holder, caller_file, caller_line, true);
2400 #endif /* NDEBUG */
2401  }
2402  }
2403 
2404 end:
2405  assert (rv == NO_ERROR || rv == ER_PAGE_LATCH_PROMOTE_FAIL);
2406 
2407  /* performance tracking */
2408  if (is_perf_tracking)
2409  {
2410  /* compute time */
2411  tsc_getticks (&end_tick);
2412  tsc_elapsed_time_usec (&tv_diff, end_tick, start_tick);
2413  promote_wait_time = tv_diff.tv_sec * 1000000LL + tv_diff.tv_usec;
2414 
2415  /* determine success or fail */
2416  if (rv == NO_ERROR)
2417  {
2418  stat_success = 1;
2419  }
2420 
2421  /* aggregate success/fail */
2422  perfmon_pbx_promote (thread_p, perf_page_type, perf_promote_cond_type, perf_holder_latch, stat_success,
2423  promote_wait_time);
2424  }
2425 
2426  /* all successful */
2427  return rv;
2428 
2429 #else /* SERVER_MODE */
2430  bufptr->latch_mode = PGBUF_LATCH_WRITE;
2431  return NO_ERROR;
2432 #endif
2433 }
2434 
2435 /*
2436  * pgbuf_unfix () - Free the buffer where the page associated with pgptr resides
2437  * return: void
2438  * pgptr(in): Pointer to page
2439  *
2440  * Note: The page is subject to replacement, if not fixed by other thread of execution.
2441  */
2442 #if !defined (NDEBUG)
2443 void
2444 pgbuf_unfix_debug (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, const char *caller_file, int caller_line)
2445 #else /* NDEBUG */
2446 void
2447 pgbuf_unfix (THREAD_ENTRY * thread_p, PAGE_PTR pgptr)
2448 #endif /* NDEBUG */
2449 {
2450  PGBUF_BCB *bufptr;
2451  int holder_status;
2452  PERF_HOLDER_LATCH perf_holder_latch;
2453  PGBUF_HOLDER *holder;
2454  PGBUF_WATCHER *watcher;
2455  PGBUF_HOLDER_STAT holder_perf_stat;
2456  PERF_PAGE_TYPE perf_page_type = PERF_PAGE_UNKNOWN;
2457  bool is_perf_tracking;
2458 
2459 #if defined(CUBRID_DEBUG)
2460  LOG_LSA restart_lsa;
2461 #endif /* CUBRID_DEBUG */
2462 
2463 #if !defined (NDEBUG)
2464  assert (pgptr != NULL);
2465 
2467  {
2468  if (pgbuf_is_valid_page_ptr (pgptr) == false)
2469  {
2470  return;
2471  }
2472  }
2473 
2474  holder = pgbuf_get_holder (thread_p, pgptr);
2475 
2476  assert (holder != NULL);
2477 
2478  watcher = holder->last_watcher;
2479  while (watcher != NULL)
2480  {
2481  assert (watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
2482  watcher = watcher->prev;
2483  }
2484 #else /* !NDEBUG */
2485  if (pgptr == NULL)
2486  {
2487  return;
2488  }
2489 #endif /* !NDEBUG */
2490 
2491  /* Get the address of the buffer from the page and free the buffer */
2492  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
2493  assert (!VPID_ISNULL (&bufptr->vpid));
2494 
2495 #if defined(CUBRID_DEBUG)
2496  /*
2497  * If the buffer is dirty and the log sequence address of the buffer
2498  * has not changed since the database restart, a warning is given about
2499  * lack of logging
2500  */
2501  if (pgbuf_bcb_is_dirty (bufptr) && !pgbuf_is_temp_lsa (bufptr->iopage_buffer->iopage.prv.lsa)
2502  && PGBUF_IS_AUXILIARY_VOLUME (bufptr->vpid.volid) == false
2504  {
2506  "pgbuf_unfix: WARNING: No logging on dirty pageid = %d of Volume = %s.\n Recovery problems"
2507  " may happen\n", bufptr->vpid.pageid, fileio_get_volume_label (bufptr->vpid.volid, PEEK));
2508  /*
2509  * Do not give warnings on this page any longer. Set the LSA of the
2510  * buffer for this purposes
2511  */
2512  pgbuf_set_lsa (thread_p, pgptr, log_get_restart_lsa ());
2513  pgbuf_set_lsa (thread_p, pgptr, &restart_lsa);
2514  LSA_COPY (&bufptr->oldest_unflush_lsa, &bufptr->iopage_buffer->iopage.prv.lsa);
2515  }
2516 
2517  /* Check for over runs */
2518  if (memcmp (PGBUF_FIND_BUFFER_GUARD (bufptr), pgbuf_Guard, sizeof (pgbuf_Guard)) != 0)
2519  {
2520  er_log_debug (ARG_FILE_LINE, "pgbuf_unfix: SYSTEM ERROR buffer of pageid = %d|%d has been OVER RUN",
2521  bufptr->vpid.volid, bufptr->vpid.pageid);
2522  memcpy (PGBUF_FIND_BUFFER_GUARD (bufptr), pgbuf_Guard, sizeof (pgbuf_Guard));
2523  }
2524 
2525  /* Give a warning if the page is not consistent */
2526  if (bufptr->fcnt <= 0)
2527  {
2529  "pgbuf_unfix: SYSTEM ERROR Freeing too much buffer of pageid = %d of Volume = %s\n",
2530  bufptr->vpid.pageid, fileio_get_volume_label (bufptr->vpid.volid, PEEK));
2531  }
2532 #endif /* CUBRID_DEBUG */
2533 
2534  is_perf_tracking = perfmon_is_perf_tracking ();
2535  if (is_perf_tracking)
2536  {
2537  perf_page_type = pgbuf_get_page_type_for_stat (thread_p, pgptr);
2538  }
2539  INIT_HOLDER_STAT (&holder_perf_stat);
2540  holder_status = pgbuf_unlatch_thrd_holder (thread_p, bufptr, &holder_perf_stat);
2541 
2542  assert (holder_perf_stat.hold_has_write_latch == 1 || holder_perf_stat.hold_has_read_latch == 1);
2543 
2544  if (is_perf_tracking)
2545  {
2546  if (holder_perf_stat.hold_has_read_latch && holder_perf_stat.hold_has_write_latch)
2547  {
2548  perf_holder_latch = PERF_HOLDER_LATCH_MIXED;
2549  }
2550  else if (holder_perf_stat.hold_has_read_latch)
2551  {
2552  perf_holder_latch = PERF_HOLDER_LATCH_READ;
2553  }
2554  else
2555  {
2556  assert (holder_perf_stat.hold_has_write_latch);
2557  perf_holder_latch = PERF_HOLDER_LATCH_WRITE;
2558  }
2559  perfmon_pbx_unfix (thread_p, perf_page_type, holder_perf_stat.dirty_before_hold,
2560  holder_perf_stat.dirtied_by_holder, perf_holder_latch);
2561  }
2562 
2563  PGBUF_BCB_LOCK (bufptr);
2564 
2565 #if !defined (NDEBUG)
2566  thread_p->get_pgbuf_tracker ().decrement (pgptr);
2567 #endif // !NDEBUG
2568  (void) pgbuf_unlatch_bcb_upon_unfix (thread_p, bufptr, holder_status);
2569  /* bufptr->mutex has been released in above function. */
2570 
2572 
2573 #if defined(CUBRID_DEBUG)
2574  /*
2575  * CONSISTENCIES AND SCRAMBLES
2576  * You may want to tailor the following debugging block
2577  * since its operations and their implications are very expensive.
2578  * Too much I/O
2579  */
2581  {
2582  /*
2583  * Check if the content of the page is consistent and then scramble
2584  * the page to detect illegal access to the page in the future.
2585  */
2586  PGBUF_BCB_LOCK (bufptr);
2587  if (bufptr->fcnt == 0)
2588  {
2589  /* Check for consistency */
2590  if (!VPID_ISNULL (&bufptr->vpid) && pgbuf_is_consistent (bufptr, 0) == PGBUF_CONTENT_BAD)
2591  {
2592  er_log_debug (ARG_FILE_LINE, "pgbuf_unfix: WARNING Pageid = %d|%d seems inconsistent",
2593  bufptr->vpid.volid, bufptr->vpid.pageid);
2594  /* some problems in the consistency of the given buffer page */
2595  pgbuf_dump ();
2596  }
2597  else
2598  {
2599  /* the given buffer page is consistent */
2600 
2601  /* Flush the page if it is dirty */
2602  if (pgbuf_bcb_is_dirty (bufptr))
2603  {
2604  /* flush the page with PGBUF_LATCH_FLUSH mode */
2605  (void) pgbuf_bcb_safe_flush_force_unlock (thread_p, bufptr, true);
2606  /*
2607  * Since above function releases bufptr->mutex,
2608  * the caller must hold bufptr->mutex again.
2609  */
2610  PGBUF_BCB_LOCK (bufptr);
2611  }
2612 
2613  /*
2614  * If the buffer is associated with a page (i.e., if the buffer
2615  * is not used as a working area --malloc--), invalidate the
2616  * page on this buffer.
2617  * Detach the buffer area or scramble tha area.
2618  */
2619  if (!VPID_ISNULL (&bufptr->vpid))
2620  {
2621  /* invalidate the page with PGBUF_LATCH_INVALID mode */
2622  (void) pgbuf_invalidate_bcb (thread_p, bufptr);
2623  /*
2624  * Since above function releases mutex after flushing,
2625  * the caller must hold bufptr->mutex again.
2626  */
2627  PGBUF_BCB_LOCK (bufptr);
2628  }
2629 
2630  pgbuf_scramble (&bufptr->iopage_buffer->iopage);
2631 
2632  /*
2633  * Note that the buffer is not declared for immediate
2634  * replacement.
2635  * wait for a while to see if an invalid access is found.
2636  */
2637  }
2638  }
2639  PGBUF_BCB_UNLOCK (bufptr);
2640  }
2641 #endif /* CUBRID_DEBUG */
2642 }
2643 
2644 /*
2645  * pgbuf_unfix_all () - Unfixes all the buffers that have been fixed by current
2646  * thread at the time of request termination
2647  * return: void
2648  *
2649  * Note: At the time of request termination, there must
2650  * be no buffers that were fixed by the thread. In current CUBRID
2651  * system, however, above situation has occurred. In some later time,
2652  * our system must be corrected to prevent above situation from
2653  * occurring.
2654  */
2655 void
2657 {
2658  int thrd_index;
2659  PAGE_PTR pgptr;
2660  PGBUF_HOLDER_ANCHOR *thrd_holder_info;
2661  PGBUF_HOLDER *holder;
2662 #if defined(NDEBUG)
2663 #else /* NDEBUG */
2664  PGBUF_BCB *bufptr;
2665 #if defined(CUBRID_DEBUG)
2666  int consistent;
2667 #endif /* CUBRID_DEBUG */
2668  const char *latch_mode_str, *zone_str, *consistent_str;
2669 #endif /* NDEBUG */
2670 
2671  thrd_index = thread_get_entry_index (thread_p);
2672 
2673  thrd_holder_info = &(pgbuf_Pool.thrd_holder_info[thrd_index]);
2674 
2675  if (thrd_holder_info->num_hold_cnt > 0)
2676  {
2677  /* For each BCB holder entry of thread's holder list */
2678  holder = thrd_holder_info->thrd_hold_list;
2679  while (holder != NULL)
2680  {
2681  assert (false);
2682 
2683  CAST_BFPTR_TO_PGPTR (pgptr, holder->bufptr);
2684 
2685 #if defined(NDEBUG)
2686  pgbuf_unfix_and_init (thread_p, pgptr);
2687 
2688  /* Within the execution of pgbuf_unfix(), the BCB holder entry is moved from the holder list of BCB to the
2689  * free holder list of thread, and the BCB holder entry is removed from the holder list of the thread. */
2690  holder = thrd_holder_info->thrd_hold_list;
2691 #else /* NDEBUG */
2692  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
2693  assert (!VPID_ISNULL (&bufptr->vpid));
2694 
2695  latch_mode_str = pgbuf_latch_mode_str (bufptr->latch_mode);
2696  zone_str = pgbuf_zone_str (pgbuf_bcb_get_zone (bufptr));
2697 
2698  /* check if the content of current buffer page is consistent. */
2699 #if defined(CUBRID_DEBUG)
2700  consistent = pgbuf_is_consistent (bufptr, 0);
2701  consistenet_str = pgbuf_consistent_str (consistent);
2702 #else /* CUBRID_DEBUG */
2703  consistent_str = "UNKNOWN";
2704 #endif /* CUBRID_DEBUG */
2706  "pgbuf_unfix_all: WARNING %4d %5d %6d %4d %9s %1d %1d %1d %11s %6d|%4d %10s %p %p-%p\n",
2707  pgbuf_bcb_get_pool_index (bufptr), bufptr->vpid.volid, bufptr->vpid.pageid, bufptr->fcnt,
2708  latch_mode_str, (int) pgbuf_bcb_is_dirty (bufptr), (int) pgbuf_bcb_is_flushing (bufptr),
2709  (int) pgbuf_bcb_is_async_flush_request (bufptr), zone_str,
2710  LSA_AS_ARGS (&bufptr->iopage_buffer->iopage.prv.lsa), consistent_str, (void *) bufptr,
2711  (void *) (&bufptr->iopage_buffer->iopage.page[0]),
2712  (void *) (&bufptr->iopage_buffer->iopage.page[DB_PAGESIZE - 1]));
2713 
2714  holder = holder->thrd_link;
2715 #endif /* NDEBUG */
2716  }
2717  }
2718 }
2719 
2720 /*
2721  * pgbuf_invalidate () - Invalidate page in buffer
2722  * return: NO_ERROR, or ER_code
2723  * pgptr(in): Pointer to page
2724  *
2725  * Note: Invalidate the buffer corresponding to page associated with pgptr when
2726  * the page has been fixed only once, otherwise, the page is only
2727  * unfixed. If the page is invalidated, the page will not be associated
2728  * with the buffer any longer and the buffer can be used for the buffer
2729  * allocation immediately.
2730  *
2731  * The page invalidation task is executed only for performance
2732  * enhancement. This task is irrespective of correctness. That is, If
2733  * this task is not performed, there is no problem in the correctness of
2734  * the system. When page invalidation task is used, however, following
2735  * things must be kept to prevent incorrectness incurred by using page
2736  * invalidation task.
2737  *
2738  * 1. For temporary pages, page invalidation can be performed at any
2739  * time.
2740  * 2. For regular pages(used to save persistent data such as meta data
2741  * and user data), page invalidation must be performed as postpone
2742  * operation that is executed after the commit decision of transaction
2743  * has been made. The reason will be explained in the
2744  * document[TM-2001-04].
2745  */
2746 #if !defined(NDEBUG)
2747 int
2748 pgbuf_invalidate_debug (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, const char *caller_file, int caller_line)
2749 #else /* NDEBUG */
2750 int
2751 pgbuf_invalidate (THREAD_ENTRY * thread_p, PAGE_PTR pgptr)
2752 #endif /* NDEBUG */
2753 {
2754  PGBUF_BCB *bufptr;
2755  VPID temp_vpid;
2756  int holder_status;
2757 
2759  {
2760  if (pgbuf_is_valid_page_ptr (pgptr) == false)
2761  {
2762  return ER_FAILED;
2763  }
2764  }
2765 
2766  /* Get the address of the buffer from the page and invalidate buffer */
2767  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
2768  assert (!VPID_ISNULL (&bufptr->vpid));
2769 
2770  PGBUF_BCB_LOCK (bufptr);
2771 
2772  /*
2773  * This function is called by the caller while it is fixing the page
2774  * with PGBUF_LATCH_WRITE mode in CUBRID environment. Therefore,
2775  * the caller must unfix the page and then invalidate the page.
2776  */
2777  if (bufptr->fcnt > 1)
2778  {
2779  holder_status = pgbuf_unlatch_thrd_holder (thread_p, bufptr, NULL);
2780 
2781 #if !defined (NDEBUG)
2782  thread_p->get_pgbuf_tracker ().decrement (pgptr);
2783 #endif // !NDEBUG
2784  /* If the page has been fixed more than one time, just unfix it. */
2785  /* todo: is this really safe? */
2786  if (pgbuf_unlatch_bcb_upon_unfix (thread_p, bufptr, holder_status) != NO_ERROR)
2787  {
2788  return ER_FAILED;
2789  }
2790 
2791  return NO_ERROR;
2792  /* bufptr->mutex hash been released in above function. */
2793  }
2794 
2795  /* bufptr->fcnt == 1 */
2796  /* Currently, bufptr->latch_mode is PGBUF_LATCH_WRITE */
2797  if (pgbuf_bcb_safe_flush_force_lock (thread_p, bufptr, true) != NO_ERROR)
2798  {
2799  ASSERT_ERROR ();
2800  return ER_FAILED;
2801  }
2802 
2803  /* save the pageid of the page temporarily. */
2804  temp_vpid = bufptr->vpid;
2805 
2806  holder_status = pgbuf_unlatch_thrd_holder (thread_p, bufptr, NULL);
2807 
2808 #if !defined (NDEBUG)
2809  thread_p->get_pgbuf_tracker ().decrement (pgptr);
2810 #endif // !NDEBUG
2811  if (pgbuf_unlatch_bcb_upon_unfix (thread_p, bufptr, holder_status) != NO_ERROR)
2812  {
2813  return ER_FAILED;
2814  }
2815  /* bufptr->mutex has been released in above function. */
2816 
2817  /* hold mutex again to invalidate the BCB */
2818  PGBUF_BCB_LOCK (bufptr);
2819 
2820  /* check if the page should be invalidated. */
2821  if (VPID_ISNULL (&bufptr->vpid) || !VPID_EQ (&temp_vpid, &bufptr->vpid) || bufptr->fcnt > 0
2822  || pgbuf_bcb_avoid_victim (bufptr))
2823  {
2824  PGBUF_BCB_UNLOCK (bufptr);
2825  return NO_ERROR;
2826  }
2827 
2828 #if defined(CUBRID_DEBUG)
2829  pgbuf_scramble (&bufptr->iopage_buffer->iopage);
2830 #endif /* CUBRID_DEBUG */
2831 
2832  /* Now, invalidation task is performed after holding a page latch with PGBUF_LATCH_INVALID mode. */
2833  if (pgbuf_invalidate_bcb (thread_p, bufptr) != NO_ERROR)
2834  {
2835  return ER_FAILED;
2836  }
2837 
2838  /* bufptr->mutex has been released in above function. */
2839  return NO_ERROR;
2840 }
2841 
2842 /*
2843  * pgbuf_invalidate_all () - Invalidate all unfixed buffers corresponding to the given volume
2844  * return: NO_ERROR, or ER_code
2845  * volid(in): Permanent Volume Identifier or NULL_VOLID
2846  *
2847  * Note: The pages in these buffers are disassociated from the buffers.
2848  * If a page was dirty, it is flushed before the buffer is invalidated.
2849  */
2850 #if !defined(NDEBUG)
2851 int
2852 pgbuf_invalidate_all_debug (THREAD_ENTRY * thread_p, VOLID volid, const char *caller_file, int caller_line)
2853 #else /* NDEBUG */
2854 int
2855 pgbuf_invalidate_all (THREAD_ENTRY * thread_p, VOLID volid)
2856 #endif /* NDEBUG */
2857 {
2858  PGBUF_BCB *bufptr;
2859  VPID temp_vpid;
2860  int bufid;
2861 
2862  /*
2863  * While searching all the buffer pages or corresponding buffer pages,
2864  * the caller flushes each buffer page if it is dirty and
2865  * invalidates the buffer page if it is not fixed on the buffer.
2866  */
2867  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
2868  {
2869  bufptr = PGBUF_FIND_BCB_PTR (bufid);
2870  if (VPID_ISNULL (&bufptr->vpid) || (volid != NULL_VOLID && volid != bufptr->vpid.volid))
2871  {
2872  continue;
2873  }
2874 
2875  PGBUF_BCB_LOCK (bufptr);
2876  if (VPID_ISNULL (&bufptr->vpid) || (volid != NULL_VOLID && volid != bufptr->vpid.volid) || bufptr->fcnt > 0)
2877  {
2878  /* PGBUF_LATCH_READ/PGBUF_LATCH_WRITE */
2879  PGBUF_BCB_UNLOCK (bufptr);
2880  continue;
2881  }
2882 
2883  if (pgbuf_bcb_is_dirty (bufptr))
2884  {
2885  temp_vpid = bufptr->vpid;
2886  if (pgbuf_bcb_safe_flush_force_lock (thread_p, bufptr, true) != NO_ERROR)
2887  {
2888  return ER_FAILED;
2889  }
2890 
2891  /* check if page invalidation should be performed on the page */
2892  if (VPID_ISNULL (&bufptr->vpid) || !VPID_EQ (&temp_vpid, &bufptr->vpid)
2893  || (volid != NULL_VOLID && volid != bufptr->vpid.volid) || bufptr->fcnt > 0)
2894  {
2895  PGBUF_BCB_UNLOCK (bufptr);
2896  continue;
2897  }
2898  }
2899 
2900  if (pgbuf_bcb_avoid_victim (bufptr))
2901  {
2902  PGBUF_BCB_UNLOCK (bufptr);
2903  continue;
2904  }
2905 
2906 #if defined(CUBRID_DEBUG)
2907  pgbuf_scramble (&bufptr->iopage_buffer->iopage);
2908 #endif /* CUBRID_DEBUG */
2909 
2910  /* Now, page invalidation task is performed while holding a page latch with PGBUF_LATCH_INVALID mode. */
2911  (void) pgbuf_invalidate_bcb (thread_p, bufptr);
2912  /* bufptr->mutex has been released in above function. */
2913  }
2914 
2915  return NO_ERROR;
2916 }
2917 
2918 /*
2919  * pgbuf_flush () - Flush a page out to disk
2920  * return: pgptr on success, NULL on failure
2921  * pgptr(in): Page pointer
2922  * free_page(in): Free the page too ?
2923  *
2924  * Note: The page associated with pgptr is written out to disk (ONLY when the
2925  * page is dirty) and optionally is freed (See pb_free). The interface
2926  * requires the pgptr instead of vpid to avoid hashing.
2927  *
2928  * The page flush task is also executed only for performance enhancement
2929  * like page invalidation task. And, this task can be performed at any
2930  * time unlike page invalidation task.
2931  */
2932 void
2933 pgbuf_flush (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, bool free_page)
2934 {
2935  /* caller flushes page but does not really care if page really makes it to disk. or doesn't know what to do in that
2936  * case... I recommend against using it. */
2937  if (pgbuf_flush_with_wal (thread_p, pgptr) == NULL)
2938  {
2939  ASSERT_ERROR ();
2940  }
2941  if (free_page == FREE)
2942  {
2943  pgbuf_unfix (thread_p, pgptr);
2944  }
2945 }
2946 
2947 /*
2948  * pgbuf_flush_with_wal () - Flush a page out to disk after following the wal rule
2949  * return: pgptr on success, NULL on failure
2950  * pgptr(in): Page pointer
2951  *
2952  * Note: The page associated with pgptr is written out to disk (ONLY when the page is dirty)
2953  * Before the page is flushed, the WAL rule of the log manager is called.
2954  */
2955 PAGE_PTR
2957 {
2958  PGBUF_BCB *bufptr;
2959 
2961  {
2962  if (pgbuf_is_valid_page_ptr (pgptr) == false)
2963  {
2964  return NULL;
2965  }
2966  }
2967 
2968  /* NOTE: the page is fixed */
2969  /* Get the address of the buffer from the page. */
2970  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
2971  assert (!VPID_ISNULL (&bufptr->vpid));
2972 
2973  /* In CUBRID, the caller is holding WRITE page latch */
2974  assert (bufptr->latch_mode >= PGBUF_LATCH_READ && pgbuf_find_thrd_holder (thread_p, bufptr) != NULL);
2975  PGBUF_BCB_LOCK (bufptr);
2976 
2977  /* Flush the page only when it is dirty */
2978  if (pgbuf_bcb_safe_flush_force_unlock (thread_p, bufptr, true) != NO_ERROR)
2979  {
2980  ASSERT_ERROR ();
2981  return NULL;
2982  }
2983 
2984  return pgptr;
2985 }
2986 
2987 /*
2988  * pgbuf_flush_if_requested () - flush page if needed. this function is used for permanently latched pages. the thread
2989  * holding should periodically check if flush is requested (usually by checkpoint thread).
2990  *
2991  * return : void
2992  * thread_p (in) : thread entry
2993  * page (in) : page
2994  */
2995 void
2997 {
2998  PGBUF_BCB *bcb;
2999 
3001  {
3002  if (pgbuf_is_valid_page_ptr (page) == false)
3003  {
3004  assert (false);
3005  return;
3006  }
3007  }
3008 
3009  /* NOTE: the page is fixed */
3010  /* Get the address of the buffer from the page. */
3011  CAST_PGPTR_TO_BFPTR (bcb, page);
3012  assert (!VPID_ISNULL (&bcb->vpid));
3013 
3014  /* caller should have write latch, otherwise there is no point in calling this function */
3015  assert (bcb->latch_mode == PGBUF_LATCH_WRITE && pgbuf_find_thrd_holder (thread_p, bcb) != NULL);
3016 
3018  {
3019  PGBUF_BCB_LOCK (bcb);
3020  if (pgbuf_bcb_safe_flush_force_unlock (thread_p, bcb, false) != NO_ERROR)
3021  {
3022  assert (false);
3023  }
3024  }
3025 
3027 }
3028 
3029 static int
3030 pgbuf_flush_all_helper (THREAD_ENTRY * thread_p, VOLID volid, bool is_unfixed_only, bool is_set_lsa_as_null)
3031 {
3032  PGBUF_BCB *bufptr;
3033  int i, ret = NO_ERROR;
3034 
3035  /* Flush all unfixed dirty buffers */
3036  for (i = 0; i < pgbuf_Pool.num_buffers; i++)
3037  {
3038  bufptr = PGBUF_FIND_BCB_PTR (i);
3039  if (!pgbuf_bcb_is_dirty (bufptr) || (volid != NULL_VOLID && volid != bufptr->vpid.volid))
3040  {
3041  continue;
3042  }
3043 
3044  PGBUF_BCB_LOCK (bufptr);
3045  /* flush condition check */
3046  if (!pgbuf_bcb_is_dirty (bufptr) || (is_unfixed_only && bufptr->fcnt > 0)
3047  || (volid != NULL_VOLID && volid != bufptr->vpid.volid))
3048  {
3049  PGBUF_BCB_UNLOCK (bufptr);
3050  continue;
3051  }
3052 
3053  if (is_set_lsa_as_null)
3054  {
3055  /* set PageLSA as NULL value */
3057  }
3058 
3059  /* flush */
3060  if (pgbuf_bcb_safe_flush_force_unlock (thread_p, bufptr, true) != NO_ERROR)
3061  {
3062  /* best efforts */
3063  assert (false);
3064  ret = ER_FAILED;
3065  }
3066  /* Above function released mutex regardless of its return value. */
3067  }
3068 
3069  return ret;
3070 }
3071 
3072 /*
3073  * pgbuf_flush_all () - Flush all dirty pages out to disk
3074  * return: NO_ERROR, or ER_code
3075  * volid(in): Permanent Volume Identifier or NULL_VOLID
3076  *
3077  * Note: Every dirty page of the specified volume is written out to disk.
3078  * If volid is equal to NULL_VOLID, all dirty pages of all volumes are
3079  * written out to disk. Its use is recommended by only the log and
3080  * recovery manager.
3081  */
3082 int
3084 {
3085  return pgbuf_flush_all_helper (thread_p, volid, false, false);
3086 }
3087 
3088 /*
3089  * pgbuf_flush_all_unfixed () - Flush all unfixed dirty pages out to disk
3090  * return: NO_ERROR, or ER_code
3091  * volid(in): Permanent Volume Identifier or NULL_VOLID
3092  *
3093  * Note: Every dirty page of the specified volume which is unfixed is written
3094  * out to disk. If volid is equal to NULL_VOLID, all dirty pages of all
3095  * volumes that are unfixed are written out to disk.
3096  * Its use is recommended by only the log and recovery manager.
3097  */
3098 int
3100 {
3101  return pgbuf_flush_all_helper (thread_p, volid, true, false);
3102 }
3103 
3104 /*
3105  * pgbuf_flush_all_unfixed_and_set_lsa_as_null () - Set lsa to null and flush all unfixed dirty pages out to disk
3106  * return: NO_ERROR, or ER_code
3107  * volid(in): Permanent Volume Identifier or NULL_VOLID
3108  *
3109  * Note: Every dirty page of the specified volume which is unfixed is written
3110  * out after its lsa is initialized to a null lsa. If volid is equal to
3111  * NULL_VOLID, all dirty pages of all volumes that are unfixed are
3112  * flushed to disk after its lsa is initialized to null.
3113  * Its use is recommended by only the log and recovery manager.
3114  */
3115 int
3117 {
3118  return pgbuf_flush_all_helper (thread_p, volid, true, true);
3119 }
3120 
3121 /*
3122  * pgbuf_compare_victim_list () - Compare the vpid of victim candidate list
3123  * return: p1 - p2
3124  * p1(in): victim candidate list 1
3125  * p2(in): victim candidate list 2
3126  */
3127 static int
3128 pgbuf_compare_victim_list (const void *p1, const void *p2)
3129 {
3130  PGBUF_VICTIM_CANDIDATE_LIST *node1, *node2;
3131  int diff;
3132 
3133  node1 = (PGBUF_VICTIM_CANDIDATE_LIST *) p1;
3134  node2 = (PGBUF_VICTIM_CANDIDATE_LIST *) p2;
3135 
3136  diff = node1->vpid.volid - node2->vpid.volid;
3137  if (diff != 0)
3138  {
3139  return diff;
3140  }
3141  else
3142  {
3143  return (node1->vpid.pageid - node2->vpid.pageid);
3144  }
3145 }
3146 
3147 /*
3148  * pgbuf_get_victim_candidates_from_lru () - get victim candidates from LRU list
3149  * return : number of victims found
3150  * thread_p (in) : thread entry
3151  * check_count (in) : number of items to verify before abandoning search
3152  * flush_ratio (in) : flush ratio
3153  * assigned_directly (out) : output true if a bcb was assigned directly.
3154  */
3155 static int
3156 pgbuf_get_victim_candidates_from_lru (THREAD_ENTRY * thread_p, int check_count, float lru_sum_flush_priority,
3157  bool * assigned_directly)
3158 {
3159  int lru_idx, victim_cand_count, i;
3160  PGBUF_BCB *bufptr;
3161  int check_count_this_lru;
3162  float victim_flush_priority_this_lru;
3163  int count_checked_lists = 0;
3164 #if defined (SERVER_MODE)
3165  /* as part of handling a rare case when there are rare direct victim waiters although there are plenty victims, flush
3166  * thread assigns one bcb per iteration directly. this will add only a little overhead in general cases. */
3167  bool try_direct_assign = true;
3168 #endif /* SERVER_MODE */
3169 
3170  /* init */
3171  victim_cand_count = 0;
3172  for (lru_idx = 0; lru_idx < PGBUF_TOTAL_LRU_COUNT; lru_idx++)
3173  {
3174  victim_flush_priority_this_lru = pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[lru_idx];
3175  if (victim_flush_priority_this_lru <= 0)
3176  {
3177  /* no target for this list. */
3178  continue;
3179  }
3180  ++count_checked_lists;
3181 
3182  check_count_this_lru = (int) (victim_flush_priority_this_lru * (float) check_count / lru_sum_flush_priority);
3183  check_count_this_lru = MAX (check_count_this_lru, 1);
3184 
3185  i = check_count_this_lru;
3186 
3187  (void) pthread_mutex_lock (&pgbuf_Pool.buf_LRU_list[lru_idx].mutex);
3188 
3189  for (bufptr = pgbuf_Pool.buf_LRU_list[lru_idx].bottom;
3190  bufptr != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bufptr) && i > 0; bufptr = bufptr->prev_BCB, i--)
3191  {
3192  if (pgbuf_bcb_is_dirty (bufptr))
3193  {
3194  /* save victim candidate information temporarily. */
3195  pgbuf_Pool.victim_cand_list[victim_cand_count].bufptr = bufptr;
3196  pgbuf_Pool.victim_cand_list[victim_cand_count].vpid = bufptr->vpid;
3197  victim_cand_count++;
3198  }
3199 #if defined (SERVER_MODE)
3200  else if (try_direct_assign && pgbuf_is_any_thread_waiting_for_direct_victim ()
3201  && pgbuf_is_bcb_victimizable (bufptr, false) && PGBUF_BCB_TRYLOCK (bufptr) == 0)
3202  {
3203  if (pgbuf_is_bcb_victimizable (bufptr, true) && pgbuf_assign_direct_victim (thread_p, bufptr))
3204  {
3205  /* assigned directly. don't try any other. */
3206  try_direct_assign = false;
3207  *assigned_directly = true;
3209  }
3210  PGBUF_BCB_UNLOCK (bufptr);
3211  }
3212 #endif /* SERVER_MODE */
3213  }
3214  pthread_mutex_unlock (&pgbuf_Pool.buf_LRU_list[lru_idx].mutex);
3215  }
3216 
3218  {
3220  "pgbuf_flush_victim_candidates: pgbuf_get_victim_candidates_from_lru %d candidates in %d lists \n",
3221  victim_cand_count, count_checked_lists);
3222  }
3223 
3224  return victim_cand_count;
3225 }
3226 
3227 /*
3228  * pgbuf_flush_victim_candidates () - collect & flush victim candidates
3229  *
3230  * return : error code
3231  * thread_p (in) : thread entry
3232  * flush_ratio (in) : desired flush ratio
3233  * perf_tracker (in/out) : time tracker for performance statistics
3234  * stop (out) : output to stop looping
3235  */
3236 int
3237 pgbuf_flush_victim_candidates (THREAD_ENTRY * thread_p, float flush_ratio, PERF_UTIME_TRACKER * perf_tracker,
3238  bool * stop)
3239 {
3240  PGBUF_BCB *bufptr;
3241  PGBUF_VICTIM_CANDIDATE_LIST *victim_cand_list;
3242  int i, victim_count = 0;
3243  int check_count_lru;
3244  int cfg_check_cnt;
3245  int total_flushed_count;
3246  int error = NO_ERROR;
3247  float lru_miss_rate;
3248  float lru_dynamic_flush_adj = 1.0f;
3249  int lru_victim_req_cnt, fix_req_cnt;
3250  float lru_sum_flush_priority;
3251  int count_need_wal = 0;
3252  LOG_LSA lsa_need_wal = LSA_INITIALIZER;
3253 #if defined(SERVER_MODE)
3254  LOG_LSA save_lsa_need_wal = LSA_INITIALIZER;
3255  static THREAD_ENTRY *page_flush_thread = NULL;
3256  bool repeated = false;
3257 #endif /* SERVER_MODE */
3258  bool is_bcb_locked = false;
3260  bool assigned_directly = false;
3261 #if !defined (NDEBUG) && defined (SERVER_MODE)
3262  bool empty_flushed_bcb_queue = false;
3263  bool direct_victim_waiters = false;
3264 #endif /* DEBUG && SERVER_MODE */
3265 
3266  // stats
3267  UINT64 num_skipped_already_flushed = 0;
3268  UINT64 num_skipped_fixed_or_hot = 0;
3269  UINT64 num_skipped_need_wal = 0;
3270  UINT64 num_skipped_flush = 0;
3271 
3273 
3275  if (logging)
3276  {
3277  _er_log_debug (ARG_FILE_LINE, "pgbuf_flush_victim_candidates: start flush victim candidates\n");
3278  }
3279 
3280 #if !defined(NDEBUG) && defined(SERVER_MODE)
3282  {
3283  if (page_flush_thread == NULL)
3284  {
3285  page_flush_thread = thread_p;
3286  }
3287 
3288  /* This should be fixed */
3289  assert (page_flush_thread == thread_p);
3290  }
3291 #endif
3292 
3294 
3295  *stop = false;
3296 
3297  pgbuf_compute_lru_vict_target (&lru_sum_flush_priority);
3298 
3299  victim_cand_list = pgbuf_Pool.victim_cand_list;
3300 
3301  victim_count = 0;
3302  total_flushed_count = 0;
3303  check_count_lru = 0;
3304 
3305  lru_victim_req_cnt = ATOMIC_TAS_32 (&pgbuf_Pool.monitor.lru_victim_req_cnt, 0);
3306  fix_req_cnt = ATOMIC_TAS_32 (&pgbuf_Pool.monitor.fix_req_cnt, 0);
3307 
3308  if (fix_req_cnt > lru_victim_req_cnt)
3309  {
3310  lru_miss_rate = (float) lru_victim_req_cnt / (float) fix_req_cnt;
3311  }
3312  else
3313  {
3314  /* overflow of fix counter, we ignore miss rate */
3315  lru_miss_rate = 0;
3316  }
3317 
3318  cfg_check_cnt = (int) (pgbuf_Pool.num_buffers * flush_ratio);
3319 
3320  /* Victims will only be flushed, not decached. */
3321 
3322 #if defined (SERVER_MODE)
3323  /* do not apply flush boost during checkpoint; since checkpoint is already flushing pages we expect some of the victim
3324  * candidates are already flushed by checkpoint */
3325  if (pgbuf_Pool.is_checkpoint == false)
3326  {
3327  lru_dynamic_flush_adj = MAX (1.0f, 1 + (PGBUF_FLUSH_VICTIM_BOOST_MULT - 1) * lru_miss_rate);
3328  lru_dynamic_flush_adj = MIN (PGBUF_FLUSH_VICTIM_BOOST_MULT, lru_dynamic_flush_adj);
3329  }
3330  else
3331 #endif
3332  {
3333  lru_dynamic_flush_adj = 1.0f;
3334  }
3335 
3336  check_count_lru = (int) (cfg_check_cnt * lru_dynamic_flush_adj);
3337  /* limit the checked BCBs to equivalent of 200 M */
3338  check_count_lru = MIN (check_count_lru, (200 * 1024 * 1024) / db_page_size ());
3339 
3340 #if !defined (NDEBUG) && defined (SERVER_MODE)
3341  empty_flushed_bcb_queue = pgbuf_Pool.flushed_bcbs->is_empty ();
3342  direct_victim_waiters = pgbuf_is_any_thread_waiting_for_direct_victim ();
3343 #endif /* DEBUG && SERVER_MODE */
3344 
3345  if (check_count_lru > 0 && lru_sum_flush_priority > 0)
3346  {
3347  victim_count =
3348  pgbuf_get_victim_candidates_from_lru (thread_p, check_count_lru, lru_sum_flush_priority, &assigned_directly);
3349  }
3350  if (victim_count == 0)
3351  {
3352  /* We didn't find any victims */
3353  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, perf_tracker, PSTAT_PB_FLUSH_COLLECT);
3354  /* if pgbuf_get_victim_candidates_from_lru failed to provide candidates, it means we already flushed enough.
3355  * give threads looking for victims a chance to find them before looping again. output hint to stop looping. */
3356  *stop = check_count_lru > 0 && lru_sum_flush_priority > 0;
3357  goto end;
3358  }
3359 
3360 #if defined (SERVER_MODE)
3361  /* wake up log flush thread. we need log up to date to be able to flush pages */
3363  {
3365  }
3366  else
3367 #endif /* SERVER_MODE */
3368  {
3369  logpb_force_flush_pages (thread_p);
3370  }
3371 
3373  {
3374  qsort ((void *) victim_cand_list, victim_count, sizeof (PGBUF_VICTIM_CANDIDATE_LIST), pgbuf_compare_victim_list);
3375  }
3376 
3377 #if defined (SERVER_MODE)
3378  pgbuf_Pool.is_flushing_victims = true;
3379 #endif
3380 
3381  if (logging)
3382  {
3383  _er_log_debug (ARG_FILE_LINE, "pgbuf_flush_victim_candidates: start flushing collected victim candidates\n");
3384  }
3385  if (perf_tracker->is_perf_tracking)
3386  {
3387  UINT64 utime;
3388  tsc_getticks (&perf_tracker->end_tick);
3389  utime = tsc_elapsed_utime (perf_tracker->end_tick, perf_tracker->start_tick);
3390  perfmon_time_stat (thread_p, PSTAT_PB_FLUSH_COLLECT, utime);
3391  if (detailed_perf)
3392  {
3393  perfmon_time_bulk_stat (thread_p, PSTAT_PB_FLUSH_COLLECT_PER_PAGE, utime, victim_count);
3394  }
3395  perf_tracker->start_tick = perf_tracker->end_tick;
3396  }
3397 #if defined (SERVER_MODE)
3398 repeat:
3399 #endif
3400  count_need_wal = 0;
3401 
3402  /* temporary disable second iteration */
3403  /* for each victim candidate, do flush task */
3404  for (i = 0; i < victim_count; i++)
3405  {
3406  int flushed_pages = 0;
3407 
3408  bufptr = victim_cand_list[i].bufptr;
3409 
3410  PGBUF_BCB_LOCK (bufptr);
3411 
3412  /* check flush conditions */
3413 
3414  if (!VPID_EQ (&bufptr->vpid, &victim_cand_list[i].vpid) || !pgbuf_bcb_is_dirty (bufptr)
3415  || pgbuf_bcb_is_flushing (bufptr))
3416  {
3417  /* must be already flushed or currently flushing */
3418  PGBUF_BCB_UNLOCK (bufptr);
3419  ++num_skipped_already_flushed;
3420  continue;
3421  }
3422 
3423  if (!PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bufptr) || bufptr->latch_mode != PGBUF_NO_LATCH)
3424  {
3425  /* page was fixed or became hot after selected as victim. do not flush it. */
3426  PGBUF_BCB_UNLOCK (bufptr);
3427  ++num_skipped_fixed_or_hot;
3428  continue;
3429  }
3430 
3431  if (logpb_need_wal (&bufptr->iopage_buffer->iopage.prv.lsa))
3432  {
3433  /* we cannot flush a page unless log has been flushed up until page LSA. otherwise we might have recovery
3434  * issues. */
3435  count_need_wal++;
3436  if (LSA_ISNULL (&lsa_need_wal) || LSA_LE (&lsa_need_wal, &(bufptr->iopage_buffer->iopage.prv.lsa)))
3437  {
3438  LSA_COPY (&lsa_need_wal, &(bufptr->iopage_buffer->iopage.prv.lsa));
3439  }
3440  PGBUF_BCB_UNLOCK (bufptr);
3441  ++num_skipped_need_wal;
3442 #if defined (SERVER_MODE)
3444 #endif /* SERVER_MODE */
3445  continue;
3446  }
3447 
3448  if (PGBUF_NEIGHBOR_PAGES > 1)
3449  {
3450  error = pgbuf_flush_page_and_neighbors_fb (thread_p, bufptr, &flushed_pages);
3451  /* BCB mutex already unlocked by neighbor flush function */
3452  }
3453  else
3454  {
3455  error = pgbuf_bcb_flush_with_wal (thread_p, bufptr, true, &is_bcb_locked);
3456  if (is_bcb_locked)
3457  {
3458  PGBUF_BCB_UNLOCK (bufptr);
3459  }
3460  flushed_pages = 1;
3461  }
3462  if (error != NO_ERROR)
3463  {
3464  /* if this shows up in statistics or log, consider it a red flag */
3465  if (logging)
3466  {
3467  _er_log_debug (ARG_FILE_LINE, "pgbuf_flush_victim_candidates: error during flush");
3468  }
3469  goto end;
3470  }
3471  total_flushed_count += flushed_pages;
3472  }
3473 
3474  num_skipped_flush = num_skipped_need_wal + num_skipped_fixed_or_hot + num_skipped_already_flushed;
3475  if (perf_tracker->is_perf_tracking)
3476  {
3477  perfmon_add_stat (thread_p, PSTAT_PB_NUM_SKIPPED_FLUSH, num_skipped_flush);
3478  if (detailed_perf)
3479  {
3480  perfmon_add_stat (thread_p, PSTAT_PB_NUM_SKIPPED_NEED_WAL, num_skipped_need_wal);
3481  perfmon_add_stat (thread_p, PSTAT_PB_NUM_SKIPPED_FIXED_OR_HOT, num_skipped_fixed_or_hot);
3482  perfmon_add_stat (thread_p, PSTAT_PB_NUM_SKIPPED_ALREADY_FLUSHED, num_skipped_already_flushed);
3483  }
3484 
3485  UINT64 utime;
3486  tsc_getticks (&perf_tracker->end_tick);
3487  utime = tsc_elapsed_utime (perf_tracker->end_tick, perf_tracker->start_tick);
3488  perfmon_time_stat (thread_p, PSTAT_PB_FLUSH_FLUSH, utime);
3489  if (detailed_perf)
3490  {
3491  perfmon_time_bulk_stat (thread_p, PSTAT_PB_FLUSH_FLUSH_PER_PAGE, utime, total_flushed_count);
3492  }
3493  perf_tracker->start_tick = perf_tracker->end_tick;
3494  }
3495 
3496 end:
3497 
3498 #if defined (SERVER_MODE)
3499  if (pgbuf_is_any_thread_waiting_for_direct_victim () && victim_count != 0 && count_need_wal == victim_count)
3500  {
3501  /* log flush thread did not wake up in time. we must make sure log is flushed and retry. */
3502  if (repeated)
3503  {
3504  /* already waited and failed again? all bcb's must have changed again (confirm by comparing save_lsa_need_wal
3505  * and lsa_need_wal. */
3506  assert (LSA_LT (&save_lsa_need_wal, &lsa_need_wal));
3507  }
3508  else
3509  {
3510  repeated = true;
3511  save_lsa_need_wal = lsa_need_wal;
3512  logpb_flush_log_for_wal (thread_p, &lsa_need_wal);
3513  goto repeat;
3514  }
3515  }
3516 
3517  pgbuf_Pool.is_flushing_victims = false;
3518 #endif /* SERVER_MODE */
3519 
3520  if (logging)
3521  {
3523  "pgbuf_flush_victim_candidates: flush %d pages from lru lists.\n"
3524  "\tvictim_count = %d\n"
3525  "\tcheck_count_lru = %d\n"
3526  "\tnum_skipped_need_wal = %d\n"
3527  "\tnum_skipped_fixed_or_hot = %d\n"
3528  "\tnum_skipped_already_flushed = %d\n",
3529  total_flushed_count, victim_count, check_count_lru, num_skipped_need_wal, num_skipped_fixed_or_hot,
3530  num_skipped_already_flushed);
3531  }
3533 
3534  perfmon_add_stat (thread_p, PSTAT_PB_NUM_FLUSHED, total_flushed_count);
3535 
3536  return error;
3537 }
3538 
3539 /*
3540  * pgbuf_flush_checkpoint () - Flush any unfixed dirty page whose lsa is smaller than the last checkpoint lsa
3541  * return:error code or NO_ERROR
3542  * flush_upto_lsa(in):
3543  * prev_chkpt_redo_lsa(in): Redo_LSA of previous checkpoint
3544  * smallest_lsa(out): Smallest LSA of a dirty buffer in buffer pool
3545  * flushed_page_cnt(out): The number of flushed pages
3546  *
3547  * Note: The function flushes and dirty unfixed page whose LSA is smaller that the last_chkpt_lsa,
3548  * it returns the smallest_lsa from the remaining dirty buffers which were not flushed.
3549  * This function is used by the log and recovery manager when a checkpoint is issued.
3550  */
3551 int
3552 pgbuf_flush_checkpoint (THREAD_ENTRY * thread_p, const LOG_LSA * flush_upto_lsa, const LOG_LSA * prev_chkpt_redo_lsa,
3553  LOG_LSA * smallest_lsa, int *flushed_page_cnt)
3554 {
3555 #define detailed_er_log(...) if (detailed_logging) _er_log_debug (ARG_FILE_LINE, __VA_ARGS__)
3556  PGBUF_BCB *bufptr;
3557  int bufid;
3558  int flushed_page_cnt_local = 0;
3559  PGBUF_SEQ_FLUSHER *seq_flusher;
3561  int collected_bcbs;
3562  int error = NO_ERROR;
3563  bool detailed_logging = prm_get_bool_value (PRM_ID_LOG_CHKPT_DETAILED);
3564 
3565  detailed_er_log ("pgbuf_flush_checkpoint start : flush_upto_LSA:%d, prev_chkpt_redo_LSA:%d\n",
3566  flush_upto_lsa->pageid, (prev_chkpt_redo_lsa ? prev_chkpt_redo_lsa->pageid : -1));
3567 
3568  if (flushed_page_cnt != NULL)
3569  {
3570  *flushed_page_cnt = -1;
3571  }
3572 
3573  /* Things must be truly flushed up to this lsa */
3574  logpb_flush_log_for_wal (thread_p, flush_upto_lsa);
3575  LSA_SET_NULL (smallest_lsa);
3576 
3577  seq_flusher = &(pgbuf_Pool.seq_chkpt_flusher);
3578  f_list = seq_flusher->flush_list;
3579 
3580  LSA_COPY (&seq_flusher->flush_upto_lsa, flush_upto_lsa);
3581 
3582  detailed_er_log ("pgbuf_flush_checkpoint start : start\n");
3583 
3584  collected_bcbs = 0;
3585 
3586 #if defined (SERVER_MODE)
3587  pgbuf_Pool.is_checkpoint = true;
3588 #endif
3589 
3590  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
3591  {
3592  if (collected_bcbs >= seq_flusher->flush_max_size)
3593  {
3594  /* flush exiting list */
3595  seq_flusher->flush_cnt = collected_bcbs;
3596  seq_flusher->flush_idx = 0;
3597 
3598  qsort (f_list, seq_flusher->flush_cnt, sizeof (f_list[0]), pgbuf_compare_victim_list);
3599 
3600  error = pgbuf_flush_chkpt_seq_list (thread_p, seq_flusher, prev_chkpt_redo_lsa, smallest_lsa);
3601  if (error != NO_ERROR)
3602  {
3603 #if defined (SERVER_MODE)
3604  pgbuf_Pool.is_checkpoint = false;
3605 #endif
3606  return error;
3607  }
3608 
3609  flushed_page_cnt_local += seq_flusher->flushed_pages;
3610 
3611  collected_bcbs = 0;
3612  }
3613 
3614  bufptr = PGBUF_FIND_BCB_PTR (bufid);
3615  PGBUF_BCB_LOCK (bufptr);
3616 
3617  /* flush condition check */
3618  if (!pgbuf_bcb_is_dirty (bufptr)
3619  || (!LSA_ISNULL (&bufptr->oldest_unflush_lsa) && LSA_GT (&bufptr->oldest_unflush_lsa, flush_upto_lsa)))
3620  {
3621  PGBUF_BCB_UNLOCK (bufptr);
3622  continue;
3623  }
3624 
3625  if (!LSA_ISNULL (&bufptr->oldest_unflush_lsa) && prev_chkpt_redo_lsa != NULL && !LSA_ISNULL (prev_chkpt_redo_lsa))
3626  {
3627  if (LSA_LT (&bufptr->oldest_unflush_lsa, prev_chkpt_redo_lsa))
3628  {
3629  er_stack_push ();
3632  bufptr->oldest_unflush_lsa.offset, prev_chkpt_redo_lsa->pageid, prev_chkpt_redo_lsa->offset);
3633  er_stack_pop ();
3634 
3635  assert (false);
3636  }
3637  }
3638 
3639  /* add to flush list */
3640  f_list[collected_bcbs].bufptr = bufptr;
3641  VPID_COPY (&f_list[collected_bcbs].vpid, &bufptr->vpid);
3642  PGBUF_BCB_UNLOCK (bufptr);
3643 
3644  collected_bcbs++;
3645 
3646 #if defined(SERVER_MODE)
3647  if (thread_p != NULL && thread_p->shutdown == true)
3648  {
3649  pgbuf_Pool.is_checkpoint = false;
3650  return ER_FAILED;
3651  }
3652 #endif
3653  }
3654 
3655  if (collected_bcbs > 0)
3656  {
3657  /* flush exiting list */
3658  seq_flusher->flush_cnt = collected_bcbs;
3659  seq_flusher->flush_idx = 0;
3660 
3661  qsort (f_list, seq_flusher->flush_cnt, sizeof (f_list[0]), pgbuf_compare_victim_list);
3662 
3663  error = pgbuf_flush_chkpt_seq_list (thread_p, seq_flusher, prev_chkpt_redo_lsa, smallest_lsa);
3664  flushed_page_cnt_local += seq_flusher->flushed_pages;
3665  }
3666 
3667 #if defined (SERVER_MODE)
3668  pgbuf_Pool.is_checkpoint = false;
3669 #endif
3670 
3671  detailed_er_log ("pgbuf_flush_checkpoint END flushed:%d\n", flushed_page_cnt_local);
3672 
3673  if (flushed_page_cnt != NULL)
3674  {
3675  *flushed_page_cnt = flushed_page_cnt_local;
3676  }
3677 
3678  return error;
3679 
3680 #undef detailed_er_log
3681 }
3682 
3683 /*
3684  * pgbuf_flush_chkpt_seq_list () - flush a sequence of pages during checkpoint
3685  * return:error code or NO_ERROR
3686  * thread_p(in):
3687  * seq_flusher(in): container for list of pages
3688  * prev_chkpt_redo_lsa(in): LSA of previous checkpoint
3689  * chkpt_smallest_lsa(out): smallest LSA found in a page
3690  *
3691  */
3692 static int
3694  const LOG_LSA * prev_chkpt_redo_lsa, LOG_LSA * chkpt_smallest_lsa)
3695 {
3696 #define WAIT_FLUSH_VICTIMS_MAX_MSEC 1500.0f
3697  int error = NO_ERROR;
3698  struct timeval *p_limit_time;
3699  int total_flushed;
3700  int time_rem;
3701 #if defined (SERVER_MODE)
3702  int flush_interval, sleep_msecs;
3703  float wait_victims;
3704  float chkpt_flush_rate;
3705  struct timeval limit_time = { 0, 0 };
3706  struct timeval cur_time = { 0, 0 };
3707 #endif
3708 
3709 #if defined (SERVER_MODE)
3711  if (sleep_msecs > 0)
3712  {
3713  chkpt_flush_rate = 1000.0f / (float) sleep_msecs;
3714  }
3715  else
3716  {
3717  chkpt_flush_rate = 1000.0f;
3718  }
3719 
3720  flush_interval = (int) (1000.0f * PGBUF_CHKPT_BURST_PAGES / chkpt_flush_rate);
3721  seq_flusher->interval_msec = flush_interval;
3722 #endif
3723 
3724  total_flushed = 0;
3725  seq_flusher->control_flushed = 0;
3726  seq_flusher->control_intervals_cnt = 0;
3727  while (seq_flusher->flush_idx < seq_flusher->flush_cnt)
3728  {
3729 #if defined (SERVER_MODE)
3730  if (thread_p != NULL && thread_p->shutdown)
3731  {
3732  // stop
3733  return ER_FAILED;
3734  }
3735 
3736  gettimeofday (&cur_time, NULL);
3737 
3738  /* compute time limit for allowed flush interval */
3739  timeval_add_msec (&limit_time, &cur_time, flush_interval);
3740 
3741  seq_flusher->flush_rate = chkpt_flush_rate;
3742  p_limit_time = &limit_time;
3743 #else
3744  p_limit_time = NULL;
3745 #endif
3746 
3747 #if defined (SERVER_MODE)
3748  wait_victims = 0;
3749  while (pgbuf_Pool.is_flushing_victims == true && wait_victims < WAIT_FLUSH_VICTIMS_MAX_MSEC)
3750  {
3751  /* wait 100 micro-seconds */
3752  thread_sleep (0.1f);
3753  wait_victims += 0.1f;
3754  }
3755 #endif
3756 
3757  error = pgbuf_flush_seq_list (thread_p, seq_flusher, p_limit_time, prev_chkpt_redo_lsa, chkpt_smallest_lsa,
3758  &time_rem);
3759  total_flushed += seq_flusher->flushed_pages;
3760 
3761  if (error != NO_ERROR)
3762  {
3763  seq_flusher->flushed_pages = total_flushed;
3764  return error;
3765  }
3766 
3767 #if defined (SERVER_MODE)
3768  if (time_rem > 0)
3769  {
3770  thread_sleep (time_rem);
3771  }
3772 #endif
3773  }
3774 
3775  seq_flusher->flushed_pages = total_flushed;
3776 
3777  return error;
3778 #undef WAIT_FLUSH_VICTIMS_MAX_MSEC
3779 }
3780 
3781 /*
3782  * pgbuf_flush_seq_list () - flushes a sequence of pages
3783  * return:error code or NO_ERROR
3784  * thread_p(in):
3785  * seq_flusher(in): container for list of pages
3786  * limit_time(in): absolute time limit allowed for this call
3787  * prev_chkpt_redo_lsa(in): LSA of previous checkpoint
3788  * chkpt_smallest_lsa(out): smallest LSA found in a page
3789  * time_rem(in): time remaining until limit time expires
3790  *
3791  * Note : burst_mode from seq_flusher container controls how the flush is performed:
3792  * - if enabled, an amount of pages is flushed as soon as possible,
3793  * according to desired flush rate and time limit
3794  * - if disabled, the same amount of pages is flushed, but with a
3795  * pause between each flushed page.
3796  * Since data flush is concurrent with other IO, burst mode increases
3797  * the chance that data and other IO sequences do not mix at IO
3798  * scheduler level and break each-other's sequentiality.
3799  */
3800 static int
3801 pgbuf_flush_seq_list (THREAD_ENTRY * thread_p, PGBUF_SEQ_FLUSHER * seq_flusher, struct timeval *limit_time,
3802  const LOG_LSA * prev_chkpt_redo_lsa, LOG_LSA * chkpt_smallest_lsa, int *time_rem)
3803 {
3804 #define detailed_er_log(...) if (detailed_logging) _er_log_debug (ARG_FILE_LINE, __VA_ARGS__)
3805  PGBUF_BCB *bufptr;
3807  int error = NO_ERROR;
3808  int avail_time_msec = 0, time_rem_msec = 0;
3809 #if defined (SERVER_MODE)
3810  double sleep_msecs = 0;
3811  struct timeval cur_time = { 0, 0 };
3812 #endif /* SERVER_MODE */
3813  int flush_per_interval;
3814  int cnt_writes;
3815  int dropped_pages;
3816  bool done_flush;
3817  float control_est_flush_total = 0;
3818  int control_total_cnt_intervals = 0;
3819  bool ignore_time_limit = false;
3820  bool flush_if_already_flushed;
3821  bool locked_bcb = false;
3822  bool detailed_logging = prm_get_bool_value (PRM_ID_LOG_CHKPT_DETAILED);
3823 
3824  assert (seq_flusher != NULL);
3825  f_list = seq_flusher->flush_list;
3826 
3827 #if defined (SERVER_MODE)
3828  gettimeofday (&cur_time, NULL);
3829 
3830  if (seq_flusher->burst_mode == true)
3831  {
3832  assert_release (limit_time != NULL);
3833  }
3834 
3835  *time_rem = 0;
3836  if (limit_time != NULL)
3837  {
3838  /* limited time job: amount to flush in this interval */
3839  avail_time_msec = (int) timeval_diff_in_msec (limit_time, &cur_time);
3840 
3841  control_total_cnt_intervals = (int) (1000.f / (float) seq_flusher->interval_msec + 0.5f);
3842 
3843  if (seq_flusher->control_intervals_cnt > 0)
3844  {
3845  control_est_flush_total =
3846  (seq_flusher->flush_rate * (float) (seq_flusher->control_intervals_cnt + 1) /
3847  (float) control_total_cnt_intervals);
3848 
3849  flush_per_interval = (int) (control_est_flush_total - seq_flusher->control_flushed);
3850  }
3851  else
3852  {
3853  flush_per_interval = (int) (seq_flusher->flush_rate / control_total_cnt_intervals);
3854  if (seq_flusher->control_intervals_cnt < 0)
3855  {
3856  flush_per_interval -= seq_flusher->control_flushed;
3857  }
3858  }
3859  }
3860  else
3861  {
3862  /* flush all */
3863  avail_time_msec = -1;
3864  flush_per_interval = seq_flusher->flush_cnt;
3865  }
3866 
3867  flush_per_interval =
3868  (int) MAX (flush_per_interval, (PGBUF_CHKPT_MIN_FLUSH_RATE * seq_flusher->interval_msec) / 1000.0f);
3869 #else
3870  flush_per_interval = seq_flusher->flush_cnt;
3871 #endif /* SERVER_MODE */
3872 
3873  detailed_er_log ("pgbuf_flush_seq_list (%s): start_idx:%d, flush_cnt:%d, LSA_flush:%d, "
3874  "flush_rate:%.2f, control_flushed:%d, this_interval:%d, "
3875  "Est_tot_flush:%.2f, control_intervals:%d, %d Avail_time:%d\n", "chkpt",
3876  seq_flusher->flush_idx, seq_flusher->flush_cnt, seq_flusher->flush_upto_lsa.pageid,
3877  seq_flusher->flush_rate, seq_flusher->control_flushed, flush_per_interval, control_est_flush_total,
3878  seq_flusher->control_intervals_cnt, control_total_cnt_intervals, avail_time_msec);
3879 
3880  /* Start to flush */
3881  cnt_writes = 0;
3882  dropped_pages = 0;
3883  seq_flusher->flushed_pages = 0;
3884 
3885  for (; seq_flusher->flush_idx < seq_flusher->flush_cnt && seq_flusher->flushed_pages < flush_per_interval;
3886  seq_flusher->flush_idx++)
3887  {
3888  bufptr = f_list[seq_flusher->flush_idx].bufptr;
3889 
3890  /* prefer sequentiality to an unnecessary flush; skip already flushed page if is the last in list or if there is
3891  * already a gap due to missing next page */
3892  flush_if_already_flushed = true;
3893  if (seq_flusher->flush_idx + 1 >= seq_flusher->flush_cnt
3894  || f_list[seq_flusher->flush_idx].vpid.pageid + 1 != f_list[seq_flusher->flush_idx + 1].vpid.pageid)
3895  {
3896  flush_if_already_flushed = false;
3897  }
3898 
3899  PGBUF_BCB_LOCK (bufptr);
3900  locked_bcb = true;
3901 
3902  if (!VPID_EQ (&bufptr->vpid, &f_list[seq_flusher->flush_idx].vpid) || !pgbuf_bcb_is_dirty (bufptr)
3903  || (flush_if_already_flushed == false && !LSA_ISNULL (&bufptr->oldest_unflush_lsa)
3904  && LSA_GT (&bufptr->oldest_unflush_lsa, &seq_flusher->flush_upto_lsa)))
3905  {
3906  PGBUF_BCB_UNLOCK (bufptr);
3907  dropped_pages++;
3908  continue;
3909  }
3910 
3911  done_flush = false;
3912  if (pgbuf_bcb_safe_flush_force_lock (thread_p, bufptr, true) == NO_ERROR)
3913  {
3914  if (!LSA_ISNULL (&bufptr->oldest_unflush_lsa)
3915  && LSA_LE (&bufptr->oldest_unflush_lsa, &seq_flusher->flush_upto_lsa))
3916  {
3917  /* I am not sure if this is really possible. But let's assume that bcb was already flushing before
3918  * checkpoint reached it. And that it was modified again. And that the new oldest_unflush_lsa is less than
3919  * flush_upto_lsa. It may seem that many planets should align, but let's be conservative and flush again.
3920  */
3921  detailed_er_log ("pgbuf_flush_seq_list: flush again %d|%d; oldest_unflush_lsa=%lld|%d, "
3922  "flush_upto_lsa=%lld|%d \n", VPID_AS_ARGS (&bufptr->vpid),
3923  LSA_AS_ARGS (&bufptr->oldest_unflush_lsa), LSA_AS_ARGS (&seq_flusher->flush_upto_lsa));
3924  if (pgbuf_bcb_safe_flush_internal (thread_p, bufptr, true, &locked_bcb) == NO_ERROR)
3925  {
3926  /* now we should be ok. */
3928  || LSA_GT (&bufptr->oldest_unflush_lsa, &seq_flusher->flush_upto_lsa));
3929  done_flush = true;
3930  }
3931  else
3932  {
3933  assert (false);
3934  }
3935  }
3936  else
3937  {
3938  done_flush = true;
3939  }
3940  }
3941  else
3942  {
3943  assert (false);
3944  locked_bcb = false;
3945  }
3946 
3947  if (done_flush)
3948  {
3949  seq_flusher->flushed_pages++;
3950  }
3951  else
3952  {
3953  assert (false);
3954 
3955  if (!locked_bcb)
3956  {
3957  PGBUF_BCB_LOCK (bufptr);
3958  locked_bcb = true;
3959  }
3960 
3961  /* get the smallest oldest_unflush_lsa */
3962  if (!LSA_ISNULL (&bufptr->oldest_unflush_lsa)
3963  && (LSA_ISNULL (chkpt_smallest_lsa) || LSA_LT (&bufptr->oldest_unflush_lsa, chkpt_smallest_lsa)))
3964  {
3965  LSA_COPY (chkpt_smallest_lsa, &bufptr->oldest_unflush_lsa);
3966  }
3967  }
3968 
3969  if (locked_bcb)
3970  {
3971  PGBUF_BCB_UNLOCK (bufptr);
3972  locked_bcb = false;
3973  }
3974 
3975 #if defined(SERVER_MODE)
3976  if (limit_time != NULL && ignore_time_limit == false)
3977  {
3978  gettimeofday (&cur_time, NULL);
3979  if (cur_time.tv_sec > limit_time->tv_sec
3980  || (cur_time.tv_sec == limit_time->tv_sec && cur_time.tv_usec >= limit_time->tv_usec))
3981  {
3982  *time_rem = -1;
3983  break;
3984  }
3985  }
3986 
3987  if (seq_flusher->burst_mode == false && seq_flusher->flush_rate > 0
3988  && seq_flusher->flushed_pages < flush_per_interval && ignore_time_limit == false)
3989  {
3990  if (limit_time != NULL)
3991  {
3992  time_rem_msec = (int) timeval_diff_in_msec (limit_time, &cur_time);
3993  sleep_msecs = time_rem_msec / (flush_per_interval - seq_flusher->flushed_pages);
3994  }
3995  else
3996  {
3997  sleep_msecs = 1000.0f / (double) (seq_flusher->flush_rate);
3998  }
3999 
4000  if (sleep_msecs > (1000.0f / PGBUF_CHKPT_MAX_FLUSH_RATE))
4001  {
4002  thread_sleep (sleep_msecs);
4003  }
4004  }
4005 
4006  if (thread_p && thread_p->shutdown == true)
4007  {
4008  return ER_FAILED;
4009  }
4010 #endif /* SERVER_MODE */
4011  }
4012 
4013 #if defined (SERVER_MODE)
4014  gettimeofday (&cur_time, NULL);
4015  if (limit_time != NULL)
4016  {
4017  time_rem_msec = (int) timeval_diff_in_msec (limit_time, &cur_time);
4018  *time_rem = time_rem_msec;
4019 
4020  seq_flusher->control_intervals_cnt++;
4021  if (seq_flusher->control_intervals_cnt >= control_total_cnt_intervals || ignore_time_limit == true)
4022  {
4023  seq_flusher->control_intervals_cnt = 0;
4024  }
4025 
4026  if (seq_flusher->control_intervals_cnt == 0)
4027  {
4028  seq_flusher->control_flushed = 0;
4029  }
4030  else
4031  {
4032  seq_flusher->control_flushed += seq_flusher->flushed_pages;
4033  }
4034  }
4035 #endif /* SERVER_MODE */
4036 
4037  detailed_er_log ("pgbuf_flush_seq_list end (%s): %s %s pages : %d written/%d dropped, "
4038  "Remaining_time:%d, Avail_time:%d, Curr:%d/%d,", "ckpt",
4039  ((time_rem_msec <= 0) ? "[Expired] " : ""), (ignore_time_limit ? "[boost]" : ""),
4040  seq_flusher->flushed_pages, dropped_pages, time_rem_msec, avail_time_msec, seq_flusher->flush_idx,
4041  seq_flusher->flush_cnt);
4042 
4043  return error;
4044 #undef detailed_er_log
4045 }
4046 
4047 /*
4048  * pgbuf_copy_to_area () - Copy a portion of a page to the given area
4049  * return: area or NULL
4050  * vpid(in): Complete Page identifier
4051  * start_offset(in): Start offset of interested content in page
4052  * length(in): Length of the content of page to copy
4053  * area(in): Area where to copy the needed content of the page
4054  * do_fetch(in): Do we want to cache the page in the buffer pool when it is
4055  * not already cached?
4056  *
4057  * Note: If the page is not in the page buffer pool, it is only buffered when
4058  * the value of "do_fetch" is false.
4059  *
4060  * WARNING:
4061  * The user should be very careful on deciding wheater or not to allow
4062  * buffering of pages. If the page is going to be used in the short
4063  * future, it is better to allow buffering the page to avoid extra I/O.
4064  * It is better to avoid I/Os than to avoid memcpys.
4065  */
4066 void *
4067 pgbuf_copy_to_area (THREAD_ENTRY * thread_p, const VPID * vpid, int start_offset, int length, void *area, bool do_fetch)
4068 {
4069  PGBUF_BUFFER_HASH *hash_anchor;
4070  PGBUF_BCB *bufptr;
4071  PAGE_PTR pgptr;
4072 
4073  if (logtb_is_interrupted (thread_p, true, &pgbuf_Pool.check_for_interrupts) == true)
4074  {
4076  return NULL;
4077  }
4078 
4079 #if defined(CUBRID_DEBUG)
4080  if (start_offset < 0 || (start_offset + length) > DB_PAGESIZE)
4081  {
4083  "pgbuf_copy_to_area: SYSTEM ERROR.. Trying to copy"
4084  " from beyond page boundary limits. Start_offset = %d, length = %d\n", start_offset, length);
4086  return NULL;
4087  }
4088 #endif /* CUBRID_DEBUG */
4089 
4090  /* Is this a resident page ? */
4091  hash_anchor = &(pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (vpid)]);
4092  bufptr = pgbuf_search_hash_chain (thread_p, hash_anchor, vpid);
4093 
4094  if (bufptr == NULL)
4095  {
4096  /* the caller is holding only hash_anchor->hash_mutex. */
4097  /* release hash mutex */
4098  pthread_mutex_unlock (&hash_anchor->hash_mutex);
4099 
4101  {
4102  return NULL;
4103  }
4104 
4105  /* The page is not on the buffer pool. Do we want to cache the page ? */
4106  if (do_fetch == true)
4107  {
4108  pgptr = pgbuf_fix (thread_p, vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
4109  if (pgptr != NULL)
4110  {
4111  (void) pgbuf_check_page_ptype (thread_p, pgptr, PAGE_AREA);
4112 
4113  memcpy (area, (char *) pgptr + start_offset, length);
4114  pgbuf_unfix_and_init (thread_p, pgptr);
4115  }
4116  else
4117  {
4118  area = NULL;
4119  }
4120  }
4121 #if defined(ENABLE_UNUSED_FUNCTION)
4122  else
4123  {
4124  /*
4125  * Do not cache the page in the page buffer pool.
4126  * Read the needed portion of the page directly from disk
4127  */
4129  {
4130  if (pgbuf_is_valid_page (thread_p, vpid, false NULL, NULL) != DISK_VALID)
4131  {
4132  return NULL;
4133  }
4134  }
4135 
4136  /* Record number of reads in statistics */
4138 
4139  if (fileio_read_user_area (thread_p, fileio_get_volume_descriptor (vpid->volid), vpid->pageid, start_offset,
4140  length, area) == NULL)
4141  {
4142  area = NULL;
4143  }
4144  }
4145 #endif
4146  }
4147  else
4148  {
4149  /* the caller is holding only bufptr->mutex. */
4150  CAST_BFPTR_TO_PGPTR (pgptr, bufptr);
4151 
4152  (void) pgbuf_check_page_ptype (thread_p, pgptr, PAGE_AREA);
4153 
4154  memcpy (area, (char *) pgptr + start_offset, length);
4155 
4156  if (thread_get_sort_stats_active (thread_p))
4157  {
4159  }
4160 
4161  /* release mutex */
4162  PGBUF_BCB_UNLOCK (bufptr);
4163  }
4164 
4165  return area;
4166 }
4167 
4168 /*
4169  * pgbuf_copy_from_area () - Copy area to a portion of given page
4170  * return: area or NULL
4171  * vpid(in): Complete Page identifier
4172  * start_offset(in): Start offset of interested content in page
4173  * length(in): Length of the content of page to copy
4174  * area(in): Area where to copy the needed content of the page
4175  * do_fetch(in): Do we want to cache the page in the buffer pool when it is
4176  * not already cached?
4177  *
4178  * Note: Copy the content of the given area to the page starting at the given
4179  * offset. If the page is not in the page buffer pool, it is only
4180  * buffered when the value of "do_fetch" is not false.
4181  *
4182  * WARNING:
4183  * The user should be very careful on deciding wheater or not to allow
4184  * buffering of pages. If the page is going to be used in the short
4185  * future, it is better to allow buffering the page to avoid extra I/O.
4186  * If you do not buffer the page, not header recovery information is
4187  * copied along with the write of the page. In this case, the page may
4188  * not be able to be recovered.
4189  * DO NOT USE THIS FEATURE IF YOU LOGGED ANYTHING RELATED TO THIS PAGE.
4190  */
4191 void *
4192 pgbuf_copy_from_area (THREAD_ENTRY * thread_p, const VPID * vpid, int start_offset, int length, void *area,
4193  bool do_fetch, TDE_ALGORITHM tde_algo)
4194 {
4195  PGBUF_BUFFER_HASH *hash_anchor;
4196  PGBUF_BCB *bufptr;
4197  PAGE_PTR pgptr;
4198  LOG_DATA_ADDR addr;
4199 #if defined(ENABLE_UNUSED_FUNCTION)
4200  int vol_fd;
4201 #endif
4202 
4203  assert (start_offset >= 0 && (start_offset + length) <= DB_PAGESIZE);
4204 
4205  /* Is this a resident page ? */
4206  hash_anchor = &(pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (vpid)]);
4207  bufptr = pgbuf_search_hash_chain (thread_p, hash_anchor, vpid);
4208 
4209  if (bufptr == NULL)
4210  {
4211  /* the caller is holding only hash_anchor->hash_mutex. */
4212 
4213  pthread_mutex_unlock (&hash_anchor->hash_mutex);
4214 
4216  {
4217  return NULL;
4218  }
4219 
4220 #if defined(ENABLE_UNUSED_FUNCTION)
4221  if (do_fetch == false)
4222  {
4223  /* Do not cache the page in the page buffer pool. Write the desired portion of the page directly to disk */
4225  {
4226  if (pgbuf_is_valid_page (thread_p, vpid, false NULL, NULL) != DISK_VALID)
4227  {
4228  return NULL;
4229  }
4230  }
4231 
4232  /* Record number of reads in statistics */
4234 
4235  vol_fd = fileio_get_volume_descriptor (vpid->volid);
4236  if (fileio_write_user_area (thread_p, vol_fd, vpid->pageid, start_offset, length, area) == NULL)
4237  {
4238  area = NULL;
4239  }
4240 
4241  return area;
4242  }
4243 #endif
4244  }
4245  else
4246  {
4247  /* the caller is holding only bufptr->mutex. */
4248  PGBUF_BCB_UNLOCK (bufptr);
4249  }
4250 
4251  pgptr = pgbuf_fix (thread_p, vpid, NEW_PAGE, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
4252  if (pgptr != NULL)
4253  {
4254  (void) pgbuf_set_page_ptype (thread_p, pgptr, PAGE_AREA);
4255  pgbuf_set_tde_algorithm (thread_p, pgptr, tde_algo, true);
4256 
4257  memcpy ((char *) pgptr + start_offset, area, length);
4258  /* Inform log manager that there is no need to log this page */
4259  addr.vfid = NULL;
4260  addr.pgptr = pgptr;
4261  addr.offset = 0;
4262  log_skip_logging (thread_p, &addr);
4263  pgbuf_set_dirty (thread_p, pgptr, FREE);
4264  }
4265  else
4266  {
4267  area = NULL;
4268  }
4269 
4270  return area;
4271 }
4272 
4273 /*
4274  * pgbuf_set_dirty () - Mark as modified the buffer associated with pgptr and optionally free the page
4275  * return: void
4276  * pgptr(in): Pointer to page
4277  * free_page(in): Free the page too ?
4278  */
4279 void
4280 pgbuf_set_dirty (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, bool free_page)
4281 {
4282  PGBUF_BCB *bufptr;
4283 
4285  {
4286  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4287  {
4288  return;
4289  }
4290  }
4291 
4292  /* Get the address of the buffer from the page and set buffer dirty */
4293  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4294  assert (!VPID_ISNULL (&bufptr->vpid));
4295 
4296 #if defined(SERVER_MODE) && !defined(NDEBUG)
4297  if (bufptr->vpid.pageid == 0)
4298  {
4299  disk_volheader_check_magic (thread_p, pgptr);
4300  }
4301 #endif
4302 
4303  pgbuf_set_dirty_buffer_ptr (thread_p, bufptr);
4304 
4305  /* If free request is given, unfix the page. */
4306  if (free_page == FREE)
4307  {
4308  pgbuf_unfix (thread_p, pgptr);
4309  }
4310 }
4311 
4312 /*
4313  * pgbuf_get_lsa () - Find the log sequence address of the given page
4314  * return: page lsa
4315  * pgptr(in): Pointer to page
4316  */
4317 LOG_LSA *
4319 {
4320  FILEIO_PAGE *io_pgptr;
4321 
4323  {
4324  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4325  {
4326  return NULL;
4327  }
4328  }
4329 
4330  /* NOTE: Does not need to hold mutex since the page is fixed */
4331 
4332  CAST_PGPTR_TO_IOPGPTR (io_pgptr, pgptr);
4333  return &io_pgptr->prv.lsa;
4334 }
4335 
4336 /*
4337  * pgbuf_page_has_changed () - check if page has change based on current LSA and a previous reference LSA
4338  * return: page lsa
4339  * pgptr(in): Pointer to page
4340  */
4341 int
4343 {
4344  LOG_LSA curr_lsa;
4345 
4346  LSA_COPY (&curr_lsa, pgbuf_get_lsa (pgptr));
4347 
4348  if (!LSA_EQ (ref_lsa, &curr_lsa))
4349  {
4350  return 1;
4351  }
4352  return 0;
4353 }
4354 
4355 /*
4356  * pgbuf_set_lsa () - Set the log sequence address of the page to the given lsa
4357  * return: page lsa or NULL
4358  * pgptr(in): Pointer to page
4359  * lsa_ptr(in): Log Sequence address
4360  *
4361  * Note: This function is for the exclusive use of the log and recovery manager.
4362  */
4363 const LOG_LSA *
4364 pgbuf_set_lsa (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, const LOG_LSA * lsa_ptr)
4365 {
4366  PGBUF_BCB *bufptr;
4367 
4369  {
4370  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4371  {
4372  return NULL;
4373  }
4374  }
4375 
4376  assert (lsa_ptr != NULL);
4377 
4378  /* NOTE: Does not need to hold mutex since the page is fixed */
4379 
4380  /* Get the address of the buffer from the page and set buffer dirty */
4381  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4382 
4383  /*
4384  * Don't change LSA of temporary volumes or auxiliary volumes.
4385  * (e.g., those of copydb, backupdb).
4386  */
4388  || PGBUF_IS_AUXILIARY_VOLUME (bufptr->vpid.volid) == true)
4389  {
4390  return NULL;
4391  }
4392 
4393  /*
4394  * Always set the lsa of temporary volumes to the special
4395  * temp lsa, if it was somehow changed.
4396  */
4397  if (pgbuf_is_temporary_volume (bufptr->vpid.volid) == true)
4398  {
4400  if (logtb_is_current_active (thread_p))
4401  {
4402  return NULL;
4403  }
4404  }
4405 
4406  fileio_set_page_lsa (&bufptr->iopage_buffer->iopage, lsa_ptr, IO_PAGESIZE);
4407 
4408  /*
4409  * If this is the first time the page is set dirty, record the new LSA
4410  * of the page as the oldest_unflush_lsa for the page.
4411  * We could have placed these feature when the page is set dirty,
4412  * unfortunately, some pages are set dirty before an LSA is set.
4413  */
4414  if (LSA_ISNULL (&bufptr->oldest_unflush_lsa))
4415  {
4416  if (LSA_LT (lsa_ptr, &log_Gl.chkpt_redo_lsa))
4417  {
4418  LOG_LSA chkpt_redo_lsa;
4419  int rc;
4420 
4421  rc = pthread_mutex_lock (&log_Gl.chkpt_lsa_lock);
4422  LSA_COPY (&chkpt_redo_lsa, &log_Gl.chkpt_redo_lsa);
4423  pthread_mutex_unlock (&log_Gl.chkpt_lsa_lock);
4424 
4425  if (LSA_LT (lsa_ptr, &chkpt_redo_lsa))
4426  {
4427  er_stack_push ();
4429  fileio_get_volume_label (bufptr->vpid.volid, PEEK), lsa_ptr->pageid, lsa_ptr->offset,
4431  er_stack_pop ();
4432 
4433  assert (false);
4434  }
4435 
4436  }
4437  LSA_COPY (&bufptr->oldest_unflush_lsa, lsa_ptr);
4438  }
4439 
4440 #if defined (NDEBUG)
4441  /* We expect the page was or will be set as dirty before unfix. However, there might be a missing case to set dirty.
4442  * It is correct to set dirty here. Note that we have set lsa of the page and it should be also flushed.
4443  * But we also want to find missing cases and fix them. Make everything sure for release builds.
4444  */
4445  pgbuf_set_dirty_buffer_ptr (thread_p, bufptr);
4446 #endif /* NDEBUG */
4447 
4448  return lsa_ptr;
4449 }
4450 
4451 /*
4452  * pgbuf_reset_temp_lsa () - Reset LSA of temp volume to special temp LSA (-2,-2)
4453  * return: void
4454  * pgptr(in): Pointer to page
4455  */
4456 void
4458 {
4459  PGBUF_BCB *bufptr;
4460 
4461  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4463 }
4464 
4465 /*
4466  * pgbuf_set_tde_algorithm () - set tde encryption algorithm to the page
4467  * return: void
4468  * thread_p (in) : Thread entry
4469  * pgptr(in): Page pointer
4470  * tde_algo (in) : encryption algorithm - NONE, AES, ARIA
4471  */
4472 void
4473 pgbuf_set_tde_algorithm (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, TDE_ALGORITHM tde_algo, bool skip_logging)
4474 {
4475  FILEIO_PAGE *iopage = NULL;
4476  TDE_ALGORITHM prev_tde_algo = TDE_ALGORITHM_NONE;
4477 
4478  assert (tde_Cipher.is_loaded || tde_algo == TDE_ALGORITHM_NONE);
4479 
4480  prev_tde_algo = pgbuf_get_tde_algorithm (pgptr);
4481 
4482  if (prev_tde_algo == tde_algo)
4483  {
4484  return;
4485  }
4486 
4487  CAST_PGPTR_TO_IOPGPTR (iopage, pgptr);
4488 
4489 #if !defined(NDEBUG)
4490  er_log_debug (ARG_FILE_LINE, "TDE: pgbuf_set_tde_algorithm(): VPID = %d|%d, tde_algorithm = %s\n", iopage->prv.volid,
4491  iopage->prv.pageid, tde_get_algorithm_name (tde_algo));
4492 #endif /* !NDEBUG */
4493 
4494  if (!skip_logging)
4495  {
4497  sizeof (TDE_ALGORITHM), &prev_tde_algo, &tde_algo);
4498  }
4499 
4500  /* clear tde encryption bits */
4502 
4503  switch (tde_algo)
4504  {
4505  case TDE_ALGORITHM_AES:
4507  break;
4508  case TDE_ALGORITHM_ARIA:
4510  break;
4511  case TDE_ALGORITHM_NONE:
4512  break; // do nothing, already cleared
4513  default:
4514  assert (false);
4515  }
4516 
4517  pgbuf_set_dirty (thread_p, pgptr, DONT_FREE);
4518 }
4519 
4520 /*
4521  * pgbuf_rv_set_tde_algorithm () - recovery setting tde encryption algorithm to the page
4522  * return : NO_ERROR, or ER_code
4523  * thread_p (in) : Thread entry
4524  * pgptr(in): Page pointer
4525  * tde_algo (in) : encryption algorithm - NONE, AES, ARIA
4526  */
4527 int
4529 {
4530  FILEIO_PAGE *iopage = NULL;
4531  PAGE_PTR pgptr = rcv->pgptr;
4532  TDE_ALGORITHM tde_algo = *((TDE_ALGORITHM *) rcv->data);
4533 
4534  assert (rcv->length == sizeof (TDE_ALGORITHM));
4535 
4536  pgbuf_set_tde_algorithm (thread_p, pgptr, tde_algo, true);
4537 
4538  return NO_ERROR;
4539 }
4540 
4541 /*
4542  * pgbuf_get_tde_algorithm () - get tde encryption algorithm of the page
4543  * return: TDE_ALGORITHM
4544  * pgptr(in): Page pointer
4545  * tde_algo (out) : encryption algorithm - NONE, AES, ARIA
4546  */
4549 {
4550  FILEIO_PAGE *iopage = NULL;
4551 
4552  CAST_PGPTR_TO_IOPGPTR (iopage, pgptr);
4553 
4554  // encryption algorithms are exclusive
4557 
4559  {
4560  return TDE_ALGORITHM_AES;
4561  }
4562  else if (iopage->prv.pflag & FILEIO_PAGE_FLAG_ENCRYPTED_ARIA)
4563  {
4564  return TDE_ALGORITHM_ARIA;
4565  }
4566  else
4567  {
4568  return TDE_ALGORITHM_NONE;
4569  }
4570 }
4571 
4572 /*
4573  * pgbuf_get_vpid () - Find the volume and page identifier associated with the passed buffer
4574  * return: void
4575  * pgptr(in): Page pointer
4576  * vpid(out): Volume and page identifier
4577  */
4578 void
4580 {
4581  PGBUF_BCB *bufptr;
4582 
4584  {
4585  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4586  {
4587  VPID_SET_NULL (vpid);
4588  return;
4589  }
4590  }
4591 
4592  /* NOTE: Does not need to hold mutex since the page is fixed */
4593 
4594  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4595  *vpid = bufptr->vpid;
4596 }
4597 
4598 /*
4599  * pgbuf_get_vpid_ptr () - Find the volume and page identifier associated with the passed buffer
4600  * return: pointer to vpid
4601  * pgptr(in): Page pointer
4602  *
4603  * Note: Once the buffer is freed, the content of the vpid pointer may be
4604  * updated by the page buffer manager, thus a lot of care should be taken.
4605  * The values of the vpid pointer must not be altered by the caller.
4606  * Once the page is freed, the vpid pointer should not be used any longer.
4607  */
4608 VPID *
4610 {
4611  PGBUF_BCB *bufptr;
4612 
4614  {
4615  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4616  {
4617  return NULL;
4618  }
4619  }
4620 
4621  /* NOTE: Does not need to hold mutex since the page is fixed */
4622 
4623  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4624  return &(bufptr->vpid);
4625 }
4626 
4627 /*
4628  * pgbuf_get_latch_mode () - Find the latch mode associated with the passed buffer
4629  * return: latch mode
4630  * pgptr(in): Page pointer
4631  */
4634 {
4635  PGBUF_BCB *bufptr;
4636 
4638  {
4639  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4640  {
4641  return PGBUF_LATCH_INVALID;
4642  }
4643  }
4644 
4645  /* NOTE: Does not need to hold mutex since the page is fixed */
4646 
4647  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4648  return bufptr->latch_mode;
4649 }
4650 
4651 /*
4652  * pgbuf_get_page_id () - Find the page identifier associated with the passed buffer
4653  * return: PAGEID
4654  * pgptr(in): Page pointer
4655  */
4656 PAGEID
4658 {
4659  PGBUF_BCB *bufptr;
4660 
4661  /* NOTE: Does not need to hold mutex since the page is fixed */
4662 
4663  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4664  assert (pgbuf_check_bcb_page_vpid (bufptr, false) == true);
4665 
4666  return bufptr->vpid.pageid;
4667 }
4668 
4669 /*
4670  * pgbuf_get_page_ptype () -
4671  * return:
4672  * pgptr(in): Pointer to page
4673  */
4674 PAGE_TYPE
4676 {
4677  PGBUF_BCB *bufptr;
4678  PAGE_TYPE ptype;
4679 
4681  {
4682  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4683  {
4684  return PAGE_UNKNOWN; /* TODO - need to return error_code */
4685  }
4686  }
4687 
4688  /* NOTE: Does not need to hold mutex since the page is fixed */
4689 
4690  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4691  assert_release (pgbuf_check_bcb_page_vpid (bufptr, false) == true);
4692 
4693  ptype = (PAGE_TYPE) (bufptr->iopage_buffer->iopage.prv.ptype);
4694 
4695  assert (PAGE_UNKNOWN <= (int) ptype);
4696  assert (ptype <= PAGE_LAST);
4697 
4698  return ptype;
4699 }
4700 
4701 /*
4702  * pgbuf_get_volume_id () - Find the volume associated with the passed buffer
4703  * return: VOLID
4704  * pgptr(in): Page pointer
4705  */
4706 VOLID
4708 {
4709  PGBUF_BCB *bufptr;
4710 
4712  {
4713  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4714  {
4715  return NULL_VOLID;
4716  }
4717  }
4718 
4719  /* NOTE: Does not need to hold mutex since the page is fixed */
4720 
4721  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4722  return bufptr->vpid.volid;
4723 }
4724 
4725 /*
4726  * pgbuf_get_volume_label () - Find the name of the volume associated with the passed buffer
4727  * return: Volume label
4728  * pgptr(in): Page pointer
4729  */
4730 const char *
4732 {
4733  PGBUF_BCB *bufptr;
4734 
4735  /* NOTE: Does not need to hold mutex since the page is fixed */
4736 
4737  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4738  assert (!VPID_ISNULL (&bufptr->vpid));
4739 
4740  return fileio_get_volume_label (bufptr->vpid.volid, PEEK);
4741 }
4742 
4743 /*
4744  * pgbuf_force_to_check_for_interrupts () - Force the page buffer manager
4745  * to check for possible interrupts when pages are fetched
4746  * return: void
4747  * void(in):
4748  */
4749 void
4751 {
4752  pgbuf_Pool.check_for_interrupts = true;
4753 }
4754 
4755 /*
4756  * pgbuf_is_log_check_for_interrupts () - Force the page buffer manager to
4757  * check for possible interrupts when pages are fetched
4758  * return: if there is interrupt, return true, otherwise return false
4759  * void(in):
4760  */
4761 bool
4763 {
4764  if (pgbuf_Pool.check_for_interrupts == true
4765  && logtb_is_interrupted (thread_p, true, &pgbuf_Pool.check_for_interrupts) == true)
4766  {
4768  return true;
4769  }
4770  else
4771  {
4772  return false;
4773  }
4774 }
4775 
4776 /*
4777  * pgbuf_set_lsa_as_temporary () - The log sequence address of the page is set to temporary lsa address
4778  * return: void
4779  * pgptr(in): Pointer to page
4780  *
4781  * Note: Set the log sequence address of the page to the non recoverable LSA
4782  * address. In this case the page is declared a non recoverable page
4783  * (temporary page). Logging must not be done in a temporary page,
4784  * however it is not enforced. A warning message is issued if someone
4785  * logs something. This warning will indicate a potential bug.
4786  *
4787  * This function is used for debugging.
4788  */
4789 void
4791 {
4792  PGBUF_BCB *bufptr;
4793 
4794  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4795  assert (!VPID_ISNULL (&bufptr->vpid));
4796 
4798  pgbuf_set_dirty_buffer_ptr (thread_p, bufptr);
4799 }
4800 
4801 /*
4802  * pgbuf_set_bcb_page_vpid () -
4803  * return: void
4804  * bufptr(in): pointer to buffer page
4805  * force_set_vpid(in): true, if forces VPID setting
4806  *
4807  * Note: This function is used for debugging.
4808  */
4809 STATIC_INLINE void
4810 pgbuf_set_bcb_page_vpid (PGBUF_BCB * bufptr, bool force_set_vpid)
4811 {
4812  if (bufptr == NULL || VPID_ISNULL (&bufptr->vpid))
4813  {
4814  assert (bufptr != NULL);
4815  assert (!VPID_ISNULL (&bufptr->vpid));
4816  return;
4817  }
4818 
4819  /* perm volume */
4820  if (bufptr->vpid.volid > NULL_VOLID)
4821  {
4822  /* Check if is the first time */
4823  if (force_set_vpid
4824  || (bufptr->iopage_buffer->iopage.prv.pageid == -1 && bufptr->iopage_buffer->iopage.prv.volid == -1))
4825  {
4826  /* Set Page identifier */
4827  bufptr->iopage_buffer->iopage.prv.pageid = bufptr->vpid.pageid;
4828  bufptr->iopage_buffer->iopage.prv.volid = bufptr->vpid.volid;
4829 
4830  bufptr->iopage_buffer->iopage.prv.ptype = '\0';
4831  bufptr->iopage_buffer->iopage.prv.p_reserve_1 = 0;
4832  bufptr->iopage_buffer->iopage.prv.p_reserve_2 = 0;
4833  bufptr->iopage_buffer->iopage.prv.tde_nonce = 0;
4834  }
4835  }
4836 }
4837 
4838 /*
4839  * pgbuf_set_page_ptype () -
4840  * return: void
4841  * pgptr(in): Pointer to page
4842  * ptype(in): page type
4843  *
4844  * Note: This function is used for debugging.
4845  */
4846 void
4848 {
4849  PGBUF_BCB *bufptr;
4850 
4851  assert (pgptr != NULL);
4852 
4854  {
4855  if (pgbuf_is_valid_page_ptr (pgptr) == false)
4856  {
4857  assert (false);
4858  return;
4859  }
4860  }
4861 
4862  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4863  assert (!VPID_ISNULL (&bufptr->vpid));
4864 
4865  /* Set Page identifier if needed */
4866  pgbuf_set_bcb_page_vpid (bufptr, false);
4867 
4868  if (pgbuf_check_bcb_page_vpid (bufptr, false) != true)
4869  {
4870  assert (false);
4871  return;
4872  }
4873 
4874  bufptr->iopage_buffer->iopage.prv.ptype = (unsigned char) ptype;
4875 
4876  assert_release (bufptr->iopage_buffer->iopage.prv.ptype == ptype);
4877 }
4878 
4879 /*
4880  * pgbuf_is_lsa_temporary () - Find if the page is a temporary one
4881  * return: true/false
4882  * pgptr(in): Pointer to page
4883  */
4884 bool
4886 {
4887  PGBUF_BCB *bufptr;
4888 
4889  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
4890 
4892  || pgbuf_is_temporary_volume (bufptr->vpid.volid) == true)
4893  {
4894  return true;
4895  }
4896  else
4897  {
4898  return false;
4899  }
4900 }
4901 
4902 /*
4903  * pgbuf_is_temporary_volume () - Find if the given permanent volume has been declared for temporary storage purposes
4904  * return: true/false
4905  * volid(in): Volume identifier of last allocated permanent volume
4906  */
4907 STATIC_INLINE bool
4909 {
4910  /* TODO: I don't know why page buffer should care about temporary files and what this does, but it is really annoying.
4911  * until database is loaded and restarted, I will return false always. */
4912  if (!LOG_ISRESTARTED ())
4913  {
4914  return false;
4915  }
4916  return (LOG_DBFIRST_VOLID <= volid && xdisk_get_purpose (NULL, volid) == DB_TEMPORARY_DATA_PURPOSE);
4917 }
4918 
4919 /*
4920  * pgbuf_init_BCB_table () - Initializes page buffer BCB table
4921  * return: NO_ERROR, or ER_code
4922  */
4923 static int
4925 {
4926  PGBUF_BCB *bufptr;
4927  PGBUF_IOPAGE_BUFFER *ioptr;
4928  int i;
4929  long long unsigned alloc_size;
4930 
4931  /* allocate space for page buffer BCB table */
4932  alloc_size = (long long unsigned) pgbuf_Pool.num_buffers * PGBUF_BCB_SIZEOF;
4933  if (!MEM_SIZE_IS_VALID (alloc_size))
4934  {
4935  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, "data_buffer_pages");
4936  return ER_PRM_BAD_VALUE;
4937  }
4938  pgbuf_Pool.BCB_table = (PGBUF_BCB *) malloc ((size_t) alloc_size);
4939  if (pgbuf_Pool.BCB_table == NULL)
4940  {
4941  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_OUT_OF_VIRTUAL_MEMORY, 1, (size_t) alloc_size);
4942  return ER_OUT_OF_VIRTUAL_MEMORY;
4943  }
4944 
4945  /* allocate space for io page buffers */
4946  alloc_size = (long long unsigned) pgbuf_Pool.num_buffers * PGBUF_IOPAGE_BUFFER_SIZE;
4947  if (!MEM_SIZE_IS_VALID (alloc_size))
4948  {
4949  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, "data_buffer_pages");
4950  if (pgbuf_Pool.BCB_table != NULL)
4951  {
4952  free_and_init (pgbuf_Pool.BCB_table);
4953  }
4954  return ER_PRM_BAD_VALUE;
4955  }
4956  pgbuf_Pool.iopage_table = (PGBUF_IOPAGE_BUFFER *) malloc ((size_t) alloc_size);
4957  if (pgbuf_Pool.iopage_table == NULL)
4958  {
4959  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_OUT_OF_VIRTUAL_MEMORY, 1, (size_t) alloc_size);
4960  if (pgbuf_Pool.BCB_table != NULL)
4961  {
4962  free_and_init (pgbuf_Pool.BCB_table);
4963  }
4964  return ER_OUT_OF_VIRTUAL_MEMORY;
4965  }
4966 
4967  /* initialize each entry of the buffer BCB table */
4968  for (i = 0; i < pgbuf_Pool.num_buffers; i++)
4969  {
4970  bufptr = PGBUF_FIND_BCB_PTR (i);
4971  pthread_mutex_init (&bufptr->mutex, NULL);
4972 #if defined (SERVER_MODE)
4973  bufptr->owner_mutex = -1;
4974 #endif /* SERVER_MODE */
4975  VPID_SET_NULL (&bufptr->vpid);
4976  bufptr->fcnt = 0;
4977  bufptr->latch_mode = PGBUF_LATCH_INVALID;
4978 
4979 #if defined(SERVER_MODE)
4980  bufptr->next_wait_thrd = NULL;
4981 #endif /* SERVER_MODE */
4982 
4983  bufptr->hash_next = NULL;
4984  bufptr->prev_BCB = NULL;
4985 
4986  if (i == (pgbuf_Pool.num_buffers - 1))
4987  {
4988  bufptr->next_BCB = NULL;
4989  }
4990  else
4991  {
4992  bufptr->next_BCB = PGBUF_FIND_BCB_PTR (i + 1);
4993  }
4994 
4995  bufptr->flags = PGBUF_BCB_INIT_FLAGS;
4996  bufptr->count_fix_and_avoid_dealloc = 0;
4997  bufptr->hit_age = 0;
4998  LSA_SET_NULL (&bufptr->oldest_unflush_lsa);
4999 
5000  bufptr->tick_lru3 = 0;
5001  bufptr->tick_lru_list = 0;
5002 
5003  /* link BCB and iopage buffer */
5004  ioptr = PGBUF_FIND_IOPAGE_PTR (i);
5005 
5007 
5008  /* Init Page identifier */
5009  ioptr->iopage.prv.pageid = -1;
5010  ioptr->iopage.prv.volid = -1;
5011 
5012  ioptr->iopage.prv.ptype = '\0';
5013  ioptr->iopage.prv.pflag = '\0';
5014  ioptr->iopage.prv.p_reserve_1 = 0;
5015  ioptr->iopage.prv.p_reserve_2 = 0;
5016  ioptr->iopage.prv.tde_nonce = 0;
5017 
5018  bufptr->iopage_buffer = ioptr;
5019  ioptr->bcb = bufptr;
5020 
5021 #if defined(CUBRID_DEBUG)
5022  /* Reinitizalize the buffer */
5023  pgbuf_scramble (&bufptr->iopage_buffer->iopage);
5024  memcpy (PGBUF_FIND_BUFFER_GUARD (bufptr), pgbuf_Guard, sizeof (pgbuf_Guard));
5025 #endif /* CUBRID_DEBUG */
5026  }
5027 
5028  return NO_ERROR;
5029 }
5030 
5031 /*
5032  * pgbuf_initialize_hash_table () - Initializes page buffer hash table
5033  * return: NO_ERROR, or ER_code
5034  */
5035 static int
5037 {
5038  size_t hashsize, i;
5039 
5040  /* allocate space for the buffer hash table */
5041  hashsize = PGBUF_HASH_SIZE;
5042  pgbuf_Pool.buf_hash_table = (PGBUF_BUFFER_HASH *) malloc (hashsize * PGBUF_BUFFER_HASH_SIZEOF);
5043  if (pgbuf_Pool.buf_hash_table == NULL)
5044  {
5045  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_OUT_OF_VIRTUAL_MEMORY, 1, (hashsize * PGBUF_BUFFER_HASH_SIZEOF));
5046  return ER_OUT_OF_VIRTUAL_MEMORY;
5047  }
5048 
5049  /* initialize each entry of the buffer hash table */
5050  for (i = 0; i < hashsize; i++)
5051  {
5052  pthread_mutex_init (&pgbuf_Pool.buf_hash_table[i].hash_mutex, NULL);
5053  pgbuf_Pool.buf_hash_table[i].hash_next = NULL;
5054  pgbuf_Pool.buf_hash_table[i].lock_next = NULL;
5055  }
5056 
5057  return NO_ERROR;
5058 }
5059 
5060 /*
5061  * pgbuf_initialize_lock_table () - Initializes page buffer lock table
5062  * return: NO_ERROR, or ER_code
5063  */
5064 static int
5066 {
5067  size_t i;
5068  size_t thrd_num_total;
5069  size_t alloc_size;
5070 
5071  /* allocate memory space for the buffer lock table */
5072  thrd_num_total = thread_num_total_threads ();
5073 #if defined(SERVER_MODE)
5074  assert ((int) thrd_num_total > MAX_NTRANS * 2);
5075 #else /* !SERVER_MODE */
5076  assert (thrd_num_total == 1);
5077 #endif /* !SERVER_MODE */
5078 
5079  alloc_size = thrd_num_total * PGBUF_BUFFER_LOCK_SIZEOF;
5080  pgbuf_Pool.buf_lock_table = (PGBUF_BUFFER_LOCK *) malloc (alloc_size);
5081  if (pgbuf_Pool.buf_lock_table == NULL)
5082  {
5084  return ER_OUT_OF_VIRTUAL_MEMORY;
5085  }
5086 
5087  /* initialize each entry of the buffer lock table */
5088  for (i = 0; i < thrd_num_total; i++)
5089  {
5090  VPID_SET_NULL (&pgbuf_Pool.buf_lock_table[i].vpid);
5091  pgbuf_Pool.buf_lock_table[i].lock_next = NULL;
5092 #if defined(SERVER_MODE)
5093  pgbuf_Pool.buf_lock_table[i].next_wait_thrd = NULL;
5094 #endif /* SERVER_MODE */
5095  }
5096 
5097  return NO_ERROR;
5098 }
5099 
5100 /*
5101  * pgbuf_initialize_lru_list () - Initializes the page buffer LRU list
5102  * return: NO_ERROR, or ER_code
5103  */
5104 static int
5106 {
5107  int i;
5108 
5109  /* set the number of LRU lists */
5111  if (pgbuf_Pool.num_LRU_list == 0)
5112  {
5113  /* Default value of shared lists : # of transactions */
5114  pgbuf_Pool.num_LRU_list = (int) MAX_NTRANS;
5115  assert (pgbuf_Pool.num_LRU_list > 0);
5116 
5117  if (pgbuf_Pool.num_buffers / pgbuf_Pool.num_LRU_list < PGBUF_MIN_PAGES_IN_SHARED_LIST)
5118  {
5119  pgbuf_Pool.num_LRU_list = pgbuf_Pool.num_buffers / PGBUF_MIN_PAGES_IN_SHARED_LIST;
5120  }
5121 
5122  /* should have at least 4 shared LRUs */
5123  pgbuf_Pool.num_LRU_list = MAX (pgbuf_Pool.num_LRU_list, 4);
5124  }
5125 
5126  /* allocate memory space for the page buffer LRU lists */
5128  if (pgbuf_Pool.buf_LRU_list == NULL)
5129  {
5131  (PGBUF_TOTAL_LRU_COUNT * PGBUF_LRU_LIST_SIZEOF));
5132  return ER_OUT_OF_VIRTUAL_MEMORY;
5133  }
5134 
5135  /* initialize the page buffer LRU lists */
5136  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
5137  {
5138  pgbuf_Pool.buf_LRU_list[i].index = i;
5139 
5140  pthread_mutex_init (&pgbuf_Pool.buf_LRU_list[i].mutex, NULL);
5141  pgbuf_Pool.buf_LRU_list[i].top = NULL;
5142  pgbuf_Pool.buf_LRU_list[i].bottom = NULL;
5143  pgbuf_Pool.buf_LRU_list[i].bottom_1 = NULL;
5144  pgbuf_Pool.buf_LRU_list[i].bottom_2 = NULL;
5145  pgbuf_Pool.buf_LRU_list[i].count_lru1 = 0;
5146  pgbuf_Pool.buf_LRU_list[i].count_lru2 = 0;
5147  pgbuf_Pool.buf_LRU_list[i].count_lru3 = 0;
5148  pgbuf_Pool.buf_LRU_list[i].count_vict_cand = 0;
5149  pgbuf_Pool.buf_LRU_list[i].victim_hint = NULL;
5150  pgbuf_Pool.buf_LRU_list[i].tick_list = 0;
5151  pgbuf_Pool.buf_LRU_list[i].tick_lru3 = 0;
5152 
5153  pgbuf_Pool.buf_LRU_list[i].threshold_lru1 = 0;
5154  pgbuf_Pool.buf_LRU_list[i].threshold_lru2 = 0;
5155  pgbuf_Pool.buf_LRU_list[i].quota = 0;
5156 
5157  pgbuf_Pool.buf_LRU_list[i].flags = 0;
5158  }
5159 
5160  return NO_ERROR;
5161 }
5162 
5163 /*
5164  * pgbuf_initialize_aout_list () - initialize the Aout list
5165  * return : error code or NO_ERROR
5166  */
5167 static int
5169 {
5170 /* limit Aout size to equivalent of 512M */
5171 #define PGBUF_LIMIT_AOUT_BUFFERS 32768
5172  int i;
5173  float aout_ratio;
5174  size_t alloc_size = 0;
5175  PGBUF_AOUT_LIST *list = &pgbuf_Pool.buf_AOUT_list;
5176 
5178 
5179  list->max_count = (int) (pgbuf_Pool.num_buffers * aout_ratio);
5180  list->Aout_top = NULL;
5181  list->Aout_bottom = NULL;
5182  list->bufarray = NULL;
5183  list->aout_buf_ht = NULL;
5184 
5185  pthread_mutex_init (&list->Aout_mutex, NULL);
5186 
5187  if (aout_ratio <= 0)
5188  {
5189  /* not using Aout list */
5190  list->max_count = 0;
5191  return NO_ERROR;
5192  }
5193 
5194  list->max_count = MIN (list->max_count, PGBUF_LIMIT_AOUT_BUFFERS);
5195  alloc_size = list->max_count * sizeof (PGBUF_AOUT_BUF);
5196 
5197  list->bufarray = (PGBUF_AOUT_BUF *) malloc (alloc_size);
5198  if (list->bufarray == NULL)
5199  {
5201  return ER_OUT_OF_VIRTUAL_MEMORY;
5202  }
5203 
5204  list->Aout_free = &list->bufarray[0];
5205 
5206  for (i = 0; i < list->max_count; i++)
5207  {
5208  VPID_SET_NULL (&list->bufarray[i].vpid);
5210  if (i != list->max_count - 1)
5211  {
5212  list->bufarray[i].next = &list->bufarray[i + 1];
5213  }
5214  else
5215  {
5216  list->bufarray[i].next = NULL;
5217  }
5218  list->bufarray[i].prev = NULL;
5219  }
5220 
5221  list->num_hashes = MAX (list->max_count / AOUT_HASH_DIVIDE_RATIO, 1);
5222 
5223  alloc_size = list->num_hashes * sizeof (MHT_TABLE *);
5224  list->aout_buf_ht = (MHT_TABLE **) malloc (alloc_size);
5225  if (list->aout_buf_ht == NULL)
5226  {
5228  goto error_return;
5229  }
5230 
5231  memset (list->aout_buf_ht, 0, alloc_size);
5232 
5233  for (i = 0; i < list->num_hashes; i++)
5234  {
5235  list->aout_buf_ht[i] = mht_create ("PGBUF_AOUT_HASH", list->max_count, pgbuf_hash_vpid, pgbuf_compare_vpid);
5236 
5237  if (list->aout_buf_ht[i] == NULL)
5238  {
5239  goto error_return;
5240  }
5241  }
5242 
5243  return NO_ERROR;
5244 
5245 error_return:
5246  list->Aout_free = NULL;
5247  if (list->bufarray != NULL)
5248  {
5249  free_and_init (list->bufarray);
5250  }
5251 
5252  if (list->aout_buf_ht != NULL)
5253  {
5254  for (i = 0; list->aout_buf_ht[i] != NULL; i++)
5255  {
5256  mht_destroy (list->aout_buf_ht[i]);
5257  }
5258  free_and_init (list->aout_buf_ht);
5259  }
5260 
5261  pthread_mutex_destroy (&list->Aout_mutex);
5262 
5263  return ER_FAILED;
5264 #undef PGBUF_LIMIT_AOUT_BUFFERS
5265 }
5266 
5267 /*
5268  * pgbuf_initialize_invalid_list () - Initializes the page buffer invalid list
5269  * return: NO_ERROR
5270  */
5271 static int
5273 {
5274  /* initialize the invalid BCB list */
5275  pthread_mutex_init (&pgbuf_Pool.buf_invalid_list.invalid_mutex, NULL);
5277  pgbuf_Pool.buf_invalid_list.invalid_cnt = pgbuf_Pool.num_buffers;
5278 
5279  return NO_ERROR;
5280 }
5281 
5282 /*
5283  * pgbuf_initialize_thrd_holder () -
5284  * return: NO_ERROR, or ER_code
5285  */
5286 static int
5288 {
5289  size_t thrd_num_total;
5290  size_t alloc_size;
5291  size_t i, j, idx;
5292 
5293  thrd_num_total = thread_num_total_threads ();
5294 #if defined(SERVER_MODE)
5295  assert ((int) thrd_num_total > MAX_NTRANS * 2);
5296 #else /* !SERVER_MODE */
5297  assert (thrd_num_total == 1);
5298 #endif /* !SERVER_MODE */
5299 
5300  pgbuf_Pool.thrd_holder_info = (PGBUF_HOLDER_ANCHOR *) malloc (thrd_num_total * PGBUF_HOLDER_ANCHOR_SIZEOF);
5301  if (pgbuf_Pool.thrd_holder_info == NULL)
5302  {
5304  thrd_num_total * PGBUF_HOLDER_ANCHOR_SIZEOF);
5305  return ER_OUT_OF_VIRTUAL_MEMORY;
5306  }
5307 
5308  /* phase 1: allocate memory space that is used for BCB holder entries */
5309  alloc_size = thrd_num_total * PGBUF_DEFAULT_FIX_COUNT * PGBUF_HOLDER_SIZEOF;
5310  pgbuf_Pool.thrd_reserved_holder = (PGBUF_HOLDER *) malloc (alloc_size);
5311  if (pgbuf_Pool.thrd_reserved_holder == NULL)
5312  {
5314  return ER_OUT_OF_VIRTUAL_MEMORY;
5315  }
5316 
5317  /* phase 2: initialize all the BCB holder entries */
5318 
5319  /*
5320  * Each thread has both free holder list and used(held) holder list.
5321  * The free holder list of each thread is initialized to
5322  * have PGBUF_DEFAULT_FIX_COUNT entries and the used holder list of
5323  * each thread is initialized to have no entry.
5324  */
5325  for (i = 0; i < thrd_num_total; i++)
5326  {
5327  pgbuf_Pool.thrd_holder_info[i].num_hold_cnt = 0;
5329  pgbuf_Pool.thrd_holder_info[i].thrd_hold_list = NULL;
5331 
5332  for (j = 0; j < PGBUF_DEFAULT_FIX_COUNT; j++)
5333  {
5334  idx = (i * PGBUF_DEFAULT_FIX_COUNT) + j;
5335  pgbuf_Pool.thrd_reserved_holder[idx].fix_count = 0;
5336  pgbuf_Pool.thrd_reserved_holder[idx].bufptr = NULL;
5337  pgbuf_Pool.thrd_reserved_holder[idx].thrd_link = NULL;
5338  INIT_HOLDER_STAT (&(pgbuf_Pool.thrd_reserved_holder[idx].perf_stat));
5339  pgbuf_Pool.thrd_reserved_holder[idx].first_watcher = NULL;
5340  pgbuf_Pool.thrd_reserved_holder[idx].last_watcher = NULL;
5341  pgbuf_Pool.thrd_reserved_holder[idx].watch_count = 0;
5342 
5343  if (j == (PGBUF_DEFAULT_FIX_COUNT - 1))
5344  {
5345  pgbuf_Pool.thrd_reserved_holder[idx].next_holder = NULL;
5346  }
5347  else
5348  {
5349  pgbuf_Pool.thrd_reserved_holder[idx].next_holder = &(pgbuf_Pool.thrd_reserved_holder[idx + 1]);
5350  }
5351  }
5352  }
5353 
5354  /* phase 3: initialize free BCB holder list shared by all threads */
5355  pthread_mutex_init (&pgbuf_Pool.free_holder_set_mutex, NULL);
5356  pgbuf_Pool.free_holder_set = NULL;
5357  pgbuf_Pool.free_index = -1; /* -1 means that there is no free holder entry */
5358 
5359  return NO_ERROR;
5360 }
5361 
5362 /*
5363  * pgbuf_allocate_thrd_holder_entry () - Allocates one buffer holder entry
5364  * from the free holder list of given thread
5365  * return: pointer to holder entry or NULL
5366  *
5367  * Note: If the free holder list is empty,
5368  * allocate it from the list of free holder arrays that is shared.
5369  */
5372 {
5373  int thrd_index;
5374  PGBUF_HOLDER_ANCHOR *thrd_holder_info;
5375  PGBUF_HOLDER *holder;
5376  PGBUF_HOLDER_SET *holder_set;
5377 #if defined(SERVER_MODE)
5378  int rv;
5379 #endif /* SERVER_MODE */
5380 
5381  thrd_index = thread_get_entry_index (thread_p);
5382 
5383  thrd_holder_info = &(pgbuf_Pool.thrd_holder_info[thrd_index]);
5384 
5385  if (thrd_holder_info->thrd_free_list != NULL)
5386  {
5387  /* allocate a BCB holder entry from the free BCB holder list of given thread */
5388  holder = thrd_holder_info->thrd_free_list;
5389  thrd_holder_info->thrd_free_list = holder->next_holder;
5390  thrd_holder_info->num_free_cnt -= 1;
5391  }
5392  else
5393  {
5394  /* holder == NULL : free BCB holder list is empty */
5395 
5396  /* allocate a BCB holder entry from the free BCB holder list shared by all threads. */
5397  rv = pthread_mutex_lock (&pgbuf_Pool.free_holder_set_mutex);
5398  if (pgbuf_Pool.free_index == -1)
5399  {
5400  /* no usable free holder entry */
5401  /* expand the free BCB holder list shared by threads */
5402  holder_set = (PGBUF_HOLDER_SET *) malloc (PGBUF_HOLDER_SET_SIZEOF);
5403  if (holder_set == NULL)
5404  {
5405  /* This situation must not be occurred. */
5406  assert (false);
5407  pthread_mutex_unlock (&pgbuf_Pool.free_holder_set_mutex);
5409  return NULL;
5410  }
5411 
5412  holder_set->next_set = pgbuf_Pool.free_holder_set;
5413  pgbuf_Pool.free_holder_set = holder_set;
5414  pgbuf_Pool.free_index = 0;
5415  }
5416 
5417  holder = &(pgbuf_Pool.free_holder_set->element[pgbuf_Pool.free_index]);
5418  pgbuf_Pool.free_index += 1;
5419 
5420  if (pgbuf_Pool.free_index == PGBUF_NUM_ALLOC_HOLDER)
5421  {
5422  pgbuf_Pool.free_index = -1;
5423  }
5424  pthread_mutex_unlock (&pgbuf_Pool.free_holder_set_mutex);
5425 
5426  /* initialize the newly allocated BCB holder entry */
5427  holder->thrd_link = NULL;
5428  }
5429 
5430  holder->next_holder = NULL; /* disconnect from free BCB holder list */
5431 
5432  /* connect the BCB holder entry at the head of thread's holder list */
5433  holder->thrd_link = thrd_holder_info->thrd_hold_list;
5434  thrd_holder_info->thrd_hold_list = holder;
5435  thrd_holder_info->num_hold_cnt += 1;
5436 
5437  holder->first_watcher = NULL;
5438  holder->last_watcher = NULL;
5439  holder->watch_count = 0;
5440 
5441  return holder;
5442 }
5443 
5444 /*
5445  * pgbuf_find_thrd_holder () - Find the holder entry of current thread on the BCB holder list of given BCB
5446  * return: pointer to holder entry or NULL
5447  * bufptr(in):
5448  */
5451 {
5452  int thrd_index;
5453  PGBUF_HOLDER *holder;
5454 
5455  assert (bufptr != NULL);
5456 
5457  thrd_index = thread_get_entry_index (thread_p);
5458 
5459  /* For each BCB holder entry of thread's holder list */
5460  holder = pgbuf_Pool.thrd_holder_info[thrd_index].thrd_hold_list;
5461 
5462  while (holder != NULL)
5463  {
5464  assert (holder->next_holder == NULL);
5465 
5466  if (holder->bufptr == bufptr)
5467  {
5468  break; /* found */
5469  }
5470 
5471  holder = holder->thrd_link;
5472  }
5473 
5474  return holder;
5475 }
5476 
5477 /*
5478  * pgbuf_unlatch_thrd_holder () - decrements fix_count by one to the holder entry of current thread on the BCB holder
5479  * list of given BCB
5480  * return: pointer to holder entry or NULL
5481  * bufptr(in):
5482  */
5483 STATIC_INLINE int
5484 pgbuf_unlatch_thrd_holder (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_HOLDER_STAT * holder_perf_stat_p)
5485 {
5486  int err = NO_ERROR;
5487  PGBUF_HOLDER *holder;
5488  PAGE_PTR pgptr;
5489 
5490  assert (bufptr != NULL);
5491 
5492  CAST_BFPTR_TO_PGPTR (pgptr, bufptr);
5493 
5494  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
5495  if (holder == NULL)
5496  {
5497  /* This situation must not be occurred. */
5498  assert (false);
5499  err = ER_PB_UNFIXED_PAGEPTR;
5500  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, err, 3, pgptr, bufptr->vpid.pageid,
5501  fileio_get_volume_label (bufptr->vpid.volid, PEEK));
5502 
5503  goto exit_on_error;
5504  }
5505 
5506  if (holder_perf_stat_p != NULL)
5507  {
5508  *holder_perf_stat_p = holder->perf_stat;
5509  }
5510 
5511  holder->fix_count--;
5512 
5513  if (holder->fix_count == 0)
5514  {
5515  /* remove its own BCB holder entry */
5516  if (pgbuf_remove_thrd_holder (thread_p, holder) != NO_ERROR)
5517  {
5518  /* This situation must not be occurred. */
5519  assert (false);
5520  err = ER_PB_UNFIXED_PAGEPTR;
5521  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, err, 3, pgptr, bufptr->vpid.pageid,
5522  fileio_get_volume_label (bufptr->vpid.volid, PEEK));
5523 
5524  goto exit_on_error;
5525  }
5526  }
5527 
5528  assert (err == NO_ERROR);
5529 
5530 exit_on_error:
5531 
5532  return err;
5533 }
5534 
5535 /*
5536  * pgbuf_remove_thrd_holder () - Remove holder entry from given BCB
5537  * return: NO_ERROR, or ER_code
5538  * holder(in): pointer to holder entry to be removed
5539  *
5540  * Note: This function removes the given holder entry from the holder list of
5541  * given BCB, and then connect it to the free holder list of the
5542  * corresponding thread.
5543  */
5544 STATIC_INLINE int
5546 {
5547  int err = NO_ERROR;
5548  int thrd_index;
5549  PGBUF_HOLDER_ANCHOR *thrd_holder_info;
5550  PGBUF_HOLDER *prev;
5551  int found;
5552 
5553  assert (holder != NULL);
5554  assert (holder->fix_count == 0);
5555 
5556  assert (holder->watch_count == 0);
5557 
5558  /* holder->fix_count is always set to some meaningful value when the holder entry is allocated for use. So, at this
5559  * time, we do not need to initialize it. connect the BCB holder entry into free BCB holder list of given thread. */
5560 
5561  thrd_index = thread_get_entry_index (thread_p);
5562 
5563  thrd_holder_info = &(pgbuf_Pool.thrd_holder_info[thrd_index]);
5564 
5565  holder->next_holder = thrd_holder_info->thrd_free_list;
5566  thrd_holder_info->thrd_free_list = holder;
5567  thrd_holder_info->num_free_cnt += 1;
5568 
5569  /* remove the BCB holder entry from thread's holder list */
5570  if (thrd_holder_info->thrd_hold_list == NULL)
5571  {
5572  /* This situation must not be occurred. */
5573  assert (false);
5574  err = ER_FAILED;
5575  goto exit_on_error;
5576  }
5577 
5578  if (thrd_holder_info->thrd_hold_list == (PGBUF_HOLDER *) holder)
5579  {
5580  thrd_holder_info->thrd_hold_list = holder->thrd_link;
5581  }
5582  else
5583  {
5584  found = false;
5585  prev = thrd_holder_info->thrd_hold_list;
5586 
5587  while (prev->thrd_link != NULL)
5588  {
5589  assert (prev->next_holder == NULL);
5590  if (prev->thrd_link == (PGBUF_HOLDER *) holder)
5591  {
5592  prev->thrd_link = holder->thrd_link;
5593  holder->thrd_link = NULL;
5594  found = true;
5595  break;
5596  }
5597  prev = prev->thrd_link;
5598  }
5599 
5600  if (found == false)
5601  {
5602  /* This situation must not be occurred. */
5603  assert (false);
5604  err = ER_FAILED;
5605  goto exit_on_error;
5606  }
5607  }
5608 
5609  thrd_holder_info->num_hold_cnt -= 1;
5610 
5611  assert (err == NO_ERROR);
5612 
5613 exit_on_error:
5614 
5615  return err;
5616 }
5617 
5618 static int
5619 pgbuf_latch_idle_page (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LATCH_MODE request_mode)
5620 {
5621  PGBUF_HOLDER *holder = NULL;
5622  bool buf_is_dirty;
5623 
5624  buf_is_dirty = pgbuf_bcb_is_dirty (bufptr);
5625 
5626  bufptr->latch_mode = request_mode;
5627  bufptr->fcnt = 1;
5628 
5629  PGBUF_BCB_UNLOCK (bufptr);
5630 
5631  /* allocate a BCB holder entry */
5632 
5633  assert (pgbuf_find_thrd_holder (thread_p, bufptr) == NULL);
5634 
5635  holder = pgbuf_allocate_thrd_holder_entry (thread_p);
5636  if (holder == NULL)
5637  {
5638  /* This situation must not be occurred. */
5639  assert (false);
5640  return ER_FAILED;
5641  }
5642 
5643  holder->fix_count = 1;
5644  holder->bufptr = bufptr;
5645  holder->perf_stat.dirtied_by_holder = 0;
5646  if (request_mode == PGBUF_LATCH_WRITE)
5647  {
5648  holder->perf_stat.hold_has_write_latch = 1;
5649  holder->perf_stat.hold_has_read_latch = 0;
5650  }
5651  else
5652  {
5653  holder->perf_stat.hold_has_read_latch = 1;
5654  holder->perf_stat.hold_has_write_latch = 0;
5655  }
5656  holder->perf_stat.dirty_before_hold = buf_is_dirty;
5657 
5658  return NO_ERROR;
5659 }
5660 
5661 /*
5662  * pgbuf_latch_bcb_upon_fix () -
5663  * return: NO_ERROR, or ER_code
5664  * bufptr(in):
5665  * request_mode(in):
5666  * buf_lock_acquired(in):
5667  * condition(in):
5668  *
5669  * Note: This function latches BCB with latch mode LatchMode as far as
5670  * LatchMode is compatible with bcb->LatchMode and there is not any
5671  * blocked reader or writer.
5672  * If it cannot latch the BCB right away,
5673  * (1) in case of conditional request,
5674  * release mutex and return eERROR.
5675  * (2) in case of unconditional request, add thread on the
5676  * BCB queue and release mutex and block the thread.
5677  * In any case, if LeafLatchMode is not NO_LATCH and the PageType
5678  * of the page that BCB points is P_BPLEAF, latch BCB with latch
5679  * mode LeafLatchMode.
5680  */
5681 STATIC_INLINE int
5683  int buf_lock_acquired, PGBUF_LATCH_CONDITION condition, bool * is_latch_wait)
5684 {
5685  PGBUF_HOLDER *holder = NULL;
5686  int request_fcnt = 1;
5687  bool is_page_idle;
5688  bool buf_is_dirty;
5689 
5690  /* parameter validation */
5691  assert (request_mode == PGBUF_LATCH_READ || request_mode == PGBUF_LATCH_WRITE);
5692  assert (condition == PGBUF_UNCONDITIONAL_LATCH || condition == PGBUF_CONDITIONAL_LATCH);
5693  assert (is_latch_wait != NULL);
5694 
5695  *is_latch_wait = false;
5696 
5697  buf_is_dirty = pgbuf_bcb_is_dirty (bufptr);
5698 
5699  /* the caller is holding bufptr->mutex */
5700  is_page_idle = false;
5701  if (buf_lock_acquired || bufptr->latch_mode == PGBUF_NO_LATCH)
5702  {
5703  is_page_idle = true;
5704  }
5705 #if defined (SA_MODE)
5706  else
5707  {
5708  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
5709  if (holder == NULL)
5710  {
5711  /* It means bufptr->latch_mode was leaked by the previous holder, since there should be no user except me in
5712  * SA_MODE. */
5713  assert (0);
5714  is_page_idle = true;
5715  }
5716  }
5717 #endif
5718 
5719  if (is_page_idle == true)
5720  {
5721  return pgbuf_latch_idle_page (thread_p, bufptr, request_mode);
5722  }
5723 
5724  if (request_mode == PGBUF_LATCH_READ && bufptr->latch_mode == PGBUF_LATCH_READ)
5725  {
5726  if (pgbuf_is_exist_blocked_reader_writer (bufptr) == false)
5727  {
5728  /* there is not any blocked reader/writer. */
5729  /* grant the request */
5730 
5731  /* increment the fix count */
5732  bufptr->fcnt++;
5733  assert (0 < bufptr->fcnt);
5734 
5735  PGBUF_BCB_UNLOCK (bufptr);
5736 
5737  /* allocate a BCB holder entry */
5738 
5739  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
5740  if (holder != NULL)
5741  {
5742  /* the caller is the holder of the buffer page */
5743  holder->fix_count++;
5744  /* holder->dirty_before_holder not changed */
5745  if (request_mode == PGBUF_LATCH_WRITE)
5746  {
5747  holder->perf_stat.hold_has_write_latch = 1;
5748  }
5749  else
5750  {
5751  holder->perf_stat.hold_has_read_latch = 1;
5752  }
5753  }
5754 #if defined(SERVER_MODE)
5755  else
5756  {
5757  /* the caller is not the holder of the buffer page */
5758  /* allocate a BCB holder entry */
5759  holder = pgbuf_allocate_thrd_holder_entry (thread_p);
5760  if (holder == NULL)
5761  {
5762  /* This situation must not be occurred. */
5763  assert (false);
5764  return ER_FAILED;
5765  }
5766 
5767  holder->fix_count = 1;
5768  holder->bufptr = bufptr;
5769  if (request_mode == PGBUF_LATCH_WRITE)
5770  {
5771  holder->perf_stat.hold_has_write_latch = 1;
5772  holder->perf_stat.hold_has_read_latch = 0;
5773  }
5774  else
5775  {
5776  holder->perf_stat.hold_has_read_latch = 1;
5777  holder->perf_stat.hold_has_write_latch = 0;
5778  }
5779  holder->perf_stat.dirtied_by_holder = 0;
5780  holder->perf_stat.dirty_before_hold = buf_is_dirty;
5781  }
5782 #endif /* SERVER_MODE */
5783 
5784  return NO_ERROR;
5785  }
5786 
5787 #if defined (SA_MODE)
5788  /* It is impossible to have a blocked waiter under SA_MODE. */
5789  assert (0);
5790 #endif /* SA_MODE */
5791 
5792  /* at here, there is some blocked reader/writer. */
5793 
5794  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
5795  if (holder == NULL)
5796  {
5797  /* in case that the caller is not the holder */
5798  goto do_block;
5799  }
5800 
5801  /* in case that the caller is the holder */
5802  bufptr->fcnt++;
5803  assert (0 < bufptr->fcnt);
5804 
5805  PGBUF_BCB_UNLOCK (bufptr);
5806 
5807  /* set BCB holder entry */
5808 
5809  holder->fix_count++;
5810  /* holder->dirty_before_holder not changed */
5811  if (request_mode == PGBUF_LATCH_WRITE)
5812  {
5813  holder->perf_stat.hold_has_write_latch = 1;
5814  }
5815  else
5816  {
5817  holder->perf_stat.hold_has_read_latch = 1;
5818  }
5819 
5820  return NO_ERROR;
5821  }
5822 
5823  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
5824  if (holder == NULL)
5825  {
5826  /* in case that the caller is not the holder */
5827 #if defined (SA_MODE)
5828  assert (0);
5829 #endif
5830  goto do_block;
5831  }
5832 
5833  /* in case that the caller is holder */
5834 
5835  if (bufptr->latch_mode != PGBUF_LATCH_WRITE)
5836  {
5837  /* check iff nested write mode fix */
5838  assert_release (request_mode != PGBUF_LATCH_WRITE);
5839 
5840 #if !defined(NDEBUG)
5841  if (request_mode == PGBUF_LATCH_WRITE)
5842  {
5843  /* This situation must not be occurred. */
5844  assert (false);
5845 
5846  PGBUF_BCB_UNLOCK (bufptr);
5847 
5848  return ER_FAILED;
5849  }
5850 #endif
5851  }
5852 
5853  if (bufptr->latch_mode == PGBUF_LATCH_WRITE)
5854  { /* only the holder */
5855  assert (bufptr->fcnt == holder->fix_count);
5856 
5857  bufptr->fcnt++;
5858  assert (0 < bufptr->fcnt);
5859 
5860  PGBUF_BCB_UNLOCK (bufptr);
5861 
5862  /* set BCB holder entry */
5863 
5864  holder->fix_count++;
5865  /* holder->dirty_before_holder not changed */
5866  if (request_mode == PGBUF_LATCH_WRITE)
5867  {
5868  holder->perf_stat.hold_has_write_latch = 1;
5869  }
5870  else
5871  {
5872  holder->perf_stat.hold_has_read_latch = 1;
5873  }
5874 
5875  return NO_ERROR;
5876  }
5877  else if (bufptr->latch_mode == PGBUF_LATCH_READ)
5878  {
5879 #if 0 /* TODO: do not delete me */
5880  assert (false);
5881 #endif
5882 
5883  assert (request_mode == PGBUF_LATCH_WRITE);
5884 
5885  if (bufptr->fcnt == holder->fix_count)
5886  {
5887  bufptr->latch_mode = request_mode; /* PGBUF_LATCH_WRITE */
5888  bufptr->fcnt++;
5889  assert (0 < bufptr->fcnt);
5890 
5891  PGBUF_BCB_UNLOCK (bufptr);
5892 
5893  /* set BCB holder entry */
5894 
5895  holder->fix_count++;
5896  /* holder->dirty_before_holder not changed */
5897  if (request_mode == PGBUF_LATCH_WRITE)
5898  {
5899  holder->perf_stat.hold_has_write_latch = 1;
5900  }
5901  else
5902  {
5903  holder->perf_stat.hold_has_read_latch = 1;
5904  }
5905 
5906  return NO_ERROR;
5907  }
5908 
5909  assert (bufptr->fcnt > holder->fix_count);
5910 
5911  if (condition == PGBUF_CONDITIONAL_LATCH)
5912  {
5913  goto do_block; /* will return immediately */
5914  }
5915 
5916  assert (request_fcnt == 1);
5917 
5918  request_fcnt += holder->fix_count;
5919  bufptr->fcnt -= holder->fix_count;
5920  holder->fix_count = 0;
5921 
5922  INIT_HOLDER_STAT (&holder->perf_stat);
5923 
5924  if (pgbuf_remove_thrd_holder (thread_p, holder) != NO_ERROR)
5925  {
5926  /* This situation must not be occurred. */
5927  assert (false);
5928 
5929  PGBUF_BCB_UNLOCK (bufptr);
5930 
5931  return ER_FAILED;
5932  }
5933 
5934  /* at here, goto do_block; */
5935  }
5936  else
5937  {
5938 #if 0 /* TODO: do not delete me */
5939  assert (false);
5940 #endif
5941 
5942  /* at here, goto do_block; */
5943  }
5944 
5945 do_block:
5946 
5947 #if defined (SA_MODE)
5948  assert (0);
5949 #endif
5950 
5951  if (condition == PGBUF_CONDITIONAL_LATCH)
5952  {
5953  /* reject the request */
5954  int tran_index;
5955  int wait_msec;
5956 
5957  tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
5958  wait_msec = logtb_find_wait_msecs (tran_index);
5959 
5960  if (wait_msec == LK_ZERO_WAIT)
5961  {
5962  const char *client_prog_name; /* Client program name for tran */
5963  const char *client_user_name; /* Client user name for tran */
5964  const char *client_host_name; /* Client host for tran */
5965  int client_pid; /* Client process identifier for tran */
5966 
5967  /* setup timeout error, if wait_msec == LK_ZERO_WAIT */
5968 
5969  PGBUF_BCB_UNLOCK (bufptr);
5970 
5971  (void) logtb_find_client_name_host_pid (tran_index, &client_prog_name, &client_user_name, &client_host_name,
5972  &client_pid);
5973 
5974  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LK_PAGE_TIMEOUT, 8, tran_index, client_user_name,
5975  client_host_name, client_pid, (request_mode == PGBUF_LATCH_READ ? "READ" : "WRITE"),
5976  bufptr->vpid.volid, bufptr->vpid.pageid, NULL);
5977  }
5978  else
5979  {
5980  PGBUF_BCB_UNLOCK (bufptr);
5981  }
5982 
5983  return ER_FAILED;
5984  }
5985  else
5986  {
5987  /* block the request */
5988 
5989  if (pgbuf_block_bcb (thread_p, bufptr, request_mode, request_fcnt, false) != NO_ERROR)
5990  {
5991  return ER_FAILED;
5992  }
5993  /* Above function released bufptr->mutex unconditionally */
5994 
5995  assert (pgbuf_find_thrd_holder (thread_p, bufptr) == NULL);
5996 
5997  holder = pgbuf_allocate_thrd_holder_entry (thread_p);
5998  if (holder == NULL)
5999  {
6000  /* This situation must not be occurred. */
6001  assert (false);
6002  return ER_FAILED;
6003  }
6004 
6005  /* set BCB holder entry */
6006  holder->fix_count = request_fcnt;
6007  holder->bufptr = bufptr;
6008  if (request_mode == PGBUF_LATCH_WRITE)
6009  {
6010  holder->perf_stat.hold_has_write_latch = 1;
6011  }
6012  else if (request_mode == PGBUF_LATCH_READ)
6013  {
6014  holder->perf_stat.hold_has_read_latch = 1;
6015  }
6016  holder->perf_stat.dirtied_by_holder = 0;
6017  holder->perf_stat.dirty_before_hold = buf_is_dirty;
6018  *is_latch_wait = true;
6019 
6020  return NO_ERROR;
6021  }
6022 }
6023 
6024 /*
6025  * pgbuf_unlatch_bcb_upon_unfix () - Unlatches BCB
6026  * return: NO_ERROR, or ER_code
6027  * bufptr(in):
6028  *
6029  * Note: It decrements FixCount by one.
6030  * If FixCount becomes 0,
6031  * (1) if LatchMode != FLUSH and LatchMode != VICTIM,
6032  * set LatchMode = NO_LATCH.
6033  * (2) if BCB waiting queue is empty and Wait is false,
6034  * replace the BCB to the top of LRU list.
6035  * If Flush_Request == TRUE,
6036  * set LatchMode = FLUSH,
6037  * flush the buffer by WAL protocol and wake up
6038  * threads on the BCB waiting queue.
6039  * If Flush_Request == FALSE
6040  * if LatchMode == NO_LATCH,
6041  * then, wake up the threads on the BCB waiting queue.
6042  * Before return, it releases BCB mutex.
6043  */
6044 STATIC_INLINE int
6045 pgbuf_unlatch_bcb_upon_unfix (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, int holder_status)
6046 {
6047  PAGE_PTR pgptr;
6048  int th_lru_idx;
6049  PGBUF_ZONE zone;
6050  int error_code = NO_ERROR;
6051 
6052  assert (holder_status == NO_ERROR);
6053 
6054  /* the caller is holding bufptr->mutex */
6055 
6056  assert (!VPID_ISNULL (&bufptr->vpid));
6057  assert (pgbuf_check_bcb_page_vpid (bufptr, false) == true);
6058 
6059  CAST_BFPTR_TO_PGPTR (pgptr, bufptr);
6060 
6061  /* decrement the fix count */
6062  bufptr->fcnt--;
6063  if (bufptr->fcnt < 0)
6064  {
6065  /* This situation must not be occurred. */
6066  assert (false);
6068  fileio_get_volume_label (bufptr->vpid.volid, PEEK));
6069  bufptr->fcnt = 0;
6070  }
6071 
6072  if (holder_status != NO_ERROR)
6073  {
6074  /* This situation must not be occurred. */
6075  assert (false);
6076  PGBUF_BCB_UNLOCK (bufptr);
6077  return ER_FAILED;
6078  }
6079 
6080  if (bufptr->fcnt == 0)
6081  {
6082  /* When oldest_unflush_lsa of a page is set, its dirty mark should also be set */
6083  assert (LSA_ISNULL (&bufptr->oldest_unflush_lsa) || pgbuf_bcb_is_dirty (bufptr));
6084 
6085  /* there could be some synchronous flushers on the BCB queue */
6086  /* When the page buffer in LRU_1_Zone, do not move the page buffer into the top of LRU. This is an intention for
6087  * performance. */
6089  {
6090  pgbuf_move_bcb_to_bottom_lru (thread_p, bufptr);
6091  }
6092  else if (pgbuf_is_exist_blocked_reader_writer (bufptr) == false)
6093  {
6094  ATOMIC_INC_32 (&pgbuf_Pool.monitor.pg_unfix_cnt, 1);
6095 
6096  if (PGBUF_THREAD_HAS_PRIVATE_LRU (thread_p))
6097  {
6099  }
6100  else
6101  {
6102  th_lru_idx = -1;
6103  }
6104 
6105  zone = pgbuf_bcb_get_zone (bufptr);
6106  switch (zone)
6107  {
6108  case PGBUF_VOID_ZONE:
6109  /* bcb was recently allocated. the case may vary from never being used (or almost never), to up to few
6110  * percent (when hit ratio is very low). in any case, this is not needed to be very optimized here,
6111  * so the code was moved outside unlatch... do not inline it */
6112  pgbuf_unlatch_void_zone_bcb (thread_p, bufptr, th_lru_idx);
6113  break;
6114 
6115  case PGBUF_LRU_1_ZONE:
6116  /* note: this is most often accessed code and must be highly optimized! */
6117  if (PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6118  {
6119  /* do nothing */
6120  /* ... except collecting statistics */
6122  break;
6123  }
6124  if (pgbuf_should_move_private_to_shared (thread_p, bufptr, th_lru_idx))
6125  {
6126  /* move to shared */
6127  pgbuf_lru_move_from_private_to_shared (thread_p, bufptr);
6129  break;
6130  }
6131  /* do not move or boost */
6133  {
6135  }
6136  else
6137  {
6139  }
6141  break;
6142 
6143  case PGBUF_LRU_2_ZONE:
6144  /* this is the buffer zone between hot and victimized. is less hot than zone one and we allow boosting
6145  * (if bcb's are old enough). */
6146  if (PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6147  {
6148  /* do nothing */
6149  /* ... except collecting statistics */
6151  break;
6152  }
6153  if (pgbuf_should_move_private_to_shared (thread_p, bufptr, th_lru_idx))
6154  {
6155  /* move to shared */
6156  pgbuf_lru_move_from_private_to_shared (thread_p, bufptr);
6158  break;
6159  }
6160  if (PGBUF_IS_BCB_OLD_ENOUGH (bufptr, pgbuf_lru_list_from_bcb (bufptr)))
6161  {
6162  /* boost */
6163  pgbuf_lru_boost_bcb (thread_p, bufptr);
6164  }
6165  else
6166  {
6167  /* bcb is too new to tell if it really deserves a boost */
6169  {
6171  }
6172  else
6173  {
6175  }
6176  }
6178  break;
6179 
6180  case PGBUF_LRU_3_ZONE:
6181  if (PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6182  {
6183  if (!pgbuf_bcb_avoid_victim (bufptr) && pgbuf_assign_direct_victim (thread_p, bufptr))
6184  {
6185  /* assigned victim directly */
6187  {
6189  }
6190  }
6191  else
6192  {
6194  }
6195  break;
6196  }
6197  if (pgbuf_should_move_private_to_shared (thread_p, bufptr, th_lru_idx))
6198  {
6199  /* move to shared */
6200  pgbuf_lru_move_from_private_to_shared (thread_p, bufptr);
6202  break;
6203  }
6204  /* boost */
6205  pgbuf_lru_boost_bcb (thread_p, bufptr);
6207  break;
6208 
6209  default:
6210  /* unexpected */
6211  assert (false);
6212  break;
6213  }
6214  }
6215 
6216  bufptr->latch_mode = PGBUF_NO_LATCH;
6217 #if defined(SERVER_MODE)
6218  pgbuf_wakeup_reader_writer (thread_p, bufptr);
6219 #endif /* SERVER_MODE */
6220  }
6221 
6222  assert (bufptr->latch_mode != PGBUF_LATCH_FLUSH);
6223 
6224  if (pgbuf_bcb_is_async_flush_request (bufptr))
6225  {
6226  /* PGBUF_LATCH_READ is possible, when a reader and a flusher was blocked by a writer.
6227  * Blocked readers are already wakened by the ex-owner.
6228  */
6229  assert (bufptr->fcnt == 0 || bufptr->latch_mode == PGBUF_LATCH_WRITE || bufptr->latch_mode == PGBUF_LATCH_READ);
6230 
6231  /* we need to flush bcb. we won't need the bcb mutex afterwards */
6232  error_code = pgbuf_bcb_safe_flush_force_unlock (thread_p, bufptr, false);
6233  /* what to do with the error? we failed to flush it... */
6234  if (error_code != NO_ERROR)
6235  {
6236  er_clear ();
6237  error_code = NO_ERROR;
6238  }
6239  }
6240  else
6241  {
6242  PGBUF_BCB_UNLOCK (bufptr);
6243  }
6244 
6245  return NO_ERROR;
6246 }
6247 
6248 /*
6249  * pgbuf_unlatch_void_zone_bcb () - unlatch bcb that is currently in void zone.
6250  *
6251  * return : void
6252  * thread_p (in) : thread entry
6253  * bcb (in) : void zone bcb to unlatch
6254  * thread_private_lru_index (in) : thread's private lru index. -1 if thread does not have any private list.
6255  *
6256  * note: this is part of unlatch/unfix algorithm.
6257  */
6258 static void
6259 pgbuf_unlatch_void_zone_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int thread_private_lru_index)
6260 {
6261  bool aout_enabled = false;
6262  int aout_list_id = PGBUF_AOUT_NOT_FOUND;
6263 
6265 
6266  if (pgbuf_Pool.buf_AOUT_list.max_count > 0)
6267  {
6268  aout_enabled = true;
6269  aout_list_id = pgbuf_remove_vpid_from_aout_list (thread_p, &bcb->vpid);
6270  }
6271 
6272  if (PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6273  {
6274  /* we are not registering unfix for activity and we are not boosting or moving bcb's */
6275  if (aout_list_id == PGBUF_AOUT_NOT_FOUND)
6276  {
6278  }
6279  else
6280  {
6282  }
6283 
6284  /* can we feed direct victims? */
6285  if (!pgbuf_bcb_avoid_victim (bcb) && pgbuf_assign_direct_victim (thread_p, bcb))
6286  {
6287  /* assigned victim directly */
6289  {
6291  }
6292 
6293  /* add to AOUT */
6294  if (pgbuf_Pool.buf_AOUT_list.max_count > 0)
6295  {
6296  pgbuf_add_vpid_to_aout_list (thread_p, &bcb->vpid, aout_list_id);
6297  }
6298  return;
6299  }
6300 
6301  /* reset aout_list_id */
6302  aout_list_id = PGBUF_AOUT_NOT_FOUND;
6303  }
6304  else
6305  {
6306  if (aout_list_id == PGBUF_AOUT_NOT_FOUND)
6307  {
6309  }
6310  else
6311  {
6313  }
6314  }
6315 
6316  if (thread_private_lru_index != -1)
6317  {
6318  if (PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6319  {
6320  /* add to top of current private list */
6321  pgbuf_lru_add_new_bcb_to_top (thread_p, bcb, thread_private_lru_index);
6323  return;
6324  }
6325 
6326  if (!aout_enabled || thread_private_lru_index == aout_list_id)
6327  {
6328  /* add to top of current private list */
6329  pgbuf_lru_add_new_bcb_to_top (thread_p, bcb, thread_private_lru_index);
6332  return;
6333  }
6334 
6335  if (aout_list_id == PGBUF_AOUT_NOT_FOUND)
6336  {
6337  /* add to middle of current private list */
6338  pgbuf_lru_add_new_bcb_to_middle (thread_p, bcb, thread_private_lru_index);
6341  return;
6342  }
6343 
6344  /* fall through to add to shared */
6345  }
6346  /* add to middle of shared list. */
6349  if (!PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
6350  {
6352  }
6353 }
6354 
6355 /*
6356  * pgbuf_should_move_private_to_shared () - return true if bcb belongs to private lru list and if should be moved to a
6357  * shared lru list.
6358  *
6359  * return : true if move from private to shared is needed.
6360  * thread_p (in) : thread entry
6361  * bcb (in) : bcb
6362  * thread_private_lru_index (in) : thread's private lru index. -1 if thread does not have any private list.
6363  */
6364 STATIC_INLINE bool
6365 pgbuf_should_move_private_to_shared (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int thread_private_lru_index)
6366 {
6367  int bcb_lru_idx = pgbuf_bcb_get_lru_index (bcb);
6368 
6369  if (PGBUF_IS_SHARED_LRU_INDEX (bcb_lru_idx))
6370  {
6371  /* not a private list */
6372  return false;
6373  }
6374 
6375  /* two conditions to move from private to shared:
6376  * 1. bcb is fixed by more than one transaction.
6377  * 2. bcb is very hot and old enough. */
6378 
6379  /* cond 1 */
6380  if (thread_private_lru_index != bcb_lru_idx)
6381  {
6382  return true;
6383  }
6384  /* cond 2 */
6385  if (!pgbuf_bcb_is_hot (bcb))
6386  {
6387  /* not hot enough */
6388  return false;
6389  }
6390  if (!PGBUF_IS_BCB_OLD_ENOUGH (bcb, PGBUF_GET_LRU_LIST (bcb_lru_idx)))
6391  {
6392  /* not old enough */
6393  return false;
6394  }
6395  /* hot and old enough */
6396  return true;
6397 }
6398 
6399 /*
6400  * pgbuf_block_bcb () - Adds it on the BCB waiting queue and block thread
6401  * return: NO_ERROR, or ER_code
6402  * bufptr(in):
6403  * request_mode(in):
6404  * request_fcnt(in):
6405  * as_promote(in): if true, will wait as first promoter
6406  *
6407  * Note: Promoter will be the first waiter. Others will be appended to waiting queue.
6408  */
6409 static int
6410 pgbuf_block_bcb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, PGBUF_LATCH_MODE request_mode, int request_fcnt,
6411  bool as_promote)
6412 {
6413 #if defined(SERVER_MODE)
6414  THREAD_ENTRY *cur_thrd_entry, *thrd_entry;
6415 
6416  /* caller is holding bufptr->mutex */
6417  /* request_mode == PGBUF_LATCH_READ/PGBUF_LATCH_WRITE/PGBUF_LATCH_FLUSH */
6418  assert (request_mode == PGBUF_LATCH_READ || request_mode == PGBUF_LATCH_WRITE || request_mode == PGBUF_LATCH_FLUSH);
6419 
6420  if (thread_p == NULL)
6421  {
6422  assert (thread_p != NULL);
6423  thread_p = thread_get_thread_entry_info ();
6424  }
6425 
6426  cur_thrd_entry = thread_p;
6427  cur_thrd_entry->request_latch_mode = request_mode;
6428  cur_thrd_entry->request_fix_count = request_fcnt; /* SPECIAL_NOTE */
6429 
6430  if (as_promote)
6431  {
6432  /* place cur_thrd_entry as first in BCB waiting queue */
6433 
6434  /* Safe guard: there can be only one promoter. */
6435  assert (bufptr->next_wait_thrd == NULL || !bufptr->next_wait_thrd->wait_for_latch_promote);
6436 
6437  cur_thrd_entry->next_wait_thrd = bufptr->next_wait_thrd;
6438  bufptr->next_wait_thrd = cur_thrd_entry;
6439  }
6440  else
6441  {
6442  /* append cur_thrd_entry to the BCB waiting queue */
6443  cur_thrd_entry->next_wait_thrd = NULL;
6444  thrd_entry = bufptr->next_wait_thrd;
6445  if (thrd_entry == NULL)
6446  {
6447  bufptr->next_wait_thrd = cur_thrd_entry;
6448  }
6449  else
6450  {
6451  while (thrd_entry->next_wait_thrd != NULL)
6452  {
6453  thrd_entry = thrd_entry->next_wait_thrd;
6454  }
6455  thrd_entry->next_wait_thrd = cur_thrd_entry;
6456  }
6457  }
6458 
6459  if (request_mode == PGBUF_LATCH_FLUSH)
6460  {
6461  /* is it safe to use infinite wait instead of timed sleep? */
6462  thread_lock_entry (cur_thrd_entry);
6463  PGBUF_BCB_UNLOCK (bufptr);
6465 
6466  if (cur_thrd_entry->resume_status != THREAD_PGBUF_RESUMED)
6467  {
6468  /* interrupt operation */
6469  THREAD_ENTRY *thrd_entry, *prev_thrd_entry = NULL;
6470 
6471  PGBUF_BCB_LOCK (bufptr);
6472  thrd_entry = bufptr->next_wait_thrd;
6473 
6474  while (thrd_entry != NULL)
6475  {
6476  if (thrd_entry == cur_thrd_entry)
6477  {
6478  if (prev_thrd_entry == NULL)
6479  {
6480  bufptr->next_wait_thrd = thrd_entry->next_wait_thrd;
6481  }
6482  else
6483  {
6484  prev_thrd_entry->next_wait_thrd = thrd_entry->next_wait_thrd;
6485  }
6486 
6487  thrd_entry->next_wait_thrd = NULL;
6488  PGBUF_BCB_UNLOCK (bufptr);
6489  return ER_FAILED;
6490  }
6491 
6492  prev_thrd_entry = thrd_entry;
6493  thrd_entry = thrd_entry->next_wait_thrd;
6494  }
6495  PGBUF_BCB_UNLOCK (bufptr);
6496  }
6497  }
6498  else
6499  {
6500  /*
6501  * We do not guarantee that there is no deadlock between page latches.
6502  * So, we made a decision that when read/write buffer fix request is
6503  * not granted immediately, block the request with timed sleep method.
6504  * That is, unless the request is not waken up by other threads within
6505  * some time interval, the request will be waken up by timeout.
6506  * When the request is waken up, the request is treated as a victim.
6507  */
6508  if (pgbuf_timed_sleep (thread_p, bufptr, cur_thrd_entry) != NO_ERROR)
6509  {
6510  return ER_FAILED;
6511  }
6512 
6513 #if !defined (NDEBUG)
6514  /* To hold mutex is not required because I hold the latch. This means at least my fix count is kept. */
6515  assert (0 < bufptr->fcnt);
6516 #endif
6517  }
6518 #endif /* SERVER_MODE */
6519 
6520  return NO_ERROR;
6521 }
6522 
6523 #if defined(SERVER_MODE)
6524 /*
6525  * pgbuf_timed_sleep_error_handling () -
6526  * return:
6527  * bufptr(in):
6528  * thrd_entry(in):
6529  */
6530 static int
6531 pgbuf_timed_sleep_error_handling (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, THREAD_ENTRY * thrd_entry)
6532 {
6533  THREAD_ENTRY *prev_thrd_entry;
6534  THREAD_ENTRY *curr_thrd_entry;
6535 
6536  PGBUF_BCB_LOCK (bufptr);
6537 
6538  /* case 1 : empty waiting queue */
6539  if (bufptr->next_wait_thrd == NULL)
6540  {
6541  /* The thread entry has been already removed from the BCB waiting queue by another thread. */
6542  return NO_ERROR;
6543  }
6544 
6545  /* case 2 : first waiting thread != thrd_entry */
6546  if (bufptr->next_wait_thrd != thrd_entry)
6547  {
6548  prev_thrd_entry = bufptr->next_wait_thrd;
6549  while (prev_thrd_entry->next_wait_thrd != NULL)
6550  {
6551  if (prev_thrd_entry->next_wait_thrd == thrd_entry)
6552  {
6553  prev_thrd_entry->next_wait_thrd = thrd_entry->next_wait_thrd;
6554  thrd_entry->next_wait_thrd = NULL;
6555  break;
6556  }
6557  prev_thrd_entry = prev_thrd_entry->next_wait_thrd;
6558  }
6559  return NO_ERROR;
6560  }
6561 
6562  /* case 3 : first waiting thread == thrd_entry */
6563  bufptr->next_wait_thrd = thrd_entry->next_wait_thrd;
6564  thrd_entry->next_wait_thrd = NULL;
6565  while (bufptr->next_wait_thrd != NULL)
6566  {
6567  curr_thrd_entry = bufptr->next_wait_thrd;
6568  if (bufptr->latch_mode == PGBUF_LATCH_READ && curr_thrd_entry->request_latch_mode == PGBUF_LATCH_READ)
6569  {
6570  /* grant the request */
6571  thread_lock_entry (curr_thrd_entry);
6572  if (curr_thrd_entry->request_latch_mode == PGBUF_LATCH_READ)
6573  {
6574  bufptr->fcnt += curr_thrd_entry->request_fix_count;
6575 
6576  /* do not handle BCB holder entry, at here. refer pgbuf_latch_bcb_upon_fix () */
6577 
6578  /* remove thrd_entry from BCB waiting queue. */
6579  bufptr->next_wait_thrd = curr_thrd_entry->next_wait_thrd;
6580  curr_thrd_entry->next_wait_thrd = NULL;
6581 
6582  /* wake up the thread */
6583  pgbuf_wakeup (curr_thrd_entry);
6584  }
6585  else
6586  {
6587  thread_unlock_entry (curr_thrd_entry);
6588  break;
6589  }
6590  }
6591  else
6592  {
6593  break;
6594  }
6595  }
6596 
6597  return NO_ERROR;
6598 }
6599 
6600 /*
6601  * pgbuf_timed_sleep () -
6602  * return: NO_ERROR, or ER_code
6603  * bufptr(in):
6604  * thrd_entry(in):
6605  */
6606 static int
6607 pgbuf_timed_sleep (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, THREAD_ENTRY * thrd_entry)
6608 {
6609  int r;
6610  struct timespec to;
6611  int wait_secs;
6612  int old_wait_msecs;
6613  int save_request_latch_mode;
6614  const char *client_prog_name; /* Client program name for trans */
6615  const char *client_user_name; /* Client user name for tran */
6616  const char *client_host_name; /* Client host for tran */
6617  int client_pid; /* Client process identifier for tran */
6618 
6619  TSC_TICKS start_tick, end_tick;
6620  TSCTIMEVAL tv_diff;
6621 
6622  /* After holding the mutex associated with conditional variable, release the bufptr->mutex. */
6623  thread_lock_entry (thrd_entry);
6624  PGBUF_BCB_UNLOCK (bufptr);
6625 
6626  old_wait_msecs = wait_secs = pgbuf_find_current_wait_msecs (thread_p);
6627 
6628  assert (wait_secs == LK_INFINITE_WAIT || wait_secs == LK_ZERO_WAIT || wait_secs == LK_FORCE_ZERO_WAIT
6629  || wait_secs > 0);
6630 
6631  if (wait_secs == LK_ZERO_WAIT || wait_secs == LK_FORCE_ZERO_WAIT)
6632  {
6633  wait_secs = 0;
6634  }
6635  else
6636  {
6637  wait_secs = PGBUF_TIMEOUT;
6638  }
6639 
6640 try_again:
6641  to.tv_sec = (int) time (NULL) + wait_secs;
6642  to.tv_nsec = 0;
6643 
6644  if (thrd_entry->event_stats.trace_slow_query == true)
6645  {
6646  tsc_getticks (&start_tick);
6647  }
6648 
6649  thrd_entry->resume_status = THREAD_PGBUF_SUSPENDED;
6650  r = pthread_cond_timedwait (&thrd_entry->wakeup_cond, &thrd_entry->th_entry_lock, &to);
6651 
6652  if (thrd_entry->event_stats.trace_slow_query == true)
6653  {
6654  tsc_getticks (&end_tick);
6655  tsc_elapsed_time_usec (&tv_diff, end_tick, start_tick);
6656  TSC_ADD_TIMEVAL (thrd_entry->event_stats.latch_waits, tv_diff);
6657  }
6658 
6659  if (r == 0)
6660  {
6661  /* someone wakes up me */
6662  if (thrd_entry->resume_status == THREAD_PGBUF_RESUMED)
6663  {
6664  thread_unlock_entry (thrd_entry);
6665  return NO_ERROR;
6666  }
6667 
6668  /* interrupt operation */
6669  thrd_entry->request_latch_mode = PGBUF_NO_LATCH;
6670  thrd_entry->resume_status = THREAD_PGBUF_RESUMED;
6671  thread_unlock_entry (thrd_entry);
6672 
6673  if (pgbuf_timed_sleep_error_handling (thread_p, bufptr, thrd_entry) == NO_ERROR)
6674  {
6675  PGBUF_BCB_UNLOCK (bufptr);
6676  }
6677 
6679  return ER_FAILED;
6680  }
6681  else if (r == ETIMEDOUT)
6682  {
6683  /* rollback operation, postpone operation, etc. */
6684  if (thrd_entry->resume_status == THREAD_PGBUF_RESUMED)
6685  {
6686  thread_unlock_entry (thrd_entry);
6687  return NO_ERROR;
6688  }
6689 
6690  if (logtb_is_current_active (thread_p) == false)
6691  {
6692  goto try_again;
6693  }
6694 
6695  /* buffer page deadlock victim by timeout */
6696  /* following order of execution is important. */
6697  /* request_latch_mode == PGBUF_NO_LATCH means that the thread has waken up by timeout. This value must be set
6698  * before release the mutex. */
6699  save_request_latch_mode = thrd_entry->request_latch_mode;
6700  thrd_entry->request_latch_mode = PGBUF_NO_LATCH;
6701  thread_unlock_entry (thrd_entry);
6702 
6703  if (pgbuf_timed_sleep_error_handling (thread_p, bufptr, thrd_entry) == NO_ERROR)
6704  {
6705  goto er_set_return;
6706  }
6707 
6708  return ER_FAILED;
6709  }
6710  else
6711  {
6712  thread_unlock_entry (thrd_entry);
6713  /* error setting */
6715  return ER_FAILED;
6716  }
6717 
6718 er_set_return:
6719  /* error setting */
6720  if (old_wait_msecs == LK_INFINITE_WAIT)
6721  {
6723 
6724  /* FIXME: remove it. temporarily added for debugging */
6725  assert (0);
6726 
6727  PGBUF_BCB_UNLOCK (bufptr);
6728  if (logtb_is_current_active (thread_p) == true)
6729  {
6730  const char *client_prog_name; /* Client user name for transaction */
6731  const char *client_user_name; /* Client user name for transaction */
6732  const char *client_host_name; /* Client host for transaction */
6733  int client_pid; /* Client process identifier for transaction */
6734  int tran_index;
6735 
6736  tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
6737  (void) logtb_find_client_name_host_pid (tran_index, &client_prog_name, &client_user_name, &client_host_name,
6738  &client_pid);
6739 
6740  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LK_UNILATERALLY_ABORTED, 4, tran_index, client_user_name,
6741  client_host_name, client_pid);
6742  }
6743  else
6744  {
6745  /*
6746  * We are already aborting, fall through. Don't do
6747  * double aborts that could cause an infinite loop.
6748  */
6750  "pgbuf_timed_sleep: Likely a system error. Trying to abort a transaction twice.\n");
6751  /* We can release all the page latches held by current thread. */
6752  }
6753  }
6754  else if (old_wait_msecs > 0)
6755  {
6757 
6758  PGBUF_BCB_UNLOCK (bufptr);
6759 
6760  (void) logtb_find_client_name_host_pid (thrd_entry->tran_index, &client_prog_name, &client_user_name,
6761  &client_host_name, &client_pid);
6762 
6763  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LK_PAGE_TIMEOUT, 8, thrd_entry->tran_index, client_user_name,
6764  client_host_name, client_pid, (save_request_latch_mode == PGBUF_LATCH_READ ? "READ" : "WRITE"),
6765  bufptr->vpid.volid, bufptr->vpid.pageid, NULL);
6766  }
6767  else
6768  {
6769  PGBUF_BCB_UNLOCK (bufptr);
6770  }
6771 
6772  return ER_FAILED;
6773 }
6774 
6775 /*
6776  * pgbuf_wakeup_reader_writer () - Wakes up blocked threads on the BCB queue with read or write latch mode
6777  *
6778  * return : error code
6779  * thread_p (in) : thread entry
6780  * bufptr (in) : bcb
6781  */
6782 STATIC_INLINE void
6783 pgbuf_wakeup_reader_writer (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr)
6784 {
6785  THREAD_ENTRY *thrd_entry = NULL;
6786  THREAD_ENTRY *prev_thrd_entry = NULL;
6787  THREAD_ENTRY *next_thrd_entry = NULL;
6788 
6789  /* the caller is holding bufptr->mutex */
6790 
6791  assert (bufptr->latch_mode == PGBUF_NO_LATCH && bufptr->fcnt == 0);
6792 
6793  /* fcnt == 0, bufptr->latch_mode == PGBUF_NO_LATCH */
6794 
6795  /* how it works:
6796  *
6797  * we can have here multiple types of waiters:
6798  * 1. PGBUF_NO_LATCH - thread gave up waiting for bcb (interrupted or timed out). just remove it from list.
6799  * 2. PGBUF_LATCH_FLUSH - thread is waiting for bcb to be flushed. this is not actually a latch and thread is not
6800  * awaken here. bcb must be either marked to be flushed asynchronously or is currently in process of being flushed.
6801  * 3. PGBUF_LATCH_READ - multiple threads can be waked at once (all readers at the head of the list).
6802  * 4. PGBUF_LATCH_WRITE - only first waiter is waked.
6803  */
6804 
6805  for (thrd_entry = bufptr->next_wait_thrd; thrd_entry != NULL; thrd_entry = next_thrd_entry)
6806  {
6807  next_thrd_entry = thrd_entry->next_wait_thrd;
6808 
6809  /* if thrd_entry->request_latch_mode is PGBUF_NO_LATCH, it means the corresponding thread has been waken up
6810  * by timeout. */
6811  if (thrd_entry->request_latch_mode == PGBUF_NO_LATCH)
6812  {
6813  if (prev_thrd_entry == NULL)
6814  {
6815  bufptr->next_wait_thrd = next_thrd_entry;
6816  }
6817  else
6818  {
6819  prev_thrd_entry->next_wait_thrd = next_thrd_entry;
6820  }
6821  thrd_entry->next_wait_thrd = NULL;
6822  continue;
6823  }
6824 
6825  if (thrd_entry->request_latch_mode == PGBUF_LATCH_FLUSH)
6826  {
6827  /* must wait for flush. we do not wake it until flush is executed. */
6829 
6830  /* leave it in the wait list */
6831  prev_thrd_entry = thrd_entry;
6832  continue;
6833  }
6834 
6835  if ((bufptr->latch_mode == PGBUF_NO_LATCH)
6836  || (bufptr->latch_mode == PGBUF_LATCH_READ && thrd_entry->request_latch_mode == PGBUF_LATCH_READ))
6837  {
6838  thread_lock_entry (thrd_entry);
6839 
6840  if (thrd_entry->request_latch_mode != PGBUF_NO_LATCH)
6841  {
6842  /* grant the request */
6843  bufptr->latch_mode = (PGBUF_LATCH_MODE) thrd_entry->request_latch_mode;
6844  bufptr->fcnt += thrd_entry->request_fix_count;
6845 
6846  /* do not handle BCB holder entry, at here. refer pgbuf_latch_bcb_upon_fix () */
6847 
6848  /* remove thrd_entry from BCB waiting queue. */
6849  if (prev_thrd_entry == NULL)
6850  {
6851  bufptr->next_wait_thrd = next_thrd_entry;
6852  }
6853  else
6854  {
6855  prev_thrd_entry->next_wait_thrd = next_thrd_entry;
6856  }
6857  thrd_entry->next_wait_thrd = NULL;
6858 
6859  /* wake up the thread */
6860  pgbuf_wakeup (thrd_entry);
6861  }
6862  else
6863  {
6864  if (prev_thrd_entry == NULL)
6865  {
6866  bufptr->next_wait_thrd = next_thrd_entry;
6867  }
6868  else
6869  {
6870  prev_thrd_entry->next_wait_thrd = next_thrd_entry;
6871  }
6872  thrd_entry->next_wait_thrd = NULL;
6873  thread_unlock_entry (thrd_entry);
6874  }
6875  }
6876  else if (bufptr->latch_mode == PGBUF_LATCH_READ)
6877  {
6878  /* Look for other readers. */
6879  prev_thrd_entry = thrd_entry;
6880  continue;
6881  }
6882  else
6883  {
6884  assert (bufptr->latch_mode == PGBUF_LATCH_WRITE);
6885  break;
6886  }
6887  }
6888 }
6889 #endif /* SERVER_MODE */
6890 
6891 /*
6892  * pgbuf_search_hash_chain () - searches the buffer hash chain to find a BCB with page identifier
6893  * return: if success, BCB pointer, otherwise NULL
6894  * hash_anchor(in):
6895  * vpid(in):
6896  */
6898 pgbuf_search_hash_chain (THREAD_ENTRY * thread_p, PGBUF_BUFFER_HASH * hash_anchor, const VPID * vpid)
6899 {
6900  PGBUF_BCB *bufptr;
6901  int mbw_cnt;
6902 #if defined(SERVER_MODE)
6903  int rv;
6904  int loop_cnt;
6905 #endif
6906  TSC_TICKS start_tick, end_tick;
6907  UINT64 lock_wait_time = 0;
6908 
6909  mbw_cnt = 0;
6910 
6911 /* one_phase: no hash-chain mutex */
6912 one_phase:
6913 
6914  bufptr = hash_anchor->hash_next;
6915  while (bufptr != NULL)
6916  {
6917  if (VPID_EQ (&(bufptr->vpid), vpid))
6918  {
6919 #if defined(SERVER_MODE)
6920  loop_cnt = 0;
6921 
6922  mutex_lock:
6923 
6924  rv = PGBUF_BCB_TRYLOCK (bufptr);
6925  if (rv == 0)
6926  {
6927  /* OK. go ahead */
6928  }
6929  else
6930  {
6931  if (rv != EBUSY)
6932  {
6933  /* give up one_phase */
6934  goto two_phase;
6935  }
6936 
6937  if (loop_cnt++ < mbw_cnt)
6938  {
6939  goto mutex_lock;
6940  }
6941 
6942  /* An unconditional request is given for acquiring mutex */
6943  PGBUF_BCB_LOCK (bufptr);
6944  }
6945 #else /* SERVER_MODE */
6946  PGBUF_BCB_LOCK (bufptr);
6947 #endif /* SERVER_MODE */
6948 
6949  if (!VPID_EQ (&(bufptr->vpid), vpid))
6950  {
6951  /* updated or replaced */
6952  PGBUF_BCB_UNLOCK (bufptr);
6953  /* retry one_phase */
6954  goto one_phase;
6955  }
6956  break;
6957  }
6958  bufptr = bufptr->hash_next;
6959  }
6960 
6961  if (bufptr != NULL)
6962  {
6963  return bufptr;
6964  }
6965 
6966 #if defined(SERVER_MODE)
6967 /* two_phase: hold hash-chain mutex */
6968 two_phase:
6969 #endif
6970 
6971 try_again:
6972 
6974  {
6975  tsc_getticks (&start_tick);
6976  }
6977 
6978  rv = pthread_mutex_lock (&hash_anchor->hash_mutex);
6979 
6981  {
6982  tsc_getticks (&end_tick);
6983  lock_wait_time = tsc_elapsed_utime (end_tick, start_tick);
6985  perfmon_add_stat (thread_p, PSTAT_PB_TIME_HASH_ANCHOR_WAIT, lock_wait_time);
6986  }
6987 
6988  bufptr = hash_anchor->hash_next;
6989  while (bufptr != NULL)
6990  {
6991  if (VPID_EQ (&(bufptr->vpid), vpid))
6992  {
6993 #if defined(SERVER_MODE)
6994  loop_cnt = 0;
6995 
6996  mutex_lock2:
6997 
6998  rv = PGBUF_BCB_TRYLOCK (bufptr);
6999  if (rv == 0)
7000  {
7001  /* bufptr->mutex is held */
7002  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7003  }
7004  else
7005  {
7006  if (rv != EBUSY)
7007  {
7009  return NULL;
7010  }
7011 
7012  if (loop_cnt++ < mbw_cnt)
7013  {
7014  goto mutex_lock2;
7015  }
7016 
7017  /* ret == EBUSY : bufptr->mutex is not held */
7018  /* An unconditional request is given for acquiring mutex after releasing hash_mutex. */
7019  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7020  PGBUF_BCB_LOCK (bufptr);
7021  }
7022 #else /* SERVER_MODE */
7023  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7024  PGBUF_BCB_LOCK (bufptr);
7025 #endif /* SERVER_MODE */
7026 
7027  if (!VPID_EQ (&(bufptr->vpid), vpid))
7028  {
7029  /* updated or replaced */
7030  PGBUF_BCB_UNLOCK (bufptr);
7031  goto try_again;
7032  }
7033  break;
7034  }
7035  bufptr = bufptr->hash_next;
7036  }
7037  /* at this point, if (bufptr != NULL) caller holds bufptr->mutex but not hash_anchor->hash_mutex if (bufptr ==
7038  * NULL) caller holds hash_anchor->hash_mutex. */
7039  return bufptr;
7040 }
7041 
7042 /*
7043  * pgbuf_insert_into_hash_chain () - Inserts BCB into the hash chain
7044  * return: NO_ERROR
7045  * hash_anchor(in): hash anchor
7046  * bufptr(in): pointer to buffer page (BCB)
7047  *
7048  * Note: Before insertion, it must hold the mutex of the hash anchor.
7049  * It doesn't release the mutex of the hash anchor.
7050  * The mutex of the hash anchor will be released in the next call of pgbuf_unlock_page ().
7051  */
7052 STATIC_INLINE int
7054 {
7055 #if defined(SERVER_MODE)
7056  int rv;
7057 #endif /* SERVER_MODE */
7058  TSC_TICKS start_tick, end_tick;
7059  UINT64 lock_wait_time = 0;
7060 
7062  {
7063  if (perfmon_is_perf_tracking ())
7064  {
7065  tsc_getticks (&start_tick);
7066  }
7067  }
7068 
7069  /* Note that the caller is not holding bufptr->mutex */
7070  rv = pthread_mutex_lock (&hash_anchor->hash_mutex);
7071 
7073  {
7074  tsc_getticks (&end_tick);
7075  lock_wait_time = tsc_elapsed_utime (end_tick, start_tick);
7077  perfmon_add_stat (thread_p, PSTAT_PB_TIME_HASH_ANCHOR_WAIT, lock_wait_time);
7078  }
7079 
7080  bufptr->hash_next = hash_anchor->hash_next;
7081  hash_anchor->hash_next = bufptr;
7082 
7083  /*
7084  * hash_anchor->hash_mutex is not released at this place.
7085  * The current BCB is the newly allocated BCB by the caller and
7086  * it is connected into the corresponding buffer hash chain, now.
7087  * hash_anchor->hahs_mutex will be released in pgbuf_unlock_page ()
7088  * after releasing the acquired buffer lock on the BCB.
7089  */
7090  return NO_ERROR;
7091 }
7092 
7093 /*
7094  * pgbuf_delete_from_hash_chain () - Deletes BCB from the hash chain
7095  * return: NO_ERROR, or ER_code
7096  * bufptr(in): pointer to buffer page
7097  */
7098 STATIC_INLINE int
7100 {
7101  PGBUF_BUFFER_HASH *hash_anchor;
7102  PGBUF_BCB *prev_bufptr;
7103  PGBUF_BCB *curr_bufptr;
7104 #if defined(SERVER_MODE)
7105  int rv;
7106 #endif /* SERVER_MODE */
7107  TSC_TICKS start_tick, end_tick;
7108  UINT64 lock_wait_time = 0;
7109 
7111  {
7112  if (perfmon_is_perf_tracking ())
7113  {
7114  tsc_getticks (&start_tick);
7115  }
7116  }
7117 
7118  /* the caller is holding bufptr->mutex */
7119 
7120  /* fcnt==0, next_wait_thrd==NULL, latch_mode==PGBUF_NO_LATCH */
7121  /* if (bufptr->latch_mode==PGBUF_NO_LATCH) invoked by an invalidator */
7122  hash_anchor = &(pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (&(bufptr->vpid))]);
7123  rv = pthread_mutex_lock (&hash_anchor->hash_mutex);
7124 
7126  {
7127  tsc_getticks (&end_tick);
7128  lock_wait_time = tsc_elapsed_utime (end_tick, start_tick);
7130  perfmon_add_stat (thread_p, PSTAT_PB_TIME_HASH_ANCHOR_WAIT, lock_wait_time);
7131  }
7132 
7133  if (pgbuf_bcb_is_flushing (bufptr))
7134  {
7135  assert (false);
7136 
7137  /* Someone tries to fix the current buffer page. So, give up selecting current buffer page as a victim. */
7138  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7139  bufptr->latch_mode = PGBUF_NO_LATCH;
7140  PGBUF_BCB_UNLOCK (bufptr);
7141  return ER_FAILED;
7142  }
7143  else
7144  {
7145  /* find current BCB in buffer hash chain */
7146  prev_bufptr = NULL;
7147  curr_bufptr = hash_anchor->hash_next;
7148 
7149  while (curr_bufptr != NULL)
7150  {
7151  if (curr_bufptr == bufptr)
7152  {
7153  break;
7154  }
7155  prev_bufptr = curr_bufptr;
7156  curr_bufptr = curr_bufptr->hash_next;
7157  }
7158 
7159  if (curr_bufptr == NULL)
7160  {
7161  assert (false);
7162 
7163  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7164 
7165  /* Now, the caller is holding bufptr->mutex. */
7166  /* bufptr->mutex will be released in following function. */
7167  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
7168 
7169  return ER_FAILED;
7170  }
7171 
7172  /* disconnect the BCB from the buffer hash chain */
7173  if (prev_bufptr == NULL)
7174  {
7175  hash_anchor->hash_next = curr_bufptr->hash_next;
7176  }
7177  else
7178  {
7179  prev_bufptr->hash_next = curr_bufptr->hash_next;
7180  }
7181 
7182  curr_bufptr->hash_next = NULL;
7183  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7184  VPID_SET_NULL (&(bufptr->vpid));
7186 
7187  return NO_ERROR;
7188  }
7189 }
7190 
7191 /*
7192  * pgbuf_lock_page () - Puts a buffer lock on the buffer lock chain
7193  * return: If success, PGBUF_LOCK_HOLDER, otherwise PGBUF_LOCK_WAITER
7194  * hash_anchor(in):
7195  * vpid(in):
7196  *
7197  * Note: This function is invoked only when the page is not in the buffer hash
7198  * chain. The caller is holding hash_anchor->hash_mutex.
7199  * Before return, the thread releases hash_anchor->hash_mutex.
7200  */
7201 static int
7202 pgbuf_lock_page (THREAD_ENTRY * thread_p, PGBUF_BUFFER_HASH * hash_anchor, const VPID * vpid)
7203 {
7204 #if defined(SERVER_MODE)
7205  PGBUF_BUFFER_LOCK *cur_buffer_lock;
7206  THREAD_ENTRY *cur_thrd_entry;
7207  TSC_TICKS start_tick, end_tick;
7208  UINT64 lock_wait_time = 0;
7209 
7210  /* the caller is holding hash_anchor->hash_mutex */
7211  /* check whether the page is in the Buffer Lock Chain */
7212 
7213  if (thread_p == NULL)
7214  {
7215  assert (thread_p != NULL);
7216  thread_p = thread_get_thread_entry_info ();
7217  }
7218 
7219  cur_thrd_entry = thread_p;
7220  cur_buffer_lock = hash_anchor->lock_next;
7221 
7222  /* find vpid in buffer lock chain */
7223  while (cur_buffer_lock != NULL)
7224  {
7225  if (VPID_EQ (&(cur_buffer_lock->vpid), vpid))
7226  {
7227  /* found */
7228  cur_thrd_entry->next_wait_thrd = cur_buffer_lock->next_wait_thrd;
7229  cur_buffer_lock->next_wait_thrd = cur_thrd_entry;
7230  pgbuf_sleep (cur_thrd_entry, &hash_anchor->hash_mutex);
7231 
7232  if (cur_thrd_entry->resume_status != THREAD_PGBUF_RESUMED)
7233  {
7234  /* interrupt operation */
7235  THREAD_ENTRY *thrd_entry, *prev_thrd_entry = NULL;
7236  int r;
7237 
7239  {
7240  tsc_getticks (&start_tick);
7241  }
7242 
7243  r = pthread_mutex_lock (&hash_anchor->hash_mutex);
7244 
7246  {
7247  tsc_getticks (&end_tick);
7248  lock_wait_time = tsc_elapsed_utime (end_tick, start_tick);
7250  perfmon_add_stat (thread_p, PSTAT_PB_TIME_HASH_ANCHOR_WAIT, lock_wait_time);
7251  }
7252 
7253  thrd_entry = cur_buffer_lock->next_wait_thrd;
7254 
7255  while (thrd_entry != NULL)
7256  {
7257  if (thrd_entry == cur_thrd_entry)
7258  {
7259  if (prev_thrd_entry == NULL)
7260  {
7261  cur_buffer_lock->next_wait_thrd = thrd_entry->next_wait_thrd;
7262  }
7263  else
7264  {
7265  prev_thrd_entry->next_wait_thrd = thrd_entry->next_wait_thrd;
7266  }
7267 
7268  thrd_entry->next_wait_thrd = NULL;
7269  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7270 
7271  perfmon_inc_stat (thread_p, PSTAT_LK_NUM_WAITED_ON_PAGES); /* monitoring */
7272  return PGBUF_LOCK_WAITER;
7273  }
7274  prev_thrd_entry = thrd_entry;
7275  thrd_entry = thrd_entry->next_wait_thrd;
7276  }
7277  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7278  }
7279  perfmon_inc_stat (thread_p, PSTAT_LK_NUM_WAITED_ON_PAGES); /* monitoring */
7280  return PGBUF_LOCK_WAITER;
7281  }
7282  cur_buffer_lock = cur_buffer_lock->lock_next;
7283  }
7284 
7285  /* buf_lock_table is implemented to have one entry for each thread. At first design, it had one entry for each
7286  * thread. cur_thrd_entry->index : thread entry index cur_thrd_entry->tran_index : transaction entry index */
7287 
7288  /* vpid is not found in the Buffer Lock Chain */
7289  cur_buffer_lock = &(pgbuf_Pool.buf_lock_table[cur_thrd_entry->index]);
7290  cur_buffer_lock->vpid = *vpid;
7291  cur_buffer_lock->next_wait_thrd = NULL;
7292  cur_buffer_lock->lock_next = hash_anchor->lock_next;
7293  hash_anchor->lock_next = cur_buffer_lock;
7294  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7295 #endif /* SERVER_MODE */
7296 
7297  perfmon_inc_stat (thread_p, PSTAT_LK_NUM_ACQUIRED_ON_PAGES); /* monitoring */
7298  return PGBUF_LOCK_HOLDER;
7299 }
7300 
7301 /*
7302  * pgbuf_unlock_page () - Deletes a buffer lock from the buffer lock chain
7303  * return: NO_ERROR
7304  * hash_anchor(in):
7305  * vpid(in):
7306  * need_hash_mutex(in):
7307  *
7308  * Note: This function is invoked only after the page is read into buffer and
7309  * the BCB is connected into its corresponding buffer hash chain.
7310  * Before return, the thread releases the hash mutex on the hash
7311  * anchor and wakes up all the threads blocked on the queue of the
7312  * buffer lock record.
7313  */
7314 static int
7315 pgbuf_unlock_page (THREAD_ENTRY * thread_p, PGBUF_BUFFER_HASH * hash_anchor, const VPID * vpid, int need_hash_mutex)
7316 {
7317 #if defined(SERVER_MODE)
7318  int rv;
7319 
7320  TSC_TICKS start_tick, end_tick;
7321  UINT64 lock_wait_time = 0;
7322 
7323  PGBUF_BUFFER_LOCK *prev_buffer_lock, *cur_buffer_lock;
7324  THREAD_ENTRY *cur_thrd_entry;
7325 
7326  if (need_hash_mutex)
7327  {
7329  {
7330  if (perfmon_is_perf_tracking ())
7331  {
7332  tsc_getticks (&start_tick);
7333  }
7334  }
7335  rv = pthread_mutex_lock (&hash_anchor->hash_mutex);
7336 
7338  {
7339  tsc_getticks (&end_tick);
7340  lock_wait_time = tsc_elapsed_utime (end_tick, start_tick);
7342  perfmon_add_stat (thread_p, PSTAT_PB_TIME_HASH_ANCHOR_WAIT, lock_wait_time);
7343  }
7344  }
7345 
7346  /* check whether the page is in the Buffer Lock Chain */
7347  prev_buffer_lock = NULL;
7348  cur_buffer_lock = hash_anchor->lock_next;
7349 
7350  while (cur_buffer_lock != NULL)
7351  {
7352  if (VPID_EQ (&(cur_buffer_lock->vpid), vpid))
7353  {
7354  break;
7355  }
7356 
7357  prev_buffer_lock = cur_buffer_lock;
7358  cur_buffer_lock = cur_buffer_lock->lock_next;
7359  }
7360 
7361  if (cur_buffer_lock != NULL)
7362  {
7363  if (prev_buffer_lock == NULL)
7364  {
7365  hash_anchor->lock_next = cur_buffer_lock->lock_next;
7366  }
7367  else
7368  {
7369  prev_buffer_lock->lock_next = cur_buffer_lock->lock_next;
7370  }
7371 
7372  cur_buffer_lock->lock_next = NULL;
7373  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7374 
7375  while ((cur_thrd_entry = cur_buffer_lock->next_wait_thrd) != NULL)
7376  {
7377  cur_buffer_lock->next_wait_thrd = cur_thrd_entry->next_wait_thrd;
7378  cur_thrd_entry->next_wait_thrd = NULL;
7379  pgbuf_wakeup_uncond (cur_thrd_entry);
7380  }
7381  }
7382  else
7383  {
7384  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7385  }
7386 #endif /* SERVER_MODE */
7387 
7388  return NO_ERROR;
7389 }
7390 
7391 /*
7392  * pgbuf_allocate_bcb () - Allocates a BCB
7393  * return: If success, a newly allocated BCB, otherwise NULL
7394  * src_vpid(in):
7395  *
7396  * Note: This function allocates a BCB from the buffer invalid list or the LRU list.
7397  * It is invoked only when a page is not in buffer.
7398  */
7399 static PGBUF_BCB *
7400 pgbuf_allocate_bcb (THREAD_ENTRY * thread_p, const VPID * src_vpid)
7401 {
7402  PGBUF_BCB *bufptr;
7403  PERF_UTIME_TRACKER time_tracker_alloc_bcb = PERF_UTIME_TRACKER_INITIALIZER;
7404  PERF_UTIME_TRACKER time_tracker_alloc_search_and_wait = PERF_UTIME_TRACKER_INITIALIZER;
7406  int tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
7407  PGBUF_STATUS *show_status = &pgbuf_Pool.show_status[tran_index];
7408 
7409 #if defined (SERVER_MODE)
7410  struct timespec to;
7411  int r = 0;
7412  PERF_STAT_ID pstat_cond_wait;
7413  bool high_priority = false;
7414 #endif /* SERVER_MODE */
7415 
7416  /* how it works: we need to free a bcb for new VPID.
7417  * 1. first source should be invalid list. initially, all bcb's will be in this list. sometimes, bcb's can be added to
7418  * this list during runtime. in any case, these bcb's are not used by anyone, do not need any flush or other
7419  * actions and are the best option for allocating a bcb.
7420  * 2. search the bcb in lru lists by calling pgbuf_get_victim.
7421  * 3. if search failed then:
7422  * SERVER_MODE: thread is added to one of two queues: high priority waiting threads queue or low priority waiting
7423  * threads queue. high priority is usually populated by vacuum threads or by threads holding latch
7424  * on very hot pages (b-tree roots, heap headers, volume header or file headers).
7425  * thread will then be assigned a victim directly (there are multiple ways this can happen) and woken
7426  * up.
7427  * TODO: we have one big vulnerability with waiting threads. what if, for any reason, no one feeds the
7428  * waiting thread with a victim. page flush thread may be sleeping and no one wakes it, and the
7429  * activity may be so reduced that no adjustments are made to lists. thread ends up with
7430  * timeout. right now, after we added the victim rich hack, this may not happen. we could
7431  * consider a backup plan to generate victims for a forgotten waiter.
7432  * SA_MODE: pages are flushed and victim is searched again (and we expect this time to find a victim).
7433  *
7434  * note: SA_MODE approach also applies to server-mode recovery (or in any circumstance which has page flush thread
7435  * unavailable).
7436  */
7437 
7438  /* allocate a BCB from invalid BCB list */
7439  bufptr = pgbuf_get_bcb_from_invalid_list (thread_p);
7440  if (bufptr != NULL)
7441  {
7442  return bufptr;
7443  }
7444 
7445  PERF_UTIME_TRACKER_START (thread_p, &time_tracker_alloc_bcb);
7446  if (detailed_perf)
7447  {
7448  PERF_UTIME_TRACKER_START (thread_p, &time_tracker_alloc_search_and_wait);
7449  }
7450 
7451  /* search lru lists */
7452  bufptr = pgbuf_get_victim (thread_p);
7453  PERF_UTIME_TRACKER_TIME_AND_RESTART (thread_p, &time_tracker_alloc_search_and_wait, PSTAT_PB_ALLOC_BCB_SEARCH_VICTIM);
7454  if (bufptr != NULL)
7455  {
7456  goto end;
7457  }
7458 
7459 #if defined (SERVER_MODE)
7461  {
7462  retry:
7463  high_priority = high_priority || VACUUM_IS_THREAD_VACUUM (thread_p) || pgbuf_is_thread_high_priority (thread_p);
7464 
7465  /* add to waiters thread list to be assigned victim directly */
7466  to.tv_sec = (int) time (NULL) + PGBUF_TIMEOUT;
7467  to.tv_nsec = 0;
7468 
7469  thread_lock_entry (thread_p);
7470 
7471  assert (pgbuf_Pool.direct_victims.bcb_victims[thread_p->index] == NULL);
7472 
7473  /* push to waiter thread list */
7474  if (high_priority)
7475  {
7476  if (detailed_perf && VACUUM_IS_THREAD_VACUUM (thread_p))
7477  {
7479  }
7480  if (!pgbuf_Pool.direct_victims.waiter_threads_high_priority->produce (thread_p))
7481  {
7482  assert (false);
7483  thread_unlock_entry (thread_p);
7484  return NULL;
7485  }
7486  pstat_cond_wait = PSTAT_PB_ALLOC_BCB_COND_WAIT_HIGH_PRIO;
7487  }
7488  else
7489  {
7490  if (!pgbuf_Pool.direct_victims.waiter_threads_low_priority->produce (thread_p))
7491  {
7492  /* ok, we have this very weird case when a consumer can be preempted for a very long time (which prevents
7493  * producers from being able to push to queue). I don't know how is this even possible, I just know I
7494  * found a case. I cannot tell exactly how long the consumer is preempted, but I know the time difference
7495  * between the producer still waiting to be waken by that consumer and the producer failing to add was 93
7496  * milliseconds. Which is huge if you ask me.
7497  * I doubled the size of the queue, but theoretically, this is still possible. I also removed the
7498  * ABORT_RELEASE, but we may have to think of a way to handle this preempted consumer case. */
7499 
7500  /* we do a hack for this case. we add the thread to high-priority instead, which is usually less used and
7501  * the same case is (almost) impossible to happen. */
7502  if (!pgbuf_Pool.direct_victims.waiter_threads_high_priority->produce (thread_p))
7503  {
7504  assert (false);
7505  thread_unlock_entry (thread_p);
7506  goto end;
7507  }
7508  pstat_cond_wait = PSTAT_PB_ALLOC_BCB_COND_WAIT_HIGH_PRIO;
7509  }
7510  else
7511  {
7512  pstat_cond_wait = PSTAT_PB_ALLOC_BCB_COND_WAIT_LOW_PRIO;
7513  }
7514  }
7515 
7516  /* make sure at least flush will feed us with bcb's. */
7517  // before migration of the page_flush_daemon it was a try_wakeup, check if still needed
7518  pgbuf_wakeup_page_flush_daemon (thread_p);
7519 
7520  show_status->num_flusher_waiting_threads++;
7521 
7523 
7524  show_status->num_flusher_waiting_threads--;
7525 
7526  PERF_UTIME_TRACKER_TIME (thread_p, &time_tracker_alloc_search_and_wait, pstat_cond_wait);
7527 
7528  if (r == NO_ERROR)
7529  {
7530  if (thread_p->resume_status == THREAD_ALLOC_BCB_RESUMED)
7531  {
7532  bufptr = pgbuf_get_direct_victim (thread_p);
7533  if (bufptr == NULL)
7534  {
7535  /* bcb was fixed again */
7536  high_priority = true;
7537  goto retry;
7538  }
7539  goto end;
7540  }
7541 
7542  /* no bcb should be allocated. */
7543  /* interrupted */
7544  assert (thread_p->resume_status == THREAD_RESUME_DUE_TO_INTERRUPT
7545  || thread_p->resume_status == THREAD_RESUME_DUE_TO_SHUTDOWN);
7546  if (pgbuf_Pool.direct_victims.bcb_victims[thread_p->index] != NULL)
7547  {
7548  /* a bcb was assigned before being interrupted. it must be "unassigned" */
7549  pgbuf_bcb_update_flags (thread_p, pgbuf_Pool.direct_victims.bcb_victims[thread_p->index], 0,
7551  pgbuf_Pool.direct_victims.bcb_victims[thread_p->index] = NULL;
7552  }
7554  }
7555  else
7556  {
7557  /* should not timeout! */
7559 
7560  thread_p->resume_status = THREAD_ALLOC_BCB_RESUMED;
7561  thread_unlock_entry (thread_p);
7562 
7564  {
7566  }
7567  }
7568  }
7569 #endif /* SERVER_MODE */
7570  else
7571  {
7572  /* flush */
7573  pgbuf_wakeup_page_flush_daemon (thread_p);
7574 
7575  /* search lru lists again */
7576  bufptr = pgbuf_get_victim (thread_p);
7577  PERF_UTIME_TRACKER_TIME (thread_p, &time_tracker_alloc_search_and_wait, PSTAT_PB_ALLOC_BCB_SEARCH_VICTIM);
7578 
7579  assert (bufptr != NULL);
7580  }
7581 
7582 end:
7583  if (bufptr != NULL)
7584  {
7585  /* victimize the buffer */
7586  if (pgbuf_victimize_bcb (thread_p, bufptr) != NO_ERROR)
7587  {
7588  assert (false);
7589  bufptr = NULL;
7590  }
7591  }
7592  else
7593  {
7594  if (er_errid () == NO_ERROR)
7595  {
7597  }
7598  }
7599 
7600  PERF_UTIME_TRACKER_TIME (thread_p, &time_tracker_alloc_bcb, PSTAT_PB_ALLOC_BCB);
7601 
7602  return bufptr;
7603 }
7604 
7605 /*
7606  * pgbuf_claim_bcb_for_fix () - function used for page fix to claim a bcb when page is not found in buffer
7607  *
7608  * return : claimed BCB
7609  * thread_p (in) : thread entry
7610  * vpid (in) : page identifier
7611  * fetch_mode (in) : fetch mode
7612  * hash_anchor (in/out) : hash anchor
7613  * perf (in/out) : page fix performance monitoring helper
7614  * try_again (out) : output true to trying getting bcb again
7615  */
7616 static PGBUF_BCB *
7617 pgbuf_claim_bcb_for_fix (THREAD_ENTRY * thread_p, const VPID * vpid, PAGE_FETCH_MODE fetch_mode,
7618  PGBUF_BUFFER_HASH * hash_anchor, PGBUF_FIX_PERF * perf, bool * try_again)
7619 {
7620  PGBUF_BCB *bufptr = NULL;
7621  PAGE_PTR pgptr = NULL;
7622  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
7623  bool success;
7624  int tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
7625  PGBUF_STATUS *show_status = &pgbuf_Pool.show_status[tran_index];
7626 
7627 #if defined (ENABLE_SYSTEMTAP)
7628  bool monitored = false;
7629  QUERY_ID query_id = NULL_QUERY_ID;
7630 #endif /* ENABLE_SYSTEMTAP */
7631 
7632  assert (fetch_mode != OLD_PAGE_IF_IN_BUFFER);
7633 
7634  /* The page is not found in the hash chain the caller is holding hash_anchor->hash_mutex */
7636  {
7637  pthread_mutex_unlock (&hash_anchor->hash_mutex);
7639  return NULL;
7640  }
7641 
7642  /* In this case, the caller is holding only hash_anchor->hash_mutex. The hash_anchor->hash_mutex is to be
7643  * released in pgbuf_lock_page (). */
7644  if (pgbuf_lock_page (thread_p, hash_anchor, vpid) != PGBUF_LOCK_HOLDER)
7645  {
7646  if (perf->is_perf_tracking)
7647  {
7648  tsc_getticks (&perf->end_tick);
7649  tsc_elapsed_time_usec (&perf->tv_diff, perf->end_tick, perf->start_tick);
7650  perf->lock_wait_time = perf->tv_diff.tv_sec * 1000000LL + perf->tv_diff.tv_usec;
7651  }
7652 
7653  if (fetch_mode == NEW_PAGE)
7654  {
7656  }
7657  else
7658  {
7660  }
7661  *try_again = true;
7662  return NULL;
7663  }
7664 
7666  {
7667  if (fetch_mode == NEW_PAGE)
7668  {
7670  }
7671  else
7672  {
7674  }
7675  }
7676 
7677  /* Now, the caller is not holding any mutex. */
7678  bufptr = pgbuf_allocate_bcb (thread_p, vpid);
7679  if (bufptr == NULL)
7680  {
7681  ASSERT_ERROR ();
7682  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, true);
7684  return NULL;
7685  }
7686 
7687  /* Currently, caller has one allocated BCB and is holding mutex */
7688 
7689  /* initialize the BCB */
7690  bufptr->vpid = *vpid;
7691  assert (!pgbuf_bcb_avoid_victim (bufptr));
7692  bufptr->latch_mode = PGBUF_NO_LATCH;
7693  pgbuf_bcb_update_flags (thread_p, bufptr, 0, PGBUF_BCB_ASYNC_FLUSH_REQ); /* todo: why this?? */
7695  LSA_SET_NULL (&bufptr->oldest_unflush_lsa);
7696 
7697  if (fetch_mode != NEW_PAGE)
7698  {
7699  /* Record number of reads in statistics */
7701  show_status->num_pages_read++;
7702 
7703 #if defined(ENABLE_SYSTEMTAP)
7704  query_id = qmgr_get_current_query_id (thread_p);
7705  if (query_id != NULL_QUERY_ID)
7706  {
7707  monitored = true;
7708  CUBRID_IO_READ_START (query_id);
7709  }
7710 #endif /* ENABLE_SYSTEMTAP */
7711 
7712  if (dwb_read_page (thread_p, vpid, &bufptr->iopage_buffer->iopage, &success) != NO_ERROR)
7713  {
7714  /* Should not happen */
7715  assert (false);
7716  return NULL;
7717  }
7718  else if (success == true)
7719  {
7720  /* Nothing to do, copied from DWB */
7721  }
7722  else if (fileio_read (thread_p, fileio_get_volume_descriptor (vpid->volid), &bufptr->iopage_buffer->iopage,
7723  vpid->pageid, IO_PAGESIZE) == NULL)
7724  {
7725  /* There was an error in reading the page. Clean the buffer... since it may have been corrupted */
7726  ASSERT_ERROR ();
7727 
7728  /* bufptr->mutex will be released in following function. */
7729  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
7730 
7731  /*
7732  * Now, caller is not holding any mutex.
7733  * the last argument of pgbuf_unlock_page () is true that
7734  * means hash_mutex must be held before unlocking page.
7735  */
7736  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, true);
7737 
7738 #if defined(ENABLE_SYSTEMTAP)
7739  if (monitored == true)
7740  {
7741  CUBRID_IO_READ_END (query_id, IO_PAGESIZE, 1);
7742  }
7743 #endif /* ENABLE_SYSTEMTAP */
7744 
7746  return NULL;
7747  }
7748 
7749  CAST_IOPGPTR_TO_PGPTR (pgptr, &bufptr->iopage_buffer->iopage);
7750  tde_algo = pgbuf_get_tde_algorithm (pgptr);
7751  if (tde_algo != TDE_ALGORITHM_NONE)
7752  {
7754  (&bufptr->iopage_buffer->iopage, tde_algo, pgbuf_is_temporary_volume (vpid->volid),
7755  &bufptr->iopage_buffer->iopage) != NO_ERROR)
7756  {
7757  ASSERT_ERROR ();
7758  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
7759  (void) pgbuf_unlock_page (thread_p, hash_anchor, vpid, true);
7761  return NULL;
7762  }
7763  }
7764 
7765 #if defined(ENABLE_SYSTEMTAP)
7766  if (monitored == true)
7767  {
7768  CUBRID_IO_READ_END (query_id, IO_PAGESIZE, 0);
7769  }
7770 #endif /* ENABLE_SYSTEMTAP */
7771  if (pgbuf_is_temporary_volume (vpid->volid) == true)
7772  {
7773  /* Check if the first time to access */
7774  if (!pgbuf_is_temp_lsa (bufptr->iopage_buffer->iopage.prv.lsa))
7775  {
7777  pgbuf_set_dirty_buffer_ptr (thread_p, bufptr);
7778  }
7779  }
7780 
7781 #if !defined (NDEBUG)
7782  /* perm volume */
7783  if (bufptr->vpid.volid > NULL_VOLID)
7784  {
7785  if (!log_is_in_crash_recovery ())
7786  {
7787  if (!LSA_ISNULL (&bufptr->iopage_buffer->iopage.prv.lsa))
7788  {
7789  assert (bufptr->iopage_buffer->iopage.prv.pageid != -1);
7790  assert (bufptr->iopage_buffer->iopage.prv.volid != -1);
7791  }
7792  }
7793  }
7794 #endif /* NDEBUG */
7795 
7796  if (thread_get_sort_stats_active (thread_p))
7797  {
7799  }
7800  }
7801  else
7802  {
7803  /* the caller is holding bufptr->mutex */
7804 
7805 #if defined(CUBRID_DEBUG)
7806  pgbuf_scramble (&bufptr->iopage_buffer->iopage);
7807 #endif /* CUBRID_DEBUG */
7808 
7809  /* Don't need to read page from disk since it is a new page. */
7810  if (pgbuf_is_temporary_volume (vpid->volid) == true)
7811  {
7813  }
7814  else
7815  {
7817  }
7818 
7819  /* perm volume */
7820  if (bufptr->vpid.volid > NULL_VOLID)
7821  {
7822  /* Init Page identifier of NEW_PAGE */
7823  bufptr->iopage_buffer->iopage.prv.pageid = -1;
7824  bufptr->iopage_buffer->iopage.prv.volid = -1;
7825  }
7826 
7827  if (thread_get_sort_stats_active (thread_p))
7828  {
7830  }
7831 
7832  show_status->num_pages_created++;
7833  show_status->num_hit++;
7834  }
7835 
7836  return bufptr;
7837 }
7838 
7839 /*
7840  * pgbuf_victimize_bcb () - Victimize given buffer page
7841  * return: NO_ERROR, or ER_code
7842  * bufptr(in): pointer to buffer page
7843  */
7844 static int
7846 {
7847 #if defined(SERVER_MODE)
7848  if (thread_p == NULL)
7849  {
7850  assert (thread_p != NULL);
7851  thread_p = thread_get_thread_entry_info ();
7852  }
7853 #endif /* SERVER_MODE */
7854 
7855  /* the caller is holding bufptr->mutex */
7856 
7857  /* before-flush, check victim condition again */
7858  if (!pgbuf_is_bcb_victimizable (bufptr, true))
7859  {
7860  assert (false);
7861  PGBUF_BCB_UNLOCK (bufptr);
7862  return ER_FAILED;
7863  }
7864 
7865  if (pgbuf_bcb_is_to_vacuum (bufptr))
7866  {
7867  pgbuf_bcb_update_flags (thread_p, bufptr, 0, PGBUF_BCB_TO_VACUUM_FLAG);
7868  }
7869  assert (bufptr->latch_mode == PGBUF_NO_LATCH);
7870 
7871  /* a safe victim */
7872  if (pgbuf_delete_from_hash_chain (thread_p, bufptr) != NO_ERROR)
7873  {
7874  return ER_FAILED;
7875  }
7876 
7877  /* If above function returns success, the caller is still holding bufptr->mutex.
7878  * Otherwise, the caller does not hold bufptr->mutex.
7879  */
7880 
7881  /* at this point, the caller is holding bufptr->mutex */
7882 
7883  return NO_ERROR;
7884 }
7885 
7886 /*
7887  * pgbuf_invalidate_bcb () - Invalidates BCB
7888  * return: NO_ERROR, or ER_code
7889  * bufptr(in): pointer to buffer page
7890  */
7891 static int
7893 {
7894  /* the caller is holding bufptr->mutex */
7895  /* be sure that there is not any reader/writer */
7896 
7897  if (bufptr->latch_mode == PGBUF_LATCH_INVALID)
7898  {
7899  PGBUF_BCB_UNLOCK (bufptr);
7900  return NO_ERROR;
7901  }
7902 
7903  if (pgbuf_bcb_is_direct_victim (bufptr))
7904  {
7905  /* bcb is already assigned as direct victim, should be victimized soon, so there is no point in invalidating it
7906  * here */
7907  PGBUF_BCB_UNLOCK (bufptr);
7908  return NO_ERROR;
7909  }
7910 
7911  pgbuf_bcb_clear_dirty (thread_p, bufptr);
7912 
7913  LSA_SET_NULL (&bufptr->oldest_unflush_lsa);
7914 
7915  /* bufptr->mutex is still held by the caller. */
7916  switch (pgbuf_bcb_get_zone (bufptr))
7917  {
7918  case PGBUF_VOID_ZONE:
7919  break;
7920 
7921  default:
7922  assert (PGBUF_IS_BCB_IN_LRU (bufptr));
7923  pgbuf_lru_remove_bcb (thread_p, bufptr);
7924  break;
7925  }
7926 
7927  if (bufptr->latch_mode == PGBUF_NO_LATCH)
7928  {
7929  if (pgbuf_delete_from_hash_chain (thread_p, bufptr) != NO_ERROR)
7930  {
7931  return ER_FAILED;
7932  }
7933 
7934  /* If above function returns failure, the caller does not hold bufptr->mutex. Otherwise, the caller is
7935  * holding bufptr->mutex. */
7936 
7937  /* Now, the caller is holding bufptr->mutex. */
7938  /* bufptr->mutex will be released in following function. */
7939  pgbuf_put_bcb_into_invalid_list (thread_p, bufptr);
7940  }
7941  else
7942  {
7943  /* todo: what to do? */
7944  assert (false);
7945  bufptr->latch_mode = PGBUF_NO_LATCH;
7946  PGBUF_BCB_UNLOCK (bufptr);
7947  }
7948 
7949  return NO_ERROR;
7950 }
7951 
7952 /*
7953  * pgbuf_bcb_safe_flush_force_unlock () - safe-flush bcb and make sure it does not remain locked.
7954  *
7955  * return : error code
7956  * thread_p (in) : thread entry
7957  * bufptr (in) : bcb to flush
7958  * synchronous (in) : true if caller wants to wait for bcb to be flushed (if it cannot flush immediately it gets
7959  * blocked). if false, the caller will only request flush and continue.
7960  */
7961 static int
7962 pgbuf_bcb_safe_flush_force_unlock (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous)
7963 {
7964  int error_code = NO_ERROR;
7965  bool locked = true;
7966 
7967  error_code = pgbuf_bcb_safe_flush_internal (thread_p, bufptr, synchronous, &locked);
7968  if (locked)
7969  {
7970  PGBUF_BCB_UNLOCK (bufptr);
7971  }
7972  return error_code;
7973 }
7974 
7975 /*
7976  * pgbuf_bcb_safe_flush_force_lock () - safe-flush bcb and make sure it remains locked.
7977  *
7978  * return : error code
7979  * thread_p (in) : thread entry
7980  * bufptr (in) : bcb to flush
7981  * synchronous (in) : true if caller wants to wait for bcb to be flushed (if it cannot flush immediately it gets
7982  * blocked). if false, the caller will only request flush and continue.
7983  */
7984 static int
7985 pgbuf_bcb_safe_flush_force_lock (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous)
7986 {
7987  int error_code = NO_ERROR;
7988  bool locked = true;
7989 
7990  error_code = pgbuf_bcb_safe_flush_internal (thread_p, bufptr, synchronous, &locked);
7991  if (error_code != NO_ERROR)
7992  {
7993  if (locked)
7994  {
7995  PGBUF_BCB_UNLOCK (bufptr);
7996  }
7997  return error_code;
7998  }
7999  if (!locked)
8000  {
8001  PGBUF_BCB_LOCK (bufptr);
8002  }
8003  return NO_ERROR;
8004 }
8005 
8006 /*
8007  * pgbuf_bcb_safe_flush_internal () - safe-flush bcb. function will do all the necessary checks. flush is executed only
8008  * bcb is dirty. function is safe in regard with concurrent latches and flushes.
8009  *
8010  * return : error code
8011  * thread_p (in) : thread entry
8012  * bufptr (in) : bcb to flush
8013  * synchronous (in) : true if caller wants to wait for bcb to be flushed (if it cannot flush immediately it gets
8014  * blocked). if false, the caller will only request flush and continue.
8015  * locked (out) : output if bcb is locked.
8016  */
8017 static int
8018 pgbuf_bcb_safe_flush_internal (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool synchronous, bool * locked)
8019 {
8020  int error_code = NO_ERROR;
8021 
8022  assert (bufptr->latch_mode != PGBUF_LATCH_FLUSH);
8023 
8024  PGBUF_BCB_CHECK_OWN (bufptr);
8025  *locked = true;
8026 
8027  /* the caller is holding bufptr->mutex */
8028  if (!pgbuf_bcb_is_dirty (bufptr))
8029  {
8030  /* not dirty; flush is not required */
8031  return NO_ERROR;
8032  }
8033 
8034  /* there are two cases when we cannot flush immediately:
8035  * 1. page is write latched. we cannot know when the latcher makes modifications, so it is not safe to flush the page.
8036  * 2. another thread is already flushing. allowing multiple concurrent flushes is not safe (we cannot guarantee the
8037  * order of disk writing, therefore it is theoretically possible to write an old version over a newer version of
8038  * the page).
8039  *
8040  * for the first case, we use the PGBUF_BCB_ASYNC_FLUSH_REQ flag to request a flush from the thread holding latch.
8041  * for the second case, we know the bcb is already being flushed. if we need to be sure page is flushed, we'll put
8042  * ourselves in bcb's waiting list (and a thread doing flush should wake us).
8043  */
8044 
8045  if (!pgbuf_bcb_is_flushing (bufptr)
8046  && (bufptr->latch_mode == PGBUF_NO_LATCH || bufptr->latch_mode == PGBUF_LATCH_READ
8047  || (bufptr->latch_mode == PGBUF_LATCH_WRITE && pgbuf_find_thrd_holder (thread_p, bufptr) != NULL)))
8048  {
8049  /* don't have to wait for writer/flush */
8050  return pgbuf_bcb_flush_with_wal (thread_p, bufptr, false, locked);
8051  }
8052 
8053  /* page is write latched. notify the holder to flush it on unfix. */
8054  assert (pgbuf_bcb_is_flushing (bufptr) || bufptr->latch_mode == PGBUF_LATCH_WRITE);
8055  if (!pgbuf_bcb_is_flushing (bufptr))
8056  {
8057  pgbuf_bcb_update_flags (thread_p, bufptr, PGBUF_BCB_ASYNC_FLUSH_REQ, 0);
8058  }
8059 
8060  if (synchronous == true)
8061  {
8062  /* wait for bcb to be flushed. */
8063  *locked = false;
8064  error_code = pgbuf_block_bcb (thread_p, bufptr, PGBUF_LATCH_FLUSH, 0, false);
8065  if (error_code != NO_ERROR)
8066  {
8067  ASSERT_ERROR ();
8068  }
8069  return error_code;
8070  }
8071 
8072  /* don't wait for flush */
8073  return NO_ERROR;
8074 }
8075 
8076 /*
8077  * pgbuf_get_bcb_from_invalid_list () - Get BCB from buffer invalid list
8078  *
8079  * return: If success, a newly allocated BCB, otherwise NULL
8080  * thread_p (in) : thread entry
8081  *
8082  * Note: This function disconnects a BCB on the top of the buffer invalid list
8083  * and returns it. Before disconnection, the thread must hold the
8084  * invalid list mutex and after disconnection, release the mutex.
8085  */
8086 static PGBUF_BCB *
8088 {
8089  PGBUF_BCB *bufptr;
8090 #if defined(SERVER_MODE)
8091  int rv;
8092 #endif /* SERVER_MODE */
8093 
8094  /* check if invalid BCB list is empty (step 1) */
8095  if (pgbuf_Pool.buf_invalid_list.invalid_top == NULL)
8096  {
8097  return NULL;
8098  }
8099 
8100  rv = pthread_mutex_lock (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
8101 
8102  /* check if invalid BCB list is empty (step 2) */
8103  if (pgbuf_Pool.buf_invalid_list.invalid_top == NULL)
8104  {
8105  /* invalid BCB list is empty */
8106  pthread_mutex_unlock (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
8107  return NULL;
8108  }
8109  else
8110  {
8111  /* invalid BCB list is not empty */
8112  bufptr = pgbuf_Pool.buf_invalid_list.invalid_top;
8113  pgbuf_Pool.buf_invalid_list.invalid_top = bufptr->next_BCB;
8114  pgbuf_Pool.buf_invalid_list.invalid_cnt -= 1;
8115  pthread_mutex_unlock (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
8116 
8117  PGBUF_BCB_LOCK (bufptr);
8118  bufptr->next_BCB = NULL;
8119  pgbuf_bcb_change_zone (thread_p, bufptr, 0, PGBUF_VOID_ZONE);
8120 
8122  return bufptr;
8123  }
8124 }
8125 
8126 /*
8127  * pgbuf_put_bcb_into_invalid_list () - Put BCB into buffer invalid list
8128  * return: NO_ERROR
8129  * bufptr(in):
8130  *
8131  * Note: This function connects BCB to the top of the buffer invalid list and
8132  * makes its zone PB_INVALIDZone. Before connection, must hold the
8133  * invalid list mutex and after connection, release the mutex.
8134  */
8135 static int
8137 {
8138 #if defined(SERVER_MODE)
8139  int rv;
8140 #endif /* SERVER_MODE */
8141 
8142  /* the caller is holding bufptr->mutex */
8143  VPID_SET_NULL (&bufptr->vpid);
8144  bufptr->latch_mode = PGBUF_LATCH_INVALID;
8145  assert ((bufptr->flags & PGBUF_BCB_FLAGS_MASK) == 0);
8146  pgbuf_bcb_change_zone (thread_p, bufptr, 0, PGBUF_INVALID_ZONE);
8148 
8149  rv = pthread_mutex_lock (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
8150  bufptr->next_BCB = pgbuf_Pool.buf_invalid_list.invalid_top;
8151  pgbuf_Pool.buf_invalid_list.invalid_top = bufptr;
8152  pgbuf_Pool.buf_invalid_list.invalid_cnt += 1;
8153  PGBUF_BCB_UNLOCK (bufptr);
8154  pthread_mutex_unlock (&pgbuf_Pool.buf_invalid_list.invalid_mutex);
8155 
8156  return NO_ERROR;
8157 }
8158 
8159 /*
8160  * pgbuf_get_shared_lru_index_for_add () - get a shared index to add a new bcb. we'll use a round-robin way to choose
8161  * next list, but we'll avoid biggest list (just to keep things balanced).
8162  *
8163  * return : shared lru index
8164  */
8165 STATIC_INLINE int
8167 {
8168 #define PAGE_ADD_REFRESH_STAT \
8169  MAX (2 * pgbuf_Pool.num_buffers / PGBUF_SHARED_LRU_COUNT, 10000)
8170 
8171  int i;
8172  unsigned int lru_idx, refresh_stat_cnt;
8173 
8174  lru_idx = ATOMIC_INC_32 (&pgbuf_Pool.quota.add_shared_lru_idx, 1);
8175  refresh_stat_cnt = lru_idx % PAGE_ADD_REFRESH_STAT;
8176 
8177  /* check if there is an in-balance BCBs distribution across shared LRUs */
8178  if (refresh_stat_cnt == 0)
8179  {
8180  int shared_lru_bcb_sum;
8181  int max_bcb, min_bcb;
8182  int lru_idx_with_max;
8183  int this_lru_cnt;
8184  int curr_avoid_lru_idx;
8185 
8186  shared_lru_bcb_sum = 0;
8187  max_bcb = 0;
8188  min_bcb = pgbuf_Pool.num_buffers;
8189  lru_idx_with_max = -1;
8190  /* update unbalanced LRU idx */
8191  for (i = 0; i < PGBUF_SHARED_LRU_COUNT; i++)
8192  {
8193  this_lru_cnt = PGBUF_LRU_LIST_COUNT (PGBUF_GET_LRU_LIST (i));
8194  shared_lru_bcb_sum += this_lru_cnt;
8195 
8196  if (this_lru_cnt > max_bcb)
8197  {
8198  max_bcb = this_lru_cnt;
8199  lru_idx_with_max = i;
8200  }
8201 
8202  if (this_lru_cnt < min_bcb)
8203  {
8204  min_bcb = this_lru_cnt;
8205  }
8206  }
8207 
8208  if (shared_lru_bcb_sum > pgbuf_Pool.num_buffers / 10
8209  && (max_bcb > (int) (1.3f * shared_lru_bcb_sum) / PGBUF_SHARED_LRU_COUNT || max_bcb > 2 * min_bcb))
8210  {
8211  ATOMIC_TAS_32 (&pgbuf_Pool.quota.avoid_shared_lru_idx, lru_idx_with_max);
8212  }
8213  else
8214  {
8215  curr_avoid_lru_idx = pgbuf_Pool.quota.avoid_shared_lru_idx;
8216  if (curr_avoid_lru_idx == -1
8217  || (PGBUF_LRU_LIST_COUNT (PGBUF_GET_LRU_LIST (curr_avoid_lru_idx))
8218  < shared_lru_bcb_sum / PGBUF_SHARED_LRU_COUNT))
8219  {
8220  ATOMIC_TAS_32 (&pgbuf_Pool.quota.avoid_shared_lru_idx, -1);
8221  }
8222  }
8223  }
8224 
8225  lru_idx = lru_idx % PGBUF_SHARED_LRU_COUNT;
8226 
8227  /* avoid to add in shared LRU idx having too many BCBs */
8228  if (pgbuf_Pool.quota.avoid_shared_lru_idx == (int) lru_idx)
8229  {
8230  lru_idx = ATOMIC_INC_32 (&pgbuf_Pool.quota.add_shared_lru_idx, 1);
8231  lru_idx = lru_idx % PGBUF_SHARED_LRU_COUNT;
8232  }
8233 
8234  return lru_idx;
8235 #undef PAGE_ADD_REFRESH_STAT
8236 }
8237 
8238 /*
8239  * pgbuf_get_victim () - get a victim bcb from page buffer.
8240  *
8241  * return : victim candidate or NULL if no candidate was found
8242  * thread_p (in) : thread entry
8243  *
8244  * Note: If a victim BCB is found, this function will already lock it. This means that the caller will have exclusive
8245  * access to the returned BCB.
8246  */
8247 static PGBUF_BCB *
8249 {
8250 #define PERF(id) if (detailed_perf) perfmon_inc_stat (thread_p, id)
8251 
8252  PGBUF_BCB *victim = NULL;
8254  bool has_flush_thread = pgbuf_is_page_flush_daemon_available ();
8255  int nloops = 0; /* used as safe-guard against infinite loops */
8256  int private_lru_idx;
8257  PGBUF_LRU_LIST *lru_list = NULL;
8258  bool restrict_other = false;
8259  bool searched_own = false;
8260  UINT64 initial_consume_cursor, current_consume_cursor;
8261  PERF_UTIME_TRACKER perf_tracker = PERF_UTIME_TRACKER_INITIALIZER;
8262 
8263  ATOMIC_INC_32 (&pgbuf_Pool.monitor.lru_victim_req_cnt, 1);
8264 
8265  /* how this works:
8266  * we need to find a victim in one of all lru lists. we have two lru list types: private and shared. private are pages
8267  * fixed by a single transaction, while shared are pages fix by multiple transactions. we usually prioritize the
8268  * private lists.
8269  * the order we look for victimize is this:
8270  * 1. first search in own private list if it is not under quota.
8271  * 2. look in another private list.
8272  * 3. look in a shared list.
8273  *
8274  * normally, if the system does not lack victims, one of the three searches should provide a victim candidate.
8275  * however, we can be unlucky and not find a candidate with the three steps. this is especially possible when we have
8276  * only one active transaction, with long transactions, and many vacuum workers trying to catch up. all candidates
8277  * are found in a single private list, which means that many vacuum workers may not find the lists in lru queue.
8278  * for this case, we loop the three searches, as long as pgbuf_Pool.monitor.victim_rich is true.
8279  *
8280  * note: if quota is disabled (although this is not recommended), only shared lists are searched.
8281  *
8282  * note: if all above failed to produce a victim, we'll try to victimize from own private even if it is under quota.
8283  * we found a strange particular case when all private lists were on par with their quota's (but just below),
8284  * shared lists had no lru 3 zone and nothing could be victimized or flushed.
8285  */
8286 
8287  /* 1. search own private list */
8288  if (PGBUF_THREAD_HAS_PRIVATE_LRU (thread_p))
8289  {
8290  /* first try my own private list */
8291  private_lru_idx = PGBUF_LRU_INDEX_FROM_PRIVATE (PGBUF_PRIVATE_LRU_FROM_THREAD (thread_p));
8292  lru_list = PGBUF_GET_LRU_LIST (private_lru_idx);
8293 
8294  /* don't victimize from own list if it is under quota */
8296  || (PGBUF_LRU_LIST_IS_OVER_QUOTA (lru_list) && lru_list->count_vict_cand > 0))
8297  {
8298  if (detailed_perf)
8299  {
8300  PERF_UTIME_TRACKER_START (thread_p, &perf_tracker);
8301  }
8302  victim = pgbuf_get_victim_from_lru_list (thread_p, private_lru_idx);
8303  if (victim != NULL)
8304  {
8306  if (detailed_perf)
8307  {
8308  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_OWN_PRIVATE_LISTS);
8309  }
8310  return victim;
8311  }
8312  /* failed */
8314  if (detailed_perf)
8315  {
8316  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_OWN_PRIVATE_LISTS);
8317  }
8318 
8319  /* if over quota, we are not allowed to search in other lru lists. we'll wait for victim.
8320  * note: except vacuum threads who ignore unfixes and have no quota. */
8321  if (!PGBUF_THREAD_SHOULD_IGNORE_UNFIX (thread_p))
8322  {
8323  /* still, offer a chance to those that are just slightly over quota. this actually targets new
8324  * transactions that do not have a quota yet... let them get a few bcb's first until their activity
8325  * becomes relevant. */
8326  restrict_other = PGBUF_LRU_LIST_IS_OVER_QUOTA_WITH_BUFFER (lru_list);
8327  }
8328  searched_own = true;
8329  }
8330  }
8331 
8332  /* 2. search other private list.
8333  *
8334  * note: in single-thread context, the only list is mine. no point in trying to victimize again
8335  * note: if restrict_other is true, only other big private lists can be used for victimization
8336  */
8337  if (PGBUF_PAGE_QUOTA_IS_ENABLED && has_flush_thread)
8338  {
8339  if (detailed_perf)
8340  {
8341  PERF_UTIME_TRACKER_START (thread_p, &perf_tracker);
8342  }
8343  victim = pgbuf_lfcq_get_victim_from_private_lru (thread_p, restrict_other);
8344  if (victim != NULL)
8345  {
8346  if (detailed_perf)
8347  {
8348  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_OTHERS_PRIVATE_LISTS);
8349  }
8350  return victim;
8351  }
8352  if (detailed_perf)
8353  {
8354  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_OTHERS_PRIVATE_LISTS);
8355  }
8356  }
8357 
8358  /* loop:
8359  *
8360  * DOESN'T HAVE FLUSH THREAD: one iteration could fail, because the shared list's last victims have been set dirty.
8361  * however, if there are other lists having victims, we should find them.
8362  * it is possible to not have any victims, in which case the shared list queue should become empty. we'll have to do a
8363  * flush and search again.
8364  * we'd like to avoid looping infinitely (if there's a bug), so we use the nloops safe-guard. Each shared list should
8365  * be removed after a failed search, so the maximum accepted number of loops is pgbuf_Pool.num_LRU_list.
8366  */
8367 
8368  if (detailed_perf)
8369  {
8370  PERF_UTIME_TRACKER_START (thread_p, &perf_tracker);
8371  }
8372 
8373  initial_consume_cursor = pgbuf_Pool.shared_lrus_with_victims->get_consumer_cursor ();
8374  do
8375  {
8376  /* 3. search a shared list. */
8377  victim = pgbuf_lfcq_get_victim_from_shared_lru (thread_p, has_flush_thread);
8378  if (victim != NULL)
8379  {
8380  if (detailed_perf)
8381  {
8382  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_SHARED_LISTS);
8383  }
8384  return victim;
8385  }
8386  current_consume_cursor = pgbuf_Pool.shared_lrus_with_victims->get_consumer_cursor ();
8387  }
8388  while (!has_flush_thread && !pgbuf_Pool.shared_lrus_with_victims->is_empty ()
8389  && ((int) (current_consume_cursor - initial_consume_cursor) <= pgbuf_Pool.num_LRU_list)
8390  && (++nloops <= pgbuf_Pool.num_LRU_list));
8391  /* todo: maybe we can find a less complicated condition of looping. Probably no need to use nloops <= pgbuf_Pool.num_LRU_list. */
8392  if (detailed_perf)
8393  {
8394  PERF_UTIME_TRACKER_TIME (thread_p, &perf_tracker, PSTAT_PB_VICTIM_SEARCH_SHARED_LISTS);
8395  }
8396 
8397  /* no victim found... */
8398  assert (victim == NULL);
8399 
8401 
8402  if (PGBUF_THREAD_HAS_PRIVATE_LRU (thread_p) && !searched_own)
8403  {
8404  /* try on own private even if it is under quota. */
8405  private_lru_idx = PGBUF_LRU_INDEX_FROM_PRIVATE (PGBUF_PRIVATE_LRU_FROM_THREAD (thread_p));
8406  lru_list = PGBUF_GET_LRU_LIST (private_lru_idx);
8407 
8408  victim = pgbuf_get_victim_from_lru_list (thread_p, private_lru_idx);
8409  if (victim != NULL)
8410  {
8412  return victim;
8413  }
8414  /* failed */
8415  if (detailed_perf)
8416  {
8418  }
8419  }
8420  assert (victim == NULL);
8421 
8422  return victim;
8423 
8424 #undef PERF
8425 }
8426 
8427 /*
8428  * pgbuf_is_bcb_fixed_by_any () - is page fixed by any thread?
8429  *
8430  * return : NO_ERROR
8431  * PGBUF_BCB * bcb (in) : bcb
8432  * has_mutex_lock (in) : true if current thread has lock on bcb
8433  *
8434  * note: if has_mutex_lock is true, even if bcb->latch_mode is not PGBUF_NO_LATCH, we consider this to be temporary.
8435  * this must be during pgbuf_unfix and latch_mode will be set to PGBUF_NO_LATCH before bcb mutex is released.
8436  */
8437 STATIC_INLINE bool
8438 pgbuf_is_bcb_fixed_by_any (PGBUF_BCB * bcb, bool has_mutex_lock)
8439 {
8440 #if defined (SERVER_MODE)
8441  if (has_mutex_lock)
8442  {
8443  PGBUF_BCB_CHECK_OWN (bcb);
8444  }
8445 
8446  /* note: sometimes, the next wait thread could only be threads waiting for flush. however, these are exceptional
8447  * cases. we'd rather miss a few good bcb's from time to time, rather than processing the waiting list for
8448  * every bcb. */
8449 
8450  return bcb->fcnt > 0 || bcb->next_wait_thrd != NULL || (!has_mutex_lock && bcb->latch_mode != PGBUF_NO_LATCH);
8451 #else /* !SERVER_MODE */
8452  return bcb->fcnt != 0;
8453 #endif /* !SERVER_MODE */
8454 }
8455 
8456 /*
8457  * pgbuf_is_bcb_victimizable () - check whether bcb can be victimized.
8458  *
8459  * return : true if bcb can be victimized, false otherwise
8460  * bcb (in) : bcb
8461  * has_mutex_lock (in) : true if bcb mutex is owned
8462  */
8463 STATIC_INLINE bool
8464 pgbuf_is_bcb_victimizable (PGBUF_BCB * bcb, bool has_mutex_lock)
8465 {
8466  /* must not be dirty */
8467  if (pgbuf_bcb_avoid_victim (bcb))
8468  {
8469  return false;
8470  }
8471 
8472 #if defined (SERVER_MODE)
8473  /* must not be fixed and must not have waiters. */
8474  if (pgbuf_is_bcb_fixed_by_any (bcb, has_mutex_lock))
8475  {
8476  return false;
8477  }
8478 #endif /* SERVER_MODE */
8479 
8480  /* valid */
8481  return true;
8482 }
8483 
8484 /*
8485  * pgbuf_get_victim_from_lru_list () - Get victim BCB from the bottom of LRU list
8486  * return: If success, BCB, otherwise NULL
8487  * lru_idx (in) : index of LRU list
8488  *
8489  * Note: This function disconnects BCB from the bottom of the LRU list and returns it if its fcnt == 0.
8490  * If its fcnt != 0, makes bufptr->PrevBCB bottom and retry.
8491  * While this processing, the caller must be the holder of the LRU list.
8492  */
8493 static PGBUF_BCB *
8494 pgbuf_get_victim_from_lru_list (THREAD_ENTRY * thread_p, const int lru_idx)
8495 {
8496 #define PERF(pstatid) if (perf_tracking) perfmon_inc_stat (thread_p, pstatid)
8497 #define MAX_DEPTH 1000
8498 
8499  PGBUF_BCB *bufptr;
8500  int found_victim_cnt = 0;
8501  int search_cnt = 0;
8502  int lru_victim_cnt = 0;
8503  PGBUF_LRU_LIST *lru_list;
8504  PGBUF_BCB *bufptr_victimizable = NULL;
8505  PGBUF_BCB *bufptr_start = NULL;
8506  PGBUF_BCB *victim_hint = NULL;
8507 
8509 
8510  lru_list = &pgbuf_Pool.buf_LRU_list[lru_idx];
8511 
8513 
8514  /* check if LRU list is empty */
8515  if (lru_list->count_vict_cand == 0)
8516  {
8518  return NULL;
8519  }
8520 
8521  pthread_mutex_lock (&lru_list->mutex);
8522  if (lru_list->bottom == NULL || !PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->bottom))
8523  {
8524  /* no zone 3 */
8526  pthread_mutex_unlock (&lru_list->mutex);
8527  return NULL;
8528  }
8529 
8531  {
8532  /* first adjust lru1 zone */
8533  pgbuf_lru_adjust_zones (thread_p, lru_list, false);
8534  }
8535 
8536  /* search for non dirty bcb */
8537  lru_victim_cnt = lru_list->count_vict_cand;
8538  if (lru_victim_cnt <= 0)
8539  {
8540  /* no victims */
8542  assert (lru_victim_cnt == 0);
8543  pthread_mutex_unlock (&lru_list->mutex);
8544  return NULL;
8545  }
8546 
8547  if (!pgbuf_bcb_is_dirty (lru_list->bottom) && lru_list->victim_hint != lru_list->bottom)
8548  {
8549  /* update hint to bottom. sometimes it may be out of sync. */
8551  if (PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->bottom))
8552  {
8553  (void) ATOMIC_TAS_ADDR (&lru_list->victim_hint, lru_list->bottom);
8554  }
8555  else
8556  {
8557  (void) ATOMIC_TAS_ADDR (&lru_list->victim_hint, (PGBUF_BCB *) NULL);
8558  }
8559  }
8560 
8561  /* we will search */
8562  found_victim_cnt = 0;
8563  bufptr_victimizable = NULL;
8564 
8565  /* start searching with victim hint */
8566  victim_hint = lru_list->victim_hint;
8567  if (victim_hint == NULL)
8568  {
8569  bufptr_start = lru_list->bottom;
8570  }
8571  else
8572  {
8573  bufptr_start = victim_hint;
8574  }
8575 
8576  for (bufptr = bufptr_start; bufptr != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bufptr) && search_cnt < MAX_DEPTH;
8577  bufptr = bufptr->prev_BCB, search_cnt++)
8578  {
8579  /* must not be any other case that invalidates a victim: is flushing, direct victim */
8580  if (pgbuf_bcb_avoid_victim (bufptr))
8581  {
8582  /* this bcb is not valid for victimization */
8583  continue;
8584  }
8585 
8586  /* must not be fixed */
8587  if (pgbuf_is_bcb_fixed_by_any (bufptr, false))
8588  {
8589  /* this bcb cannot be used now, but it is a valid victim candidate. maybe we should update victim hint */
8590  if (bufptr_victimizable == NULL)
8591  {
8592  bufptr_victimizable = bufptr;
8593 
8594  /* update hint if this is not bufptr_start and hint has not changed in the meantime. */
8595  if (bufptr_victimizable != victim_hint
8596  && ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, bufptr_victimizable))
8597  {
8598  /* hint advanced */
8599  }
8600 
8601  assert (lru_list->victim_hint == NULL || PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->victim_hint));
8602  }
8603 
8604  found_victim_cnt++;
8605  if (found_victim_cnt >= lru_victim_cnt)
8606  {
8607  /* early out: probably we won't find others */
8608  break;
8609  }
8610  continue;
8611  }
8612 
8613  /* a victim candidate. we need to lock its BCB, but since we have LRU mutex, we can only do it conditionally.
8614  * chances are we'll get the mutex. */
8615  if (PGBUF_BCB_TRYLOCK (bufptr) == 0)
8616  {
8617  if (pgbuf_is_bcb_victimizable (bufptr, true))
8618  {
8619  if (bufptr_victimizable == NULL)
8620  {
8621  /* try to update hint to bufptr->prev_BCB */
8622  pgbuf_lru_advance_victim_hint (thread_p, lru_list, victim_hint, bufptr->prev_BCB, false);
8623  }
8624  pgbuf_remove_from_lru_list (thread_p, bufptr, lru_list);
8625 
8626 #if defined (SERVER_MODE)
8627  /* todo: this is a hack */
8628  if (pgbuf_Pool.direct_victims.waiter_threads_low_priority->size ()
8629  >= (5 + (thread_num_total_threads () / 20)))
8630  {
8631  pgbuf_panic_assign_direct_victims_from_lru (thread_p, lru_list, bufptr->prev_BCB);
8632  }
8633 #endif /* SERVER_MODE */
8634 
8635  if (lru_list->bottom != NULL && pgbuf_bcb_is_dirty (lru_list->bottom)
8637  {
8638  /* new bottom is dirty... make sure that flush will wake up */
8639  pgbuf_wakeup_page_flush_daemon (thread_p);
8640  }
8641  pthread_mutex_unlock (&lru_list->mutex);
8642 
8643  pgbuf_add_vpid_to_aout_list (thread_p, &bufptr->vpid, lru_idx);
8644 
8645  return bufptr;
8646  }
8647  else
8648  {
8649  PGBUF_BCB_UNLOCK (bufptr);
8650  }
8651  }
8652  else
8653  {
8654  /* failed try lock in single-threaded? impossible */
8656 
8657  /* save the avoid victim bufptr. maybe it will be reset until we finish the search */
8658  if (bufptr_victimizable == NULL)
8659  {
8660  bufptr_victimizable = bufptr;
8661  /* try to replace victim if it was not already changed. */
8662  if (bufptr != victim_hint && ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, bufptr_victimizable))
8663  {
8664  /* modified hint */
8665  }
8666 
8667  assert (lru_list->victim_hint == NULL || PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->victim_hint));
8668  }
8669  found_victim_cnt++;
8670  if (found_victim_cnt >= lru_victim_cnt)
8671  {
8672  /* early out: probably we won't find others */
8673  break;
8674  }
8675  }
8676  }
8677 
8679  if (bufptr_victimizable == NULL && victim_hint != NULL)
8680  {
8681  /* we had a hint and we failed to find any victim candidates. */
8684  if (lru_list->count_vict_cand > 0 && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->bottom))
8685  {
8686  /* set victim hint to bottom */
8687  (void) ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, lru_list->bottom);
8688  }
8689  else
8690  {
8691  /* no hint */
8692  (void) ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, (PGBUF_BCB *) NULL);
8693  }
8694  }
8695 
8696  pthread_mutex_unlock (&lru_list->mutex);
8697 
8698  /* we need more victims */
8699  pgbuf_wakeup_page_flush_daemon (thread_p);
8700  /* failed finding victim in single-threaded, although the number of victim candidates is positive? impossible!
8701  * note: not really impossible. the thread may have the victimizable fixed. but bufptr_victimizable must not be
8702  * NULL. */
8703  assert (pgbuf_is_page_flush_daemon_available () || (bufptr_victimizable != NULL) || (search_cnt == MAX_DEPTH));
8704  return NULL;
8705 
8706 #undef PERF
8707 #undef MAX_DEPTH
8708 }
8709 
8710 #if defined (SERVER_MODE)
8711 /*
8712  * pgbuf_panic_assign_direct_victims_from_lru () - panic assign direct victims from lru.
8713  *
8714  * return : number of assigned victims.
8715  * thread_p (in) : thread entry
8716  * lru_list (in) : lru list
8717  * bcb_start (in) : starting bcb
8718  */
8719 static int
8720 pgbuf_panic_assign_direct_victims_from_lru (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, PGBUF_BCB * bcb_start)
8721 {
8722 #define MAX_DEPTH 1000
8723  PGBUF_BCB *bcb = NULL;
8724  int n_assigned = 0;
8725  int count = 0;
8726 
8727  /* statistics shows not useful */
8728 
8729  if (bcb_start == NULL)
8730  {
8731  return 0;
8732  }
8733  assert (pgbuf_bcb_get_lru_index (bcb_start) == lru_list->index);
8734 
8735  /* panic victimization function */
8736 
8737  for (bcb = bcb_start;
8738  bcb != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bcb) && lru_list->count_vict_cand > 0 && count < MAX_DEPTH;
8739  bcb = bcb->prev_BCB, count++)
8740  {
8741  assert (pgbuf_bcb_get_lru_index (bcb) == lru_list->index);
8742  if (!pgbuf_is_bcb_victimizable (bcb, false))
8743  {
8744  continue;
8745  }
8746 
8747  /* lock mutex. just try. */
8748  if (PGBUF_BCB_TRYLOCK (bcb) != 0)
8749  {
8750  continue;
8751  }
8752  if (!pgbuf_is_bcb_victimizable (bcb, true))
8753  {
8754  PGBUF_BCB_UNLOCK (bcb);
8755  continue;
8756  }
8757  if (!pgbuf_assign_direct_victim (thread_p, bcb))
8758  {
8759  /* no more waiting threads */
8760  PGBUF_BCB_UNLOCK (bcb);
8761  break;
8762  }
8763  /* assigned directly */
8764  PGBUF_BCB_UNLOCK (bcb);
8766  {
8768  }
8769  n_assigned++;
8770  }
8771 
8772  return n_assigned;
8773 
8774 #undef MAX_DEPTH
8775 }
8776 
8777 /*
8778  * pgbuf_direct_victims_maintenance () - assign direct victims via searching. the purpose of function is to make sure a
8779  * victim is assigned even when system has low to no activity, which prevents
8780  * bcb's from being assigned to a waiting thread. basically, this is the backup
8781  * plan.
8782  *
8783  * return : void
8784  * thread_p (in) : thread entry
8785  */
8786 void
8787 pgbuf_direct_victims_maintenance (THREAD_ENTRY * thread_p)
8788 {
8789 #define DEFAULT_ASSIGNS_PER_ITERATION 5
8790  int nassigns = DEFAULT_ASSIGNS_PER_ITERATION;
8791  bool restarted;
8792  int index;
8793 
8794  /* note this is designed for single-threaded use only. the static values are used for pick lists with a round-robin
8795  * system */
8796  static int prv_index = 0;
8797  static int shr_index = 0;
8798 
8799  /* privates */
8800  for (index = prv_index, restarted = false;
8801  pgbuf_is_any_thread_waiting_for_direct_victim () && nassigns > 0 && index != prv_index && !restarted;
8802  (index == PGBUF_PRIVATE_LRU_COUNT - 1) ? index = 0, restarted = true : index++)
8803  {
8804  pgbuf_lfcq_assign_direct_victims (thread_p, PGBUF_LRU_INDEX_FROM_PRIVATE (index), &nassigns);
8805  }
8806  prv_index = index;
8807 
8808  /* shared */
8809  for (index = shr_index, restarted = false;
8810  pgbuf_is_any_thread_waiting_for_direct_victim () && nassigns > 0 && index != shr_index && !restarted;
8811  (index == PGBUF_SHARED_LRU_COUNT - 1) ? index = 0, restarted = true : index++)
8812  {
8813  pgbuf_lfcq_assign_direct_victims (thread_p, index, &nassigns);
8814  }
8815  shr_index = index;
8816 
8817 #undef DEFAULT_ASSIGNS_PER_ITERATION
8818 }
8819 
8820 /*
8821  * pgbuf_lfcq_assign_direct_victims () - get list from queue and assign victims directly.
8822  *
8823  * return : void
8824  * thread_p (in) : thread entry
8825  * lru_idx (in) : lru index
8826  * nassign_inout (in/out) : update the number of victims to assign
8827  */
8828 STATIC_INLINE void
8829 pgbuf_lfcq_assign_direct_victims (THREAD_ENTRY * thread_p, int lru_idx, int *nassign_inout)
8830 {
8831  PGBUF_LRU_LIST *lru_list;
8832  PGBUF_BCB *victim_hint = NULL;
8833  int nassigned = 0;
8834 
8835  lru_list = PGBUF_GET_LRU_LIST (lru_idx);
8836  if (lru_list->count_vict_cand > 0)
8837  {
8838  pthread_mutex_lock (&lru_list->mutex);
8839  victim_hint = lru_list->victim_hint;
8840  nassigned = pgbuf_panic_assign_direct_victims_from_lru (thread_p, lru_list, victim_hint);
8841  if (nassigned == 0 && lru_list->count_vict_cand > 0 && pgbuf_is_any_thread_waiting_for_direct_victim ())
8842  {
8843  /* maybe hint was bad? that's most likely case. reset the hint to bottom. */
8845  if (PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->bottom))
8846  {
8847  (void) ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, lru_list->bottom);
8848  }
8849  else
8850  {
8851  (void) ATOMIC_CAS_ADDR (&lru_list->victim_hint, victim_hint, (PGBUF_BCB *) NULL);
8852  }
8853 
8854  /* check from bottom anyway */
8855  nassigned = pgbuf_panic_assign_direct_victims_from_lru (thread_p, lru_list, lru_list->bottom);
8856  }
8857  pthread_mutex_unlock (&lru_list->mutex);
8858 
8859  (*nassign_inout) -= nassigned;
8860  }
8861 }
8862 #endif /* SERVER_MODE */
8863 
8864 /*
8865  * pgbuf_lru_add_bcb_to_top () - add a bcb to lru list top
8866  *
8867  * return : void
8868  * thread_p (in) : thread entry
8869  * bcb (in) : bcb added to top
8870  * lru_list (in) : lru list
8871  */
8872 STATIC_INLINE void
8874 {
8875  /* there will be no previous BCB */
8876  bcb->prev_BCB = NULL;
8877 
8878  /* next bcb is current top */
8879  bcb->next_BCB = lru_list->top;
8880 
8881  /* is list empty? */
8882  if (lru_list->top == NULL)
8883  {
8884  /* yeah. bottom should also be NULL */
8885  assert (lru_list->bottom == NULL);
8886  /* bcb is top and bottom of list */
8887  lru_list->bottom = bcb;
8888  }
8889  else
8890  {
8891  /* update previous top link and change top */
8892  lru_list->top->prev_BCB = bcb;
8893  }
8894  /* we have new top */
8895  lru_list->top = bcb;
8896 
8897  if (lru_list->bottom_1 == NULL)
8898  {
8899  /* empty lru 1 zone */
8900  assert (lru_list->count_lru1 == 0);
8901  /* set middle to this bcb */
8902  lru_list->bottom_1 = bcb;
8903  }
8904 
8905  /* increment list tick when adding to top */
8906  if (++lru_list->tick_list >= DB_INT32_MAX)
8907  {
8908  lru_list->tick_list = 0;
8909  }
8910 
8911  pgbuf_bcb_change_zone (thread_p, bcb, lru_list->index, PGBUF_LRU_1_ZONE);
8912 }
8913 
8914 /*
8915  * pgbuf_lru_add_bcb_to_middle () - add a bcb to lru list middle
8916  *
8917  * return : void
8918  * thread_p (in) : thread entry
8919  * bcb (in) : bcb added to middle
8920  * lru_list (in) : lru list
8921  */
8922 STATIC_INLINE void
8924 {
8925  /* is lru 1 zone empty? */
8926  if (lru_list->bottom_1 == NULL)
8927  {
8928  /* yes, zone 1 is empty */
8929  /* is list empty? */
8930  if (lru_list->top == NULL)
8931  {
8932  /* yes, list is empty. set top and bottom to this bcb. */
8933  assert (lru_list->bottom == NULL);
8934  lru_list->top = bcb;
8935  lru_list->bottom = bcb;
8936 
8937  /* null prev/next links */
8938  bcb->prev_BCB = NULL;
8939  bcb->next_BCB = NULL;
8940  }
8941  else
8942  {
8943  /* no. we should add the bcb before top. */
8944  assert (pgbuf_bcb_get_zone (lru_list->top) != PGBUF_LRU_1_ZONE);
8945  assert (lru_list->bottom != NULL);
8946 
8947  /* link current top with new bcb */
8948  lru_list->top->prev_BCB = bcb;
8949  bcb->next_BCB = lru_list->top;
8950 
8951  /* no previous bcb's */
8952  bcb->prev_BCB = NULL;
8953 
8954  /* update top */
8955  lru_list->top = bcb;
8956  }
8957  }
8958  else
8959  {
8960  /* no, zone 1 is not empty */
8961  PGBUF_BCB *bcb_next = lru_list->bottom_1->next_BCB;
8962 
8963  assert (lru_list->top != NULL);
8964  assert (lru_list->bottom != NULL);
8965 
8966  /* insert after middle */
8967  lru_list->bottom_1->next_BCB = bcb;
8968  bcb->prev_BCB = lru_list->bottom_1;
8969 
8970  /* and before bcb_next */
8971  bcb->next_BCB = bcb_next;
8972  /* are zones 2/3 empty? */
8973  if (bcb_next == NULL)
8974  {
8975  /* yes. */
8976  /* middle must be also bottom */
8977  assert (lru_list->bottom == lru_list->bottom_1);
8978 
8979  /* update bottom */
8980  lru_list->bottom = bcb;
8981  }
8982  else
8983  {
8984  bcb_next->prev_BCB = bcb;
8985  }
8986  }
8987  if (lru_list->bottom_2 == NULL)
8988  {
8989  assert (lru_list->count_lru2 == 0);
8990  lru_list->bottom_2 = bcb;
8991  }
8992 
8993  /* save and increment list tick */
8994  if (++lru_list->tick_list >= DB_INT32_MAX)
8995  {
8996  lru_list->tick_list = 0;
8997  }
8998 
8999  pgbuf_bcb_change_zone (thread_p, bcb, lru_list->index, PGBUF_LRU_2_ZONE);
9000 }
9001 
9002 /*
9003  * pgbuf_lru_add_bcb_to_bottom () - add a bcb to lru list bottom
9004  *
9005  * return : void
9006  * thread_p (in) : thread entry
9007  * bcb (in) : bcb added to bottom
9008  * lru_list (in) : lru list
9009  */
9010 STATIC_INLINE void
9012 {
9013  /* is list empty? */
9014  if (lru_list->bottom == NULL)
9015  {
9016  /* yes, list is empty. top must be NULL */
9017  assert (lru_list->top == NULL);
9018 
9019  /* update bottom and top */
9020  lru_list->bottom = bcb;
9021  lru_list->top = bcb;
9022  bcb->prev_BCB = NULL;
9023  bcb->next_BCB = NULL;
9024 
9025  /* get tick_lru3 */
9026  bcb->tick_lru3 = lru_list->tick_lru3 - 1;
9027  }
9028  else
9029  {
9030  /* no, list is not empty. added after current bottom. */
9031  lru_list->bottom->next_BCB = bcb;
9032  bcb->prev_BCB = lru_list->bottom;
9033  bcb->next_BCB = NULL;
9034 
9035  /* set tick_lru3 smaller that current bottom's */
9036  bcb->tick_lru3 =
9037  PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->bottom) ? lru_list->bottom->tick_lru3 - 1 : lru_list->tick_lru3 - 1;
9038 
9039  /* update bottom */
9040  lru_list->bottom = bcb;
9041  }
9042  /* make sure tick_lru3 is not negative */
9043  if (bcb->tick_lru3 < 0)
9044  {
9045  bcb->tick_lru3 += DB_INT32_MAX;
9046  }
9047 
9048  pgbuf_bcb_change_zone (thread_p, bcb, lru_list->index, PGBUF_LRU_3_ZONE);
9049 }
9050 
9051 /*
9052  * pgbuf_lru_adjust_zone1 () - adjust zone 1 of lru list
9053  *
9054  * return : void
9055  * thread_p (in) : thread entry
9056  * lru_list (in) : lru list
9057  * min_one (in) : true to stop to at least one entry.
9058  */
9059 STATIC_INLINE void
9060 pgbuf_lru_adjust_zone1 (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
9061 {
9062  int threshold;
9063  PGBUF_BCB *bcb_bottom;
9064 
9065  threshold = lru_list->threshold_lru1;
9066  if (min_one)
9067  {
9068  threshold = MAX (1, threshold);
9069  }
9070  if (threshold >= lru_list->count_lru1)
9071  {
9072  /* no adjustments can be made */
9073  return;
9074  }
9075 
9076  assert (lru_list->count_lru1 > 0);
9077  assert (lru_list->bottom_1 != NULL);
9078 
9079  /* change bcb zones from 1 to 2 until lru 1 zone count is down to zone 1 desired threshold.
9080  * note: if zone 1 desired threshold is bigger, its bottom is not moved. */
9081  if (lru_list->bottom_2 == NULL)
9082  {
9083  /* bottom 1 will become bottom 2. */
9084  lru_list->bottom_2 = lru_list->bottom_1;
9085  }
9086 
9087  for (bcb_bottom = lru_list->bottom_1; threshold < lru_list->count_lru1; bcb_bottom = bcb_bottom->prev_BCB)
9088  {
9089  pgbuf_bcb_change_zone (thread_p, bcb_bottom, lru_list->index, PGBUF_LRU_2_ZONE);
9090  }
9091 
9092  /* update bottom of lru 1 */
9093  if (lru_list->count_lru1 == 0)
9094  {
9095  lru_list->bottom_1 = NULL;
9096  }
9097  else
9098  {
9099  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) == PGBUF_LRU_1_ZONE);
9100  lru_list->bottom_1 = bcb_bottom;
9101  }
9102 }
9103 
9104 /*
9105  * pgbuf_lru_adjust_zone2 () - adjust zone 2 of lru list based on desired threshold.
9106  *
9107  * return : void
9108  * thread_p (in) : thread entry
9109  * lru_list (in) : lru list
9110  * min_one (in) : true to stop to at least one entry.
9111  */
9112 STATIC_INLINE void
9113 pgbuf_lru_adjust_zone2 (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
9114 {
9115  PGBUF_BCB *bcb_bottom;
9116  PGBUF_BCB *bcb_prev;
9117  int threshold;
9118 
9119  threshold = lru_list->threshold_lru2;
9120  if (min_one)
9121  {
9122  threshold = MAX (1, threshold);
9123  }
9124  if (threshold >= lru_list->count_lru2)
9125  {
9126  /* no adjustments can be made */
9127  return;
9128  }
9129 
9130  assert (lru_list->count_lru2 > 0);
9131  assert (lru_list->bottom_2 != NULL);
9133 
9134  /* change bcb zones from 2 to 3 until lru 2 zone count is down to zone 2 desired threshold. */
9135  for (bcb_bottom = lru_list->bottom_2; threshold < lru_list->count_lru2; bcb_bottom = bcb_prev)
9136  {
9137  /* save prev BCB in case this is removed from list */
9138  bcb_prev = bcb_bottom->prev_BCB;
9139  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) == PGBUF_LRU_2_ZONE);
9140  pgbuf_lru_fall_bcb_to_zone_3 (thread_p, bcb_bottom, lru_list);
9141  }
9142  /* update bottom of lru 2 */
9143  if (lru_list->count_lru2 == 0)
9144  {
9145  lru_list->bottom_2 = NULL;
9146  }
9147  else
9148  {
9149  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) == PGBUF_LRU_2_ZONE);
9150  lru_list->bottom_2 = bcb_bottom;
9151  }
9152 }
9153 
9154 /*
9155  * pgbuf_lru_adjust_zones () - adjust the middle of lru list and update bcb zones
9156  *
9157  * return : void
9158  * thread_p (in) : thread entry
9159  * lru_list (in) : lru list
9160  * min_one (in) : true to keep at least one entry in 1&2 zones.
9161  */
9162 STATIC_INLINE void
9163 pgbuf_lru_adjust_zones (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, bool min_one)
9164 {
9165  PGBUF_BCB *bcb_bottom;
9166  PGBUF_BCB *bcb_prev;
9167  int threshold;
9168 
9169  /* first adjust zone 1 & 2 and convert to zone 3. then we'll adjust zone 1 (and convert to 2) */
9170  threshold = lru_list->threshold_lru1 + lru_list->threshold_lru2;
9171  if (min_one)
9172  {
9173  threshold = MAX (1, threshold);
9174  }
9175  if (threshold >= PGBUF_LRU_ZONE_ONE_TWO_COUNT (lru_list))
9176  {
9177  /* just try to adjust zone 1. */
9178  pgbuf_lru_adjust_zone1 (thread_p, lru_list, min_one);
9179  return;
9180  }
9181 
9182  assert (PGBUF_LRU_ZONE_ONE_TWO_COUNT (lru_list) > 0);
9183  assert (lru_list->bottom_1 != NULL || lru_list->bottom_2 != NULL);
9184 
9185  for (bcb_bottom = lru_list->bottom_2 != NULL ? lru_list->bottom_2 : lru_list->bottom_1;
9186  threshold < PGBUF_LRU_ZONE_ONE_TWO_COUNT (lru_list); bcb_bottom = bcb_prev)
9187  {
9188  /* save prev BCB in case this is removed from list */
9189  bcb_prev = bcb_bottom->prev_BCB;
9190 
9191  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) != PGBUF_LRU_3_ZONE);
9192 
9193  pgbuf_lru_fall_bcb_to_zone_3 (thread_p, bcb_bottom, lru_list);
9194  }
9195 
9196  if (lru_list->count_lru2 == 0)
9197  {
9198  lru_list->bottom_2 = NULL;
9199  if (lru_list->count_lru1 == 0)
9200  {
9201  lru_list->bottom_1 = NULL;
9202  }
9203  else
9204  {
9205  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) == PGBUF_LRU_1_ZONE);
9206  lru_list->bottom_1 = bcb_bottom;
9207  }
9208  }
9209  else
9210  {
9211  assert (bcb_bottom != NULL && pgbuf_bcb_get_zone (bcb_bottom) == PGBUF_LRU_2_ZONE);
9212  lru_list->bottom_2 = bcb_bottom;
9213  }
9214 
9215  pgbuf_lru_sanity_check (lru_list);
9216 
9217  pgbuf_lru_adjust_zone1 (thread_p, lru_list, min_one);
9218 }
9219 
9220 /*
9221  * pgbuf_lru_fall_bcb_to_zone_3 () - bcb falls to zone 3 of lru list
9222  *
9223  * return : void
9224  * thread_p (in) : thread entry
9225  * bcb (in) : bcb in lru list
9226  * lru_list (in) : lru list
9227  */
9228 STATIC_INLINE void
9230 {
9232 
9233 #if defined (SERVER_MODE)
9234  /* can we assign this directly as victim? */
9235 
9236  if (pgbuf_is_bcb_victimizable (bcb, false) && pgbuf_is_any_thread_waiting_for_direct_victim ())
9237  {
9238  if (pgbuf_bcb_is_to_vacuum (bcb))
9239  {
9241  {
9243  }
9244  /* fall through */
9245  }
9246  else
9247  {
9248  /* we first need mutex on bcb. however, we'd normally first get mutex on bcb and then on list. since we don't
9249  * want to over complicate things, just try a conditional lock on mutex. if it fails, we'll just give up
9250  * assigning the bcb directly as victim */
9251  if (PGBUF_BCB_TRYLOCK (bcb) == 0)
9252  {
9253  VPID vpid_copy = bcb->vpid;
9254  if (pgbuf_is_bcb_victimizable (bcb, true) && pgbuf_assign_direct_victim (thread_p, bcb))
9255  {
9257  {
9259  }
9260 
9261  /* since bcb is going to be removed from list and I have both lru and bcb mutex, why not do it now. */
9262  pgbuf_remove_from_lru_list (thread_p, bcb, lru_list);
9263 
9264  PGBUF_BCB_UNLOCK (bcb);
9265 
9266  pgbuf_add_vpid_to_aout_list (thread_p, &vpid_copy, lru_list->index);
9267  return;
9268  }
9269  /* not assigned. unlock bcb mutex and fall through */
9270  PGBUF_BCB_UNLOCK (bcb);
9271  }
9272  else
9273  {
9274  /* don't try too hard. it will be victimized eventually. */
9275  /* fall through */
9276  }
9277  }
9278  }
9279  /* not assigned directly */
9280 #endif /* SERVER_MODE */
9281 
9282  /* tick_lru3 */
9283  bcb->tick_lru3 = lru_list->tick_lru3;
9284  if (++lru_list->tick_lru3 >= DB_INT32_MAX)
9285  {
9286  lru_list->tick_lru3 = 0;
9287  }
9288  pgbuf_bcb_change_zone (thread_p, bcb, lru_list->index, PGBUF_LRU_3_ZONE);
9289 }
9290 
9291 /*
9292  * pgbuf_lru_boost_bcb () - boost bcb.
9293  *
9294  * return : void
9295  * thread_p (in) : thread entry
9296  * bcb (in) : bcb to move to top
9297  */
9298 static void
9300 {
9301  PGBUF_LRU_LIST *lru_list;
9302  PGBUF_ZONE zone = pgbuf_bcb_get_zone (bcb);
9303  bool is_private;
9304 
9305  assert (PGBUF_IS_BCB_IN_LRU (bcb));
9306 
9307  lru_list = pgbuf_lru_list_from_bcb (bcb);
9308  is_private = PGBUF_IS_PRIVATE_LRU_INDEX (lru_list->index);
9309 
9310  /* rules to boosting bcb's in lru lists (also see code in pgbuf_unlatch_bcb_upon_unfix):
9311  * 1. never boost bcb's in zone 1. this is usually the hottest part of the lists and should have a big hit ratio.
9312  * we'd like to avoid locking list mutex and making changes, these bcb's are in no danger of being victimized,
9313  * so we just don't move them.
9314  * 2. avoid boosting new and cold bcb's. a bcb can be fixed/unfixed several times and still be cold. many operations
9315  * will fix a page at least twice (once to read and once to write), and we'd like to avoid boosting the bcb on
9316  * second unfix. we do have a trick to detect such cases. we keep the list tick whenever new bcb's are inserted
9317  * to zones 1 and 2. if a page is quickly fixed several times, its "age" is really small (age being the difference
9318  * between the bcb's saved tick and current list tick), and we don't boost it. it should be unfixed again after
9319  * aging a little before being boosted to top.
9320  * 3. always boost from third zone, since these are decently old.
9321  *
9322  * note: early outs should be handled in pgbuf_unlatch_bcb_upon_unfix.
9323  */
9324 
9325  assert (zone != PGBUF_LRU_1_ZONE);
9326 
9327  /* we'll boost. collect stats */
9328  if (zone == PGBUF_LRU_2_ZONE)
9329  {
9331  }
9332  else
9333  {
9334  assert (zone == PGBUF_LRU_3_ZONE);
9335  perfmon_inc_stat (thread_p,
9337  }
9338 
9339  /* lock list */
9340  pthread_mutex_lock (&lru_list->mutex);
9341 
9342  /* remove from current position */
9343  pgbuf_remove_from_lru_list (thread_p, bcb, lru_list);
9344 
9345  /* add to top */
9346  pgbuf_lru_add_bcb_to_top (thread_p, bcb, lru_list);
9347 
9348  /* since we added a new bcb to lru 1, we should adjust zones */
9349  if (zone == PGBUF_LRU_2_ZONE)
9350  {
9351  /* adjust only zone 1 */
9352  pgbuf_lru_adjust_zone1 (thread_p, lru_list, true);
9353  }
9354  else
9355  {
9356  pgbuf_lru_adjust_zones (thread_p, lru_list, true);
9357  }
9358 
9359  pgbuf_lru_sanity_check (lru_list);
9360 
9361  /* unlock list */
9362  pthread_mutex_unlock (&lru_list->mutex);
9363 }
9364 
9365 /*
9366  * pgbuf_lru_add_new_bcb_to_top () - add a new bcb to top of lru list
9367  *
9368  * return : void
9369  * thread_p (in) : thread entry
9370  * bcb (in) : new bcb
9371  * lru_idx (in) : lru list index
9372  */
9373 STATIC_INLINE void
9374 pgbuf_lru_add_new_bcb_to_top (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int lru_idx)
9375 {
9376  PGBUF_LRU_LIST *lru_list;
9377 
9378  /* this is not meant for changes in this list */
9379  assert (!PGBUF_IS_BCB_IN_LRU (bcb));
9380 
9381  /* lock list */
9382  lru_list = &pgbuf_Pool.buf_LRU_list[lru_idx];
9383  pthread_mutex_lock (&lru_list->mutex);
9384 
9385  /* add to top */
9386  /* this is new bcb, we must init its list tick */
9387  bcb->tick_lru_list = lru_list->tick_list;
9388  pgbuf_lru_add_bcb_to_top (thread_p, bcb, lru_list);
9389 
9390  pgbuf_lru_sanity_check (lru_list);
9391 
9392  /* since we added a new bcb to lru 1, we should adjust zones */
9393  pgbuf_lru_adjust_zones (thread_p, lru_list, true);
9394 
9395  pgbuf_lru_sanity_check (lru_list);
9396 
9397  /* unlock list */
9398  pthread_mutex_unlock (&lru_list->mutex);
9399 }
9400 
9401 /*
9402  * pgbuf_lru_add_new_bcb_to_middle () - add a new bcb to middle of lru list
9403  *
9404  * return : void
9405  * thread_p (in) : thread entry
9406  * bcb (in) : new bcb
9407  * lru_idx (in) : lru list index
9408  */
9409 STATIC_INLINE void
9411 {
9412  PGBUF_LRU_LIST *lru_list;
9413 
9414  /* this is not meant for changes in this list */
9415  assert (!PGBUF_IS_BCB_IN_LRU (bcb));
9416 
9417  lru_list = &pgbuf_Pool.buf_LRU_list[lru_idx];
9418  pthread_mutex_lock (&lru_list->mutex);
9419 
9420  bcb->tick_lru_list = lru_list->tick_list;
9421  pgbuf_lru_add_bcb_to_middle (thread_p, bcb, lru_list);
9422 
9423  pgbuf_lru_sanity_check (lru_list);
9424 
9425  /* adjust zone 2 */
9426  pgbuf_lru_adjust_zone2 (thread_p, lru_list, true);
9427 
9428  pgbuf_lru_sanity_check (lru_list);
9429 
9430  pthread_mutex_unlock (&lru_list->mutex);
9431 }
9432 
9433 /*
9434  * pgbuf_lru_add_new_bcb_to_bottom () - add a new bcb to bottom of lru list
9435  *
9436  * return : void
9437  * thread_p (in) : thread entry
9438  * bcb (in) : new bcb
9439  * lru_idx (in) : lru list index
9440  */
9441 STATIC_INLINE void
9443 {
9444  PGBUF_LRU_LIST *lru_list;
9445 
9446  /* this is not meant for changes in this list */
9447  assert (!PGBUF_IS_BCB_IN_LRU (bcb));
9448 
9449  if (pgbuf_is_bcb_victimizable (bcb, true) && pgbuf_assign_direct_victim (thread_p, bcb))
9450  {
9451  /* assigned directly */
9452  /* TODO: add stat. this is actually not used for now. */
9453  return;
9454  }
9455 
9456  /* lock list */
9457  lru_list = &pgbuf_Pool.buf_LRU_list[lru_idx];
9458  pthread_mutex_lock (&lru_list->mutex);
9459 
9460  bcb->tick_lru_list = lru_list->tick_list;
9461  pgbuf_lru_add_bcb_to_bottom (thread_p, bcb, lru_list);
9462 
9463  pgbuf_lru_sanity_check (lru_list);
9464 
9465  /* unlock list */
9466  pthread_mutex_unlock (&lru_list->mutex);
9467 }
9468 
9469 /*
9470  * pgbuf_lru_remove_bcb () - remove bcb from lru list
9471  *
9472  * return : void
9473  * thread_p (in) : thread entry
9474  * bcb (in) : bcb
9475  */
9476 STATIC_INLINE void
9478 {
9479  PGBUF_LRU_LIST *lru_list;
9480 
9481  assert (PGBUF_IS_BCB_IN_LRU (bcb));
9482 
9483  lru_list = pgbuf_lru_list_from_bcb (bcb);
9484 
9485  /* lock list */
9486  pthread_mutex_lock (&lru_list->mutex);
9487 
9488  /* remove bcb from list */
9489  pgbuf_remove_from_lru_list (thread_p, bcb, lru_list);
9490 
9491  pgbuf_lru_sanity_check (lru_list);
9492 
9493  /* unlock list */
9494  pthread_mutex_unlock (&lru_list->mutex);
9495 }
9496 
9497 /*
9498  * pgbuf_lru_move_from_private_to_shared () - move a bcb from private list to shared list
9499  *
9500  * return : void
9501  * thread_p (in) : thread entry
9502  * bcb (in) : private list bcb
9503  */
9504 static void
9506 {
9507  /* bcb must be in private list */
9509 
9510  /* note: from statistics analysis, moves from private to shared are very rare, so we don't inline the function */
9511 
9512  /* remove bcb from its lru list */
9513  pgbuf_lru_remove_bcb (thread_p, bcb);
9514 
9515  /* add bcb to middle of shared list */
9517 
9519 }
9520 
9521 /*
9522  * pgbuf_remove_from_lru_list () - Remove a BCB from the LRU list
9523  * return : void
9524  * bufptr (in) : BCB
9525  * lru_list (in) : LRU list to which BVB currently belongs to
9526  *
9527  * Note: The caller MUST hold the LRU list mutex.
9528  */
9529 STATIC_INLINE void
9531 {
9532  PGBUF_BCB *bcb_prev = NULL;
9533 
9534  if (lru_list->top == bufptr)
9535  {
9536  lru_list->top = bufptr->next_BCB;
9537  }
9538 
9539  if (lru_list->bottom == bufptr)
9540  {
9541  lru_list->bottom = bufptr->prev_BCB;
9542  }
9543 
9544  if (lru_list->bottom_1 == bufptr)
9545  {
9546  lru_list->bottom_1 = bufptr->prev_BCB;
9547  }
9548 
9549  if (lru_list->bottom_2 == bufptr)
9550  {
9551  if (bufptr->prev_BCB != NULL && pgbuf_bcb_get_zone (bufptr->prev_BCB) == PGBUF_LRU_2_ZONE)
9552  {
9553  lru_list->bottom_2 = bufptr->prev_BCB;
9554  }
9555  else
9556  {
9557  assert (lru_list->count_lru2 == 1);
9558  lru_list->bottom_2 = NULL;
9559  }
9560  }
9561 
9562  if (bufptr->next_BCB != NULL)
9563  {
9564  (bufptr->next_BCB)->prev_BCB = bufptr->prev_BCB;
9565  }
9566 
9567  bcb_prev = bufptr->prev_BCB;
9568  if (bcb_prev != NULL)
9569  {
9570  bcb_prev->next_BCB = bufptr->next_BCB;
9571  }
9572 
9573  bufptr->prev_BCB = NULL;
9574  bufptr->next_BCB = NULL;
9575 
9576  /* we need to update the victim hint now, since bcb has been disconnected from list.
9577  * pgbuf_lru_remove_victim_candidate will not which is the previous BCB. we cannot change the hint before
9578  * disconnecting the bcb from list, we need to be sure no one else sets the hint to this bcb. */
9579  pgbuf_lru_advance_victim_hint (thread_p, lru_list, bufptr, bcb_prev, false);
9580 
9581  /* update zone */
9582  pgbuf_bcb_change_zone (thread_p, bufptr, 0, PGBUF_VOID_ZONE);
9583 }
9584 
9585 /*
9586  * pgbuf_move_bcb_to_bottom_lru () - move a bcb to the bottom of its lru (or other lru if it is in the void zone).
9587  *
9588  * return : void
9589  * thread_p (in) : thread entry
9590  * bcb (in) : bcb
9591  */
9592 static void
9594 {
9595  PGBUF_ZONE zone = pgbuf_bcb_get_zone (bcb);
9596  int lru_idx;
9597  PGBUF_LRU_LIST *lru_list;
9598 
9600 
9601  if (zone == PGBUF_VOID_ZONE)
9602  {
9603  /* move to the bottom of a lru list so it can be found by flush thread */
9604  if (PGBUF_THREAD_HAS_PRIVATE_LRU (thread_p))
9605  {
9607  }
9608  else
9609  {
9611  }
9612  pgbuf_lru_add_new_bcb_to_bottom (thread_p, bcb, lru_idx);
9613  }
9614  else if (zone & PGBUF_LRU_ZONE_MASK)
9615  {
9616  lru_idx = pgbuf_bcb_get_lru_index (bcb);
9617  lru_list = PGBUF_GET_LRU_LIST (lru_idx);
9618  if (bcb == lru_list->bottom)
9619  {
9620  /* early out */
9621  return;
9622  }
9623  pthread_mutex_lock (&lru_list->mutex);
9624  pgbuf_remove_from_lru_list (thread_p, bcb, lru_list);
9625  pgbuf_lru_add_bcb_to_bottom (thread_p, bcb, lru_list);
9626  pthread_mutex_unlock (&lru_list->mutex);
9627  }
9628  else
9629  {
9630  assert (false);
9631  }
9632 }
9633 
9634 /*
9635  * pgbuf_add_vpid_to_aout_list () - add VPID to Aout list
9636  * return : void
9637  * thread_p (in) :
9638  * vpid (in) :
9639  * lru_idx (in) : LRU index in which the VPID had been
9640  */
9641 STATIC_INLINE void
9642 pgbuf_add_vpid_to_aout_list (THREAD_ENTRY * thread_p, const VPID * vpid, const int lru_idx)
9643 {
9644 #if defined(SERVER_MODE)
9645  int rv;
9646 #endif /* SERVER_MODE */
9647  PGBUF_AOUT_LIST *list;
9648  PGBUF_AOUT_BUF *aout_buf;
9649  int hash_idx = 0;
9650 
9651  if (pgbuf_Pool.buf_AOUT_list.max_count <= 0)
9652  {
9653  return;
9654  }
9655 
9656  assert (!VPID_ISNULL (vpid));
9657 
9658  list = &pgbuf_Pool.buf_AOUT_list;
9659 
9660  rv = pthread_mutex_lock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9661 
9662  if (list->Aout_free == NULL)
9663  {
9664  assert (list->Aout_bottom != NULL);
9665  /* disconnect the bottom */
9666  aout_buf = list->Aout_bottom;
9667  if (list->Aout_bottom->prev == NULL)
9668  {
9669  assert (false);
9670  }
9671  list->Aout_bottom = list->Aout_bottom->prev;
9672  list->Aout_bottom->next = NULL;
9673 
9674  /* also remove entry from hash table */
9675  hash_idx = AOUT_HASH_IDX (&aout_buf->vpid, list);
9676  mht_rem (list->aout_buf_ht[hash_idx], &aout_buf->vpid, NULL, NULL);
9677  }
9678  else
9679  {
9680  aout_buf = list->Aout_free;
9681  list->Aout_free = list->Aout_free->next;
9682  }
9683 
9684  aout_buf->next = NULL;
9685  aout_buf->prev = NULL;
9686  aout_buf->lru_idx = lru_idx;
9687  VPID_COPY (&aout_buf->vpid, vpid);
9688 
9689  /* add to hash */
9690  hash_idx = AOUT_HASH_IDX (&aout_buf->vpid, list);
9691  mht_put (list->aout_buf_ht[hash_idx], &aout_buf->vpid, aout_buf);
9692 
9693  if (list->Aout_top == NULL)
9694  {
9695  /* this is the only page in the Aout list */
9696  assert (list->Aout_bottom == NULL);
9697 
9698  aout_buf->next = NULL;
9699  aout_buf->prev = NULL;
9700 
9701  list->Aout_top = aout_buf;
9702  list->Aout_bottom = aout_buf;
9703  }
9704  else
9705  {
9706  aout_buf->next = list->Aout_top;
9707  list->Aout_top->prev = aout_buf;
9708  list->Aout_top = aout_buf;
9709  }
9710 
9711  pthread_mutex_unlock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9712 }
9713 
9714 /*
9715  * pgbuf_remove_vpid_from_aout_list () - Search for VPID in Aout and remove it from the queue
9716  * return : identifier of list from which was removed:
9717  * 0 and positive: LRU list
9718  * PGBUF_AOUT_NOT_FOUND: not found in Aout list
9719  * thread_p (in) :
9720  * vpid (in) :
9721  */
9722 static int
9724 {
9725 #if defined(SERVER_MODE)
9726  int rv;
9727 #endif /* SERVER_MODE */
9728  PGBUF_AOUT_BUF *aout_buf;
9729  int hash_idx;
9730  int aout_list_id = PGBUF_AOUT_NOT_FOUND;
9731 
9732  if (pgbuf_Pool.buf_AOUT_list.max_count <= 0)
9733  {
9734  /* Aout list not used */
9735  return PGBUF_AOUT_NOT_FOUND;
9736  }
9737 
9738  hash_idx = AOUT_HASH_IDX (vpid, (&pgbuf_Pool.buf_AOUT_list));
9739 
9740  rv = pthread_mutex_lock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9741  /* Search the vpid in the hash table */
9742  aout_buf = (PGBUF_AOUT_BUF *) mht_get (pgbuf_Pool.buf_AOUT_list.aout_buf_ht[hash_idx], vpid);
9743  if (aout_buf == NULL)
9744  {
9745  /* Not there, just return */
9746  pthread_mutex_unlock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9747  return PGBUF_AOUT_NOT_FOUND;
9748  }
9749 
9750  /* We can assume that aout_buf is what we're looking for if it still has the same VPID as before acquiring the mutex.
9751  * The reason for this is that nobody can change it while we're holding the mutex. Any changes must be visible before
9752  * we acquire this mutex */
9753  aout_list_id = aout_buf->lru_idx;
9754  if (aout_buf == pgbuf_Pool.buf_AOUT_list.Aout_bottom)
9755  {
9756  pgbuf_Pool.buf_AOUT_list.Aout_bottom = pgbuf_Pool.buf_AOUT_list.Aout_bottom->prev;
9757 
9758  if (pgbuf_Pool.buf_AOUT_list.Aout_bottom != NULL)
9759  {
9760  pgbuf_Pool.buf_AOUT_list.Aout_bottom->next = NULL;
9761  }
9762  aout_buf->prev = NULL;
9763  }
9764 
9765  if (aout_buf == pgbuf_Pool.buf_AOUT_list.Aout_top)
9766  {
9767  pgbuf_Pool.buf_AOUT_list.Aout_top = pgbuf_Pool.buf_AOUT_list.Aout_top->next;
9768 
9769  if (pgbuf_Pool.buf_AOUT_list.Aout_top != NULL)
9770  {
9771  pgbuf_Pool.buf_AOUT_list.Aout_top->prev = NULL;
9772  }
9773  aout_buf->next = NULL;
9774  }
9775 
9776  if (aout_buf->prev != NULL)
9777  {
9778  aout_buf->prev->next = aout_buf->next;
9779  }
9780  if (aout_buf->next != NULL)
9781  {
9782  aout_buf->next->prev = aout_buf->prev;
9783  }
9784 
9785  /* remove vpid from hash */
9786  mht_rem (pgbuf_Pool.buf_AOUT_list.aout_buf_ht[hash_idx], vpid, NULL, NULL);
9787 
9788  /* add to free list */
9789  VPID_SET_NULL (&aout_buf->vpid);
9790  aout_buf->lru_idx = PGBUF_AOUT_NOT_FOUND;
9791  aout_buf->next = NULL;
9792  aout_buf->prev = NULL;
9793 
9794  aout_buf->next = pgbuf_Pool.buf_AOUT_list.Aout_free;
9795  pgbuf_Pool.buf_AOUT_list.Aout_free = aout_buf;
9796 
9797  pthread_mutex_unlock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9798 
9799  return aout_list_id;
9800 }
9801 
9802 /*
9803  * pgbuf_remove_private_from_aout_list () - Search for VPID in Aout and removes all VPIDs having a specific LRU idx
9804  *
9805  * return : number of VPIDs removed
9806  * lru_idx (in) :
9807  */
9808 static int
9810 {
9811  PGBUF_AOUT_BUF *aout_buf;
9812  PGBUF_AOUT_BUF *aout_buf_next;
9813  int hash_idx;
9814  int cnt_removed = 0;
9815 
9816  if (pgbuf_Pool.buf_AOUT_list.max_count <= 0)
9817  {
9818  /* Aout list not used */
9819  return cnt_removed;
9820  }
9821 
9822  pthread_mutex_lock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9823  aout_buf = pgbuf_Pool.buf_AOUT_list.Aout_top;
9824  while (aout_buf != NULL)
9825  {
9826  if (aout_buf->lru_idx != lru_idx)
9827  {
9828  aout_buf = aout_buf->next;
9829  continue;
9830  }
9831 
9832  aout_buf_next = aout_buf->next;
9833 
9834  /* remove this item */
9835  if (aout_buf == pgbuf_Pool.buf_AOUT_list.Aout_bottom)
9836  {
9837  pgbuf_Pool.buf_AOUT_list.Aout_bottom = pgbuf_Pool.buf_AOUT_list.Aout_bottom->prev;
9838 
9839  if (pgbuf_Pool.buf_AOUT_list.Aout_bottom != NULL)
9840  {
9841  pgbuf_Pool.buf_AOUT_list.Aout_bottom->next = NULL;
9842  }
9843  aout_buf->prev = NULL;
9844  }
9845 
9846  if (aout_buf == pgbuf_Pool.buf_AOUT_list.Aout_top)
9847  {
9848  pgbuf_Pool.buf_AOUT_list.Aout_top = pgbuf_Pool.buf_AOUT_list.Aout_top->next;
9849 
9850  if (pgbuf_Pool.buf_AOUT_list.Aout_top != NULL)
9851  {
9852  pgbuf_Pool.buf_AOUT_list.Aout_top->prev = NULL;
9853  }
9854  aout_buf->next = NULL;
9855  }
9856 
9857  if (aout_buf->prev != NULL)
9858  {
9859  aout_buf->prev->next = aout_buf->next;
9860  }
9861  if (aout_buf->next != NULL)
9862  {
9863  aout_buf->next->prev = aout_buf->prev;
9864  }
9865 
9866  hash_idx = AOUT_HASH_IDX (&aout_buf->vpid, (&pgbuf_Pool.buf_AOUT_list));
9867  mht_rem (pgbuf_Pool.buf_AOUT_list.aout_buf_ht[hash_idx], &aout_buf->vpid, NULL, NULL);
9868 
9869  /* add to free list */
9870  VPID_SET_NULL (&aout_buf->vpid);
9871  aout_buf->lru_idx = PGBUF_AOUT_NOT_FOUND;
9872  aout_buf->next = NULL;
9873  aout_buf->prev = NULL;
9874 
9875  aout_buf->next = pgbuf_Pool.buf_AOUT_list.Aout_free;
9876  pgbuf_Pool.buf_AOUT_list.Aout_free = aout_buf;
9877 
9878  aout_buf = aout_buf_next;
9879  cnt_removed++;
9880  }
9881 
9882  pthread_mutex_unlock (&pgbuf_Pool.buf_AOUT_list.Aout_mutex);
9883 
9884  return cnt_removed;
9885 }
9886 
9887 /*
9888  * pgbuf_bcb_flush_with_wal () - write a buffer page to disk.
9889  *
9890  * return : error code
9891  * thread_p (in) : thread entry
9892  * bufptr (in) : bcb
9893  * is_page_flush_thread (in) : true if caller is page flush thread. false otherwise.
9894  * is_bcb_locked (out) : output whether bcb remains locked or not.
9895  */
9896 STATIC_INLINE int
9897 pgbuf_bcb_flush_with_wal (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, bool is_page_flush_thread, bool * is_bcb_locked)
9898 {
9899  char page_buf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
9900  FILEIO_PAGE *iopage = NULL;
9901  PAGE_PTR pgptr = NULL;
9902  LOG_LSA oldest_unflush_lsa;
9903  int error = NO_ERROR;
9904 #if defined(ENABLE_SYSTEMTAP)
9905  QUERY_ID query_id = NULL_QUERY_ID;
9906  bool monitored = false;
9907 #endif /* ENABLE_SYSTEMTAP */
9908  bool was_dirty = false, uses_dwb;
9909  DWB_SLOT *dwb_slot = NULL;
9910  LOG_LSA lsa;
9911  FILEIO_WRITE_MODE write_mode;
9912  bool is_temp = pgbuf_is_temporary_volume (bufptr->vpid.volid);
9913  TDE_ALGORITHM tde_algo = TDE_ALGORITHM_NONE;
9914  int tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
9915  PGBUF_STATUS *show_status = &pgbuf_Pool.show_status[tran_index];
9916 
9917 
9918  PGBUF_BCB_CHECK_OWN (bufptr);
9919 
9920  /* the caller is holding bufptr->mutex */
9921  *is_bcb_locked = true;
9922 
9923  assert (bufptr->latch_mode == PGBUF_NO_LATCH || bufptr->latch_mode == PGBUF_LATCH_READ
9924  || bufptr->latch_mode == PGBUF_LATCH_WRITE);
9925 #if !defined (NDEBUG) && defined (SERVER_MODE)
9926  if (bufptr->latch_mode == PGBUF_LATCH_WRITE)
9927  {
9928  /* I must be the owner, or else we'll be in trouble. */
9929  int thread_index = thread_p->index;
9930  PGBUF_HOLDER_ANCHOR *thrd_holder_info = &pgbuf_Pool.thrd_holder_info[thread_index];
9931  PGBUF_HOLDER *holder = NULL;
9932 
9933  /* Search for bufptr in current thread holder list. */
9934  for (holder = thrd_holder_info->thrd_hold_list; holder != NULL; holder = holder->thrd_link)
9935  {
9936  if (holder->bufptr == bufptr)
9937  {
9938  break;
9939  }
9940  }
9941  /* Safe guard: I must be the bufptr holder. */
9942  assert (holder != NULL);
9943  }
9944 #endif /* !NDEBUG */
9945 
9946  /* how this works:
9947  *
9948  * caller should already have bcb locked. we don't do checks of opportunity or correctness here (that's up to the
9949  * caller).
9950  *
9951  * we copy the page and save oldest_unflush_lsa and then we try to write the page to disk. if writing fails, we
9952  * "revert" changes (restore dirty flag and oldest_unflush_lsa).
9953  *
9954  * if successful, we choose one of the paths:
9955  * 1. send the page to post-flush to process it and assign it directly (if this is page flush thread and victimization
9956  * system is stressed).
9957  * 2. lock bcb again, clear is flushing status, wake up of threads waiting for flush and return.
9958  */
9959 
9960  if (pgbuf_check_bcb_page_vpid (bufptr, false) != true)
9961  {
9962  assert (false);
9963  return ER_FAILED;
9964  }
9965 
9966  was_dirty = pgbuf_bcb_mark_is_flushing (thread_p, bufptr);
9967 
9968  uses_dwb = dwb_is_created () && !is_temp;
9969 
9970 start_copy_page:
9971  iopage = (FILEIO_PAGE *) PTR_ALIGN (page_buf, MAX_ALIGNMENT);
9972  CAST_BFPTR_TO_PGPTR (pgptr, bufptr);
9973  tde_algo = pgbuf_get_tde_algorithm (pgptr);
9974  if (tde_algo != TDE_ALGORITHM_NONE)
9975  {
9976  error = tde_encrypt_data_page (&bufptr->iopage_buffer->iopage, tde_algo, is_temp, iopage);
9977  if (error != NO_ERROR)
9978  {
9979  ASSERT_ERROR ();
9980  return error;
9981  }
9982  }
9983  else
9984  {
9985  memcpy ((void *) iopage, (void *) (&bufptr->iopage_buffer->iopage), IO_PAGESIZE);
9986  }
9987  if (uses_dwb)
9988  {
9989  error = dwb_set_data_on_next_slot (thread_p, iopage, false, &dwb_slot);
9990  if (error != NO_ERROR)
9991  {
9992  return error;
9993  }
9994  if (dwb_slot != NULL)
9995  {
9996  iopage = NULL;
9997  goto copy_unflushed_lsa;
9998  }
9999  }
10000 
10001 copy_unflushed_lsa:
10002  LSA_COPY (&lsa, &(bufptr->iopage_buffer->iopage.prv.lsa));
10003  LSA_COPY (&oldest_unflush_lsa, &bufptr->oldest_unflush_lsa);
10004  LSA_SET_NULL (&bufptr->oldest_unflush_lsa);
10005 
10006  PGBUF_BCB_UNLOCK (bufptr);
10007  *is_bcb_locked = false;
10008 
10009  if (!LSA_ISNULL (&oldest_unflush_lsa))
10010  {
10011  /* confirm WAL protocol */
10012  /* force log record to disk */
10013  logpb_flush_log_for_wal (thread_p, &lsa);
10014  }
10015  else
10016  {
10017  /* if page was changed, the change was not logged. this is a rare case, but can happen. */
10018  if (!pgbuf_is_temporary_volume (bufptr->vpid.volid))
10019  {
10020  er_log_debug (ARG_FILE_LINE, "flushing page %d|%d to disk without logging.\n", VPID_AS_ARGS (&bufptr->vpid));
10021  }
10022  }
10023 
10024 #if defined(ENABLE_SYSTEMTAP)
10025  query_id = qmgr_get_current_query_id (thread_p);
10026  if (query_id != NULL_QUERY_ID)
10027  {
10028  monitored = true;
10029  CUBRID_IO_WRITE_START (query_id);
10030  }
10031 #endif /* ENABLE_SYSTEMTAP */
10032 
10033  /* Activating/deactivating DWB while the server is alive, needs additional work. For now, we don't care about
10034  * this case, we can use it to test performance differences.
10035  */
10036  if (uses_dwb)
10037  {
10038  error = dwb_add_page (thread_p, iopage, &bufptr->vpid, &dwb_slot);
10039  if (error == NO_ERROR)
10040  {
10041  if (dwb_slot == NULL)
10042  {
10043  /* DWB disabled meanwhile, try again without DWB. */
10044  uses_dwb = false;
10045  PGBUF_BCB_LOCK (bufptr);
10046  *is_bcb_locked = true;
10047  goto start_copy_page;
10048  }
10049  }
10050  }
10051  else
10052  {
10053  show_status->num_pages_written++;
10054 
10055  /* Record number of writes in statistics */
10057 
10059  if (fileio_write (thread_p, fileio_get_volume_descriptor (bufptr->vpid.volid), iopage, bufptr->vpid.pageid,
10060  IO_PAGESIZE, write_mode) == NULL)
10061  {
10062  error = ER_FAILED;
10063  }
10064  }
10065 
10066 #if defined(ENABLE_SYSTEMTAP)
10067  if (monitored == true)
10068  {
10069  CUBRID_IO_WRITE_END (query_id, IO_PAGESIZE, (error != NO_ERROR));
10070  }
10071 #endif /* ENABLE_SYSTEMTAP */
10072 
10073  if (error != NO_ERROR)
10074  {
10075  PGBUF_BCB_LOCK (bufptr);
10076  *is_bcb_locked = true;
10077  pgbuf_bcb_mark_was_not_flushed (thread_p, bufptr, was_dirty);
10078  LSA_COPY (&bufptr->oldest_unflush_lsa, &oldest_unflush_lsa);
10079 
10080 #if defined (SERVER_MODE)
10081  if (bufptr->next_wait_thrd != NULL)
10082  {
10083  pgbuf_wake_flush_waiters (thread_p, bufptr);
10084  }
10085 #endif
10086 
10087  return ER_FAILED;
10088  }
10089 
10090  assert (bufptr->latch_mode != PGBUF_LATCH_FLUSH);
10091 
10092 #if defined (SERVER_MODE)
10093  /* if the flush thread is under pressure, we'll move some of the workload to post-flush thread. */
10094  if (is_page_flush_thread && (pgbuf_Page_post_flush_daemon != NULL)
10095  && pgbuf_is_any_thread_waiting_for_direct_victim () && pgbuf_Pool.flushed_bcbs->produce (bufptr))
10096  {
10097  /* page buffer maintenance thread will try to assign this bcb directly as victim. */
10098  pgbuf_Page_post_flush_daemon->wakeup ();
10100  {
10102  }
10103  }
10104  else
10105 #endif /* SERVER_MODE */
10106  {
10107  PGBUF_BCB_LOCK (bufptr);
10108  *is_bcb_locked = true;
10109  pgbuf_bcb_mark_was_flushed (thread_p, bufptr);
10110 
10111 #if defined (SERVER_MODE)
10112  if (bufptr->next_wait_thrd != NULL)
10113  {
10114  pgbuf_wake_flush_waiters (thread_p, bufptr);
10115  }
10116 #endif
10117  }
10118 
10120  {
10122  }
10123 
10124  return NO_ERROR;
10125 }
10126 
10127 /*
10128  * pgbuf_wake_flush_waiters () - wake up all threads waiting for flush
10129  *
10130  * return : void
10131  * thread_p (in) : thread entry
10132  * bcb (in) : flushed bcb
10133  */
10134 static void
10136 {
10137 #if defined (SERVER_MODE)
10138  THREAD_ENTRY *prev_waiter = NULL;
10139  THREAD_ENTRY *crt_waiter = NULL;
10140  THREAD_ENTRY *save_next_waiter = NULL;
10141  PERF_UTIME_TRACKER timetr;
10142 
10143  PERF_UTIME_TRACKER_START (thread_p, &timetr);
10144 
10145  PGBUF_BCB_CHECK_OWN (bcb);
10146 
10147  for (crt_waiter = bcb->next_wait_thrd; crt_waiter != NULL; crt_waiter = save_next_waiter)
10148  {
10149  save_next_waiter = crt_waiter->next_wait_thrd;
10150 
10151  if (crt_waiter->request_latch_mode == PGBUF_LATCH_FLUSH)
10152  {
10153  /* wakeup and remove from list */
10154  if (prev_waiter != NULL)
10155  {
10156  prev_waiter->next_wait_thrd = save_next_waiter;
10157  }
10158  else
10159  {
10160  bcb->next_wait_thrd = save_next_waiter;
10161  }
10162 
10163  crt_waiter->next_wait_thrd = NULL;
10164  pgbuf_wakeup_uncond (crt_waiter);
10165  }
10166  else
10167  {
10168  prev_waiter = crt_waiter;
10169  }
10170  }
10171 
10172  PERF_UTIME_TRACKER_TIME (thread_p, &timetr, PSTAT_PB_WAKE_FLUSH_WAITER);
10173 #endif /* SERVER_MODE */
10174 }
10175 
10176 /*
10177  * pgbuf_is_exist_blocked_reader_writer () - checks whether there exists any blocked reader/writer
10178  * return: if found, true, otherwise, false
10179  * bufptr(in): pointer to buffer page
10180  */
10181 STATIC_INLINE bool
10183 {
10184 #if defined(SERVER_MODE)
10185  THREAD_ENTRY *thrd_entry;
10186 
10187  /* check whether there exists any blocked reader/writer */
10188  thrd_entry = bufptr->next_wait_thrd;
10189  while (thrd_entry != NULL)
10190  {
10191  if (thrd_entry->request_latch_mode == PGBUF_LATCH_READ || thrd_entry->request_latch_mode == PGBUF_LATCH_WRITE)
10192  {
10193  return true;
10194  }
10195 
10196  thrd_entry = thrd_entry->next_wait_thrd;
10197  }
10198 #endif /* SERVER_MODE */
10199 
10200  return false;
10201 }
10202 
10203 /*
10204  * pgbuf_get_check_page_validation_level -
10205  * return:
10206  *
10207  */
10208 STATIC_INLINE bool
10209 pgbuf_get_check_page_validation_level (int page_validation_level)
10210 {
10211 #if !defined(NDEBUG)
10212  return prm_get_integer_value (PRM_ID_PB_DEBUG_PAGE_VALIDATION_LEVEL) >= page_validation_level;
10213 #else /* NDEBUG */
10214  return false;
10215 #endif /* NDEBUG */
10216 }
10217 
10218 /*
10219  * pgbuf_is_valid_page () - Verify if given page is a valid one
10220  * return: either: DISK_INVALID, DISK_VALID, DISK_ERROR
10221  * vpid(in): Complete Page identifier
10222  * fun(in): A second function to call to verify if the above page is valid
10223  * The function is called on vpid, and arguments
10224  * args(in): Additional argument for fun
10225  *
10226  * Note: Verify that the given page is valid according to functions:
10227  * 1) disk_isvalid_page
10228  * 2) given fun2 is any
10229  * The function is a NOOP if we are not running with full debugging
10230  * capabilities.
10231  */
10233 pgbuf_is_valid_page (THREAD_ENTRY * thread_p, const VPID * vpid, bool no_error,
10234  DISK_ISVALID (*fun) (const VPID * vpid, void *args), void *args)
10235 {
10236  DISK_ISVALID valid;
10237 
10238  /* TODO: fix me */
10239 
10240  if (fileio_get_volume_label (vpid->volid, PEEK) == NULL || VPID_ISNULL (vpid))
10241  {
10242  assert (no_error);
10243 
10244  return DISK_INVALID;
10245  }
10246 
10247  /*valid = disk_isvalid_page (thread_p, vpid->volid, vpid->pageid); */
10248  valid = disk_is_page_sector_reserved_with_debug_crash (thread_p, vpid->volid, vpid->pageid, !no_error);
10249  if (valid != DISK_VALID || (fun != NULL && (valid = (*fun) (vpid, args)) != DISK_VALID))
10250  {
10251  if (valid != DISK_ERROR && !no_error)
10252  {
10255 
10256  assert (false);
10257  }
10258  }
10259 
10260  return valid;
10261 }
10262 
10263 /*
10264  * pgbuf_is_valid_page_ptr () - Validate an in-memory page pointer
10265  * return: true/false
10266  * pgptr(in): Pointer to page
10267  *
10268  * Note: Verify if the given page pointer points to the beginning of a
10269  * in-memory page pointer. This function is used for debugging purposes.
10270  */
10271 static bool
10273 {
10274  PGBUF_BCB *bufptr;
10275  int bufid;
10276 
10277  assert (pgptr != NULL);
10278 
10279  /* NOTE: Does not need to hold mutex since the page is fixed */
10280  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
10281  {
10282  bufptr = PGBUF_FIND_BCB_PTR (bufid);
10283  PGBUF_BCB_LOCK (bufptr);
10284 
10285  if (((PAGE_PTR) (&(bufptr->iopage_buffer->iopage.page[0]))) == pgptr)
10286  {
10287  if (bufptr->fcnt <= 0)
10288  {
10289  /* This situation must not be occurred. */
10290  assert (false);
10292  fileio_get_volume_label (bufptr->vpid.volid, PEEK));
10293  PGBUF_BCB_UNLOCK (bufptr);
10294 
10295  return false;
10296  }
10297  else
10298  {
10299  PGBUF_BCB_UNLOCK (bufptr);
10300 
10301  return true;
10302  }
10303  }
10304  else
10305  {
10306  PGBUF_BCB_UNLOCK (bufptr);
10307  }
10308  }
10309 
10311 
10312  assert (false);
10313 
10314  return false;
10315 }
10316 
10317 /*
10318  * pgbuf_check_page_type () - Check the page type is as expected. If it isn't an assert will be hit.
10319  *
10320  * return : True if the page type is as expected.
10321  * thread_p (in) : Thread entry.
10322  * pgptr (in) : Pointer to buffer page.
10323  * ptype (in) : Expected page type.
10324  */
10325 bool
10327 {
10328  return pgbuf_check_page_ptype_internal (pgptr, ptype, false);
10329 }
10330 
10331 /*
10332  * pgbuf_check_page_type_no_error () - Return if the page type is the expected type given as argument. No assert is
10333  * hit if not.
10334  *
10335  * return : True if the page type is as expected.
10336  * thread_p (in) : Thread entry.
10337  * pgptr (in) : Pointer to buffer page.
10338  * ptype (in) : Expected page type.
10339  */
10340 bool
10342 {
10343  return pgbuf_check_page_ptype_internal (pgptr, ptype, true);
10344 }
10345 
10346 /*
10347  * pgbuf_check_page_ptype_internal () -
10348  * return: true/false
10349  * bufptr(in): pointer to buffer page
10350  * ptype(in): page type
10351  *
10352  * Note: Verify if the given page's ptype is valid.
10353  * This function is used for debugging purposes.
10354  */
10355 STATIC_INLINE bool
10357 {
10358  PGBUF_BCB *bufptr;
10359 
10360  if (pgptr == NULL)
10361  {
10362  assert (false);
10363  return false;
10364  }
10365 
10366 #if 1 /* TODO - do not delete me */
10367 #if defined(NDEBUG)
10368  if (log_is_in_crash_recovery ())
10369  {
10370  return true;
10371  }
10372 #endif
10373 #endif
10374 
10376  {
10377  if (pgbuf_is_valid_page_ptr (pgptr) == false)
10378  {
10379  return false;
10380  }
10381  }
10382 
10383  /* NOTE: Does not need to hold mutex since the page is fixed */
10384 
10385  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
10386  assert (!VPID_ISNULL (&bufptr->vpid));
10387 
10388  if (pgbuf_check_bcb_page_vpid (bufptr, false) == true)
10389  {
10390  if (bufptr->iopage_buffer->iopage.prv.ptype != PAGE_UNKNOWN && bufptr->iopage_buffer->iopage.prv.ptype != ptype)
10391  {
10392  assert_release (no_error);
10393  return false;
10394  }
10395  }
10396  else
10397  {
10398  assert_release (false);
10399  return false;
10400  }
10401 
10402  return true;
10403 }
10404 
10405 /*
10406  * pgbuf_check_bcb_page_vpid () - Validate an FILEIO_PAGE prv
10407  * return: true/false
10408  * bufptr(in): pointer to buffer page
10409  * maybe_deallocated(in) : true, if page may be deallocated
10410  *
10411  * Note: Verify if the given page's prv is valid.
10412  * This function is used for debugging purposes.
10413  */
10414 STATIC_INLINE bool
10415 pgbuf_check_bcb_page_vpid (PGBUF_BCB * bufptr, bool maybe_deallocated)
10416 {
10417  if (bufptr == NULL || VPID_ISNULL (&bufptr->vpid))
10418  {
10419  assert (bufptr != NULL);
10420  assert (!VPID_ISNULL (&bufptr->vpid));
10421  return false;
10422  }
10423 
10424  /* perm volume */
10425  if (bufptr->vpid.volid > NULL_VOLID)
10426  {
10427  /* Check Page identifier */
10429  || (bufptr->vpid.pageid == bufptr->iopage_buffer->iopage.prv.pageid
10430  && bufptr->vpid.volid == bufptr->iopage_buffer->iopage.prv.volid));
10431 
10432  assert (bufptr->iopage_buffer->iopage.prv.p_reserve_1 == 0);
10433  assert (bufptr->iopage_buffer->iopage.prv.p_reserve_2 == 0);
10434 
10435  return (bufptr->vpid.pageid == bufptr->iopage_buffer->iopage.prv.pageid
10436  && bufptr->vpid.volid == bufptr->iopage_buffer->iopage.prv.volid);
10437  }
10438  else
10439  {
10440  return true; /* nop */
10441  }
10442 }
10443 
10444 #if defined(CUBRID_DEBUG)
10445 /*
10446  * pgbuf_scramble () - Scramble the content of the buffer
10447  * return: void
10448  * iopage(in): Pointer to page portion
10449  *
10450  * Note: This is done for debugging reasons to make sure that a user of a
10451  * buffer does not assume that buffers are initialized to zero. For safty
10452  * reasons, the buffers are initialized to zero, instead of scrambled,
10453  * when running in production mode.
10454  */
10455 static void
10456 pgbuf_scramble (FILEIO_PAGE * iopage)
10457 {
10458  MEM_REGION_INIT (iopage, IO_PAGESIZE);
10460 
10461  /* Init Page identifier */
10462  iopage->prv.pageid = -1;
10463  iopage->prv.volid = -1;
10464 
10465  iopage->prv.ptype = '\0';
10466  iopage->prv.pflag = '\0';
10467  iopage->prv.p_reserve_1 = 0;
10468  iopage->prv.p_reserve_2 = 0;
10469  iopage->prv.tde_nonce = 0;
10470 }
10471 
10472 /*
10473  * pgbuf_dump_if_any_fixed () - Dump buffer pool if any page buffer is fixed
10474  * return: void
10475  *
10476  * Note: This is a debugging function that can be used to verify if buffers
10477  * were freed after a set of operations (e.g., a request or a API
10478  * function).
10479  * This function will not give you good results when there are multiple
10480  * users in the system (multiprocessing)
10481  */
10482 void
10483 pgbuf_dump_if_any_fixed (void)
10484 {
10485  PGBUF_BCB *bufptr;
10486  int bufid;
10487  int consistent = PGBUF_CONTENT_GOOD;
10488 #if defined(SERVER_MODE)
10489  int rv;
10490 #endif /* SERVER_MODE */
10491 
10492  /* Make sure that each buffer is unfixed and consistent */
10493  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
10494  {
10495  bufptr = PGBUF_FIND_BCB_PTR (bufid);
10496  PGBUF_BCB_LOCK (bufptr);
10497 
10498  if (bufptr->latch_mode != PGBUF_LATCH_INVALID && bufptr->fcnt > 0)
10499  {
10500  /* The buffer is not unfixed */
10501  PGBUF_BCB_UNLOCK (bufptr);
10502  pgbuf_dump ();
10503  return;
10504  }
10505 
10506  consistent = pgbuf_is_consistent (bufptr, 0);
10507  PGBUF_BCB_UNLOCK (bufptr);
10508 
10509  if (consistent == PGBUF_CONTENT_BAD)
10510  {
10511  break;
10512  }
10513  }
10514 
10515  if (consistent != PGBUF_CONTENT_GOOD)
10516  {
10517  pgbuf_dump ();
10518  }
10519 }
10520 
10521 /*
10522  * pgbuf_dump () - Dump the system area of each buffer
10523  * return: void
10524  *
10525  * Note: This function is used for debugging purposes
10526  */
10527 static void
10528 pgbuf_dump (void)
10529 {
10530  PGBUF_BCB *bufptr;
10531  int bufid, i;
10532  int consistent;
10533  int nfetched = 0;
10534  int ndirty = 0;
10535  const char *latch_mode_str, *zone_str, *consistent_str;
10536 #if defined(SERVER_MODE)
10537  int rv;
10538 #endif /* SERVER_MODE */
10539 
10540  (void) fflush (stderr);
10541  (void) fflush (stdout);
10542  (void) fprintf (stdout, "\n\n");
10543  (void) fprintf (stdout, "Num buffers = %d\n", pgbuf_Pool.num_buffers);
10544 
10545  /* Dump info cached about perm and tmp volume identifiers */
10546  rv = pthread_mutex_lock (&pgbuf_Pool.volinfo_mutex);
10547  (void) fprintf (stdout, "Lastperm volid = %d, Num permvols of tmparea = %d\n", pgbuf_Pool.last_perm_volid,
10548  pgbuf_Pool.num_permvols_tmparea);
10549 
10550  if (pgbuf_Pool.permvols_tmparea_volids != NULL)
10551  {
10552  (void) fprintf (stdout, "Permanent volumes with tmp area: ");
10553  for (i = 0; i < pgbuf_Pool.num_permvols_tmparea; i++)
10554  {
10555  if (i != 0)
10556  {
10557  (void) fprintf (stdout, ", ");
10558  }
10559  (void) fprintf (stdout, "%d", pgbuf_Pool.permvols_tmparea_volids[i]);
10560  }
10561  (void) fprintf (stdout, "\n");
10562  }
10563  pthread_mutex_unlock (&pgbuf_Pool.volinfo_mutex);
10564 
10565  /* Now, dump all buffer pages */
10566  (void) fprintf (stdout,
10567  " Buf Volid Pageid Fcnt LatchMode D A F Zone Lsa consistent Bufaddr Usrarea\n");
10568 
10569  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
10570  {
10571  bufptr = PGBUF_FIND_BCB_PTR (bufid);
10572  PGBUF_BCB_LOCK (bufptr);
10573 
10574  if (bufptr->fcnt > 0)
10575  {
10576  nfetched++;
10577  }
10578 
10579  if (pgbuf_bcb_is_dirty (bufptr))
10580  {
10581  ndirty++;
10582  }
10583 
10584  /* check if the content of current buffer page is consistent. */
10585  consistent = pgbuf_is_consistent (bufptr, 0);
10586  if (!pgbuf_bcb_is_dirty (bufptr) && bufptr->fcnt == 0 && consistent != PGBUF_CONTENT_BAD)
10587  {
10588  PGBUF_BCB_UNLOCK (bufptr);
10589  continue;
10590  }
10591  else
10592  {
10593  latch_mode_str = pgbuf_latch_mode_str (bufptr->latch_mode);
10594  zone_str = pgbuf_latch_mode_str (bufptr->zone);
10595  consistenet_str = pgbuf_consistent_str (consistent);
10596 
10597  fprintf (stdout, "%4d %5d %6d %4d %9s %1d %1d %1d %11s %lld|%4d %10s %p %p-%p\n",
10598  pgbuf_bcb_get_pool_index (bufptr), VPID_AS_ARGS (&bufptr->vpid), bufptr->fcnt, latch_mode_str,
10599  pgbuf_bcb_is_dirty (bufptr), (int) pgbuf_bcb_is_flushing (bufptr),
10600  (int) pgbuf_bcb_is_async_flush_request (bufptr), zone_str,
10601  LSA_AS_ARGS (&bufptr->iopage_buffer->iopage.prv.lsa), consistent_str, (void *) bufptr,
10602  (void *) (&bufptr->iopage_buffer->iopage.page[0]),
10603  (void *) (&bufptr->iopage_buffer->iopage.page[DB_PAGESIZE - 1]));
10604  }
10605  PGBUF_BCB_UNLOCK (bufptr);
10606  }
10607 
10608  (void) fprintf (stdout, "Number of fetched buffers = %d\nNumber of dirty buffers = %d\n", nfetched, ndirty);
10609 }
10610 
10611 /*
10612  * pgbuf_is_consistent () - Check if a page is consistent
10613  * return:
10614  * bufptr(in): Pointer to buffer
10615  * likely_bad_after_fixcnt(in): Don't tell me that he page is bad if
10616  * fixcnt is greater that this
10617  *
10618  * Note: Consistency rule:
10619  * If memory page is dirty, the content of page should be different to
10620  * the content of the page on disk, otherwise, page is considered
10621  * inconsistent. That is, someone set a page dirty without updating
10622  * the page. This rule may fail since a page can be updated with the
10623  * same content that the page on disk, however, this is a remote case.
10624  *
10625  * If memory page is not dirty, the content of page should be identical
10626  * to the content of the page on disk, otherwise, page is considered
10627  * inconsistent. This is the case that someone updates the page without
10628  * setting it dirty.
10629  */
10630 static int
10631 pgbuf_is_consistent (const PGBUF_BCB * bufptr, int likely_bad_after_fixcnt)
10632 {
10633  int consistent = PGBUF_CONTENT_GOOD;
10634  FILEIO_PAGE *malloc_io_pgptr;
10635  bool is_page_corrupted;
10636 
10637  /* the caller is holding bufptr->mutex */
10638  if (memcmp (PGBUF_FIND_BUFFER_GUARD (bufptr), pgbuf_Guard, sizeof (pgbuf_Guard)) != 0)
10639  {
10640  er_log_debug (ARG_FILE_LINE, "SYSTEM ERROR buffer of pageid = %d|%d has been OVER RUN", bufptr->vpid.volid,
10641  bufptr->vpid.pageid);
10642  return PGBUF_CONTENT_BAD;
10643  }
10644 
10645  if (!VPID_ISNULL (&bufptr->vpid))
10646  {
10647  malloc_io_pgptr = (FILEIO_PAGE *) malloc (IO_PAGESIZE);
10648  if (malloc_io_pgptr == NULL)
10649  {
10650  return consistent;
10651  }
10652 
10653  /* Read the disk page into local page area */
10654  if (fileio_read (NULL, fileio_get_volume_descriptor (bufptr->vpid.volid), malloc_io_pgptr, bufptr->vpid.pageid,
10655  IO_PAGESIZE) == NULL)
10656  {
10657  /* Unable to verify consistency of this page */
10658  consistent = PGBUF_CONTENT_BAD;
10659  }
10660  else
10661  {
10662  /* If page is dirty, it should be different from the one on disk */
10663  if (!LSA_EQ (&malloc_io_pgptr->prv.lsa, &bufptr->iopage_buffer->iopage.prv.lsa)
10664  || memcmp (malloc_io_pgptr->page, bufptr->iopage_buffer->iopage.page, DB_PAGESIZE) != 0)
10665  {
10666  consistent = (pgbuf_bcb_is_dirty (bufptr) ? PGBUF_CONTENT_GOOD : PGBUF_CONTENT_BAD);
10667 
10668  /* If fix count is greater than likely_bad_after_fixcnt, the function cannot state that the page is bad */
10669  if (consistent == PGBUF_CONTENT_BAD && bufptr->fcnt > likely_bad_after_fixcnt)
10670  {
10671  consistent = PGBUF_CONTENT_LIKELY_BAD;
10672  }
10673  }
10674  else
10675  {
10677  }
10678  }
10679 
10680  if (consistent != PGBUF_CONTENT_GOOD)
10681  {
10683  &is_page_corrupted) != NO_ERROR || is_page_corrupted)
10684  {
10685  consistent = PGBUF_CONTENT_BAD;
10686  }
10687  }
10688 
10689  free_and_init (malloc_io_pgptr);
10690  }
10691  else
10692  {
10694  {
10695  int i;
10696  /* The page should be scrambled, otherwise some one step on it */
10697  for (i = 0; i < DB_PAGESIZE; i++)
10698  {
10700  {
10701  /* The page has been stepped by someone */
10702  consistent = PGBUF_CONTENT_BAD;
10703  break;
10704  }
10705  }
10706  }
10707  }
10708 
10709  /* The I/O executed for pgbuf_is_consistent is not recorded... */
10710  return consistent;
10711 }
10712 #endif /* CUBRID_DEBUG */
10713 
10714 #if !defined(NDEBUG)
10715 static void
10716 pgbuf_add_fixed_at (PGBUF_HOLDER * holder, const char *caller_file, int caller_line, bool reset)
10717 {
10718  char buf[256];
10719  const char *p;
10720 
10721  p = caller_file + strlen (caller_file);
10722  while (p)
10723  {
10724  if (p == caller_file)
10725  {
10726  break;
10727  }
10728 
10729  if (*p == '/' || *p == '\\')
10730  {
10731  p++;
10732  break;
10733  }
10734 
10735  p--;
10736  }
10737 
10738  if (reset)
10739  {
10740  sprintf (holder->fixed_at, "%s:%d ", p, caller_line);
10741  holder->fixed_at_size = (int) strlen (holder->fixed_at);
10742  }
10743  else
10744  {
10745  sprintf (buf, "%s:%d ", p, caller_line);
10746  if (strstr (holder->fixed_at, buf) == NULL)
10747  {
10748  strcat (holder->fixed_at, buf);
10749  holder->fixed_at_size += (int) strlen (buf);
10750  assert (holder->fixed_at_size < (64 * 1024));
10751  }
10752  }
10753 
10754  return;
10755 }
10756 #endif /* NDEBUG */
10757 
10758 #if defined(SERVER_MODE)
10759 static void
10760 pgbuf_sleep (THREAD_ENTRY * thread_p, pthread_mutex_t * mutex_p)
10761 {
10762  thread_lock_entry (thread_p);
10763  pthread_mutex_unlock (mutex_p);
10764 
10766 }
10767 
10768 STATIC_INLINE int
10769 pgbuf_wakeup (THREAD_ENTRY * thread_p)
10770 {
10771  int r = NO_ERROR;
10772 
10773  if (thread_p->request_latch_mode != PGBUF_NO_LATCH)
10774  {
10775  thread_p->resume_status = THREAD_PGBUF_RESUMED;
10776 
10777  r = pthread_cond_signal (&thread_p->wakeup_cond);
10778  if (r != 0)
10779  {
10781  thread_unlock_entry (thread_p);
10783  }
10784  }
10785  else
10786  {
10787  er_log_debug (ARG_FILE_LINE, "thread_entry (%d, %ld) already timedout\n", thread_p->tran_index,
10788  thread_p->get_posix_id ());
10789  }
10790 
10791  thread_unlock_entry (thread_p);
10792 
10793  return r;
10794 }
10795 
10796 STATIC_INLINE int
10797 pgbuf_wakeup_uncond (THREAD_ENTRY * thread_p)
10798 {
10799  int r;
10800 
10801  thread_lock_entry (thread_p);
10802  thread_p->resume_status = THREAD_PGBUF_RESUMED;
10803 
10804  r = pthread_cond_signal (&thread_p->wakeup_cond);
10805  if (r != 0)
10806  {
10808  thread_unlock_entry (thread_p);
10810  }
10811 
10812  thread_unlock_entry (thread_p);
10813 
10814  return r;
10815 }
10816 #endif /* SERVER_MODE */
10817 
10818 STATIC_INLINE void
10820 {
10821  PGBUF_HOLDER *holder;
10822 
10823  assert (bufptr != NULL);
10824 
10825  pgbuf_bcb_set_dirty (thread_p, bufptr);
10826 
10827  holder = pgbuf_find_thrd_holder (thread_p, bufptr);
10828  assert (bufptr->latch_mode == PGBUF_LATCH_WRITE);
10829  assert (holder != NULL);
10830  if (holder != NULL && holder->perf_stat.dirtied_by_holder == 0)
10831  {
10832  holder->perf_stat.dirtied_by_holder = 1;
10833  }
10834 
10835  /* Record number of dirties in statistics */
10837 }
10838 
10839 /*
10840  * pgbuf_wakeup_page_flush_daemon () - Wakeup the flushing daemon thread to flush some
10841  * of the dirty pages in buffer pool to disk
10842  * return : void
10843  * thread_p (in) :
10844  */
10845 static void
10847 {
10848 #if defined (SERVER_MODE)
10850  {
10851  pgbuf_Page_flush_daemon->wakeup ();
10852  return;
10853  }
10854 #endif
10855 
10856  PERF_UTIME_TRACKER dummy_time_tracker;
10857  bool stop = false;
10858 
10859  /* single-threaded environment. do flush on our own. */
10860  dummy_time_tracker.is_perf_tracking = false;
10862  &stop);
10863  assert (!stop);
10864 }
10865 
10866 /*
10867  * pgbuf_has_perm_pages_fixed () -
10868  *
10869  * return : The number of pages fixed by the thread.
10870  * thread_p (in) : Thread entry.
10871  *
10872  */
10873 bool
10875 {
10876  int thrd_idx = thread_get_entry_index (thread_p);
10877  PGBUF_HOLDER *holder = NULL;
10878 
10879  if (pgbuf_Pool.thrd_holder_info[thrd_idx].num_hold_cnt == 0)
10880  {
10881  return false;
10882  }
10883 
10884  for (holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list; holder != NULL; holder = holder->thrd_link)
10885  {
10886  if (holder->bufptr->iopage_buffer->iopage.prv.ptype != PAGE_QRESULT)
10887  {
10888  return true;
10889  }
10890  }
10891  return false;
10892 }
10893 
10894 #if defined (SERVER_MODE)
10895 /*
10896  * pgbuf_is_thread_high_priority () -
10897  *
10898  * return : true if the threads has any fixed pages and the other is waiting on any of them or
10899  * it has an important hot page such as volume header, file header, index root and heap header.
10900  * thread_p (in) : Thread entry.
10901  */
10902 static bool
10903 pgbuf_is_thread_high_priority (THREAD_ENTRY * thread_p)
10904 {
10905  int thrd_idx = thread_get_entry_index (thread_p);
10906  PGBUF_HOLDER *holder = NULL;
10907 
10908  if (pgbuf_Pool.thrd_holder_info[thrd_idx].num_hold_cnt == 0)
10909  {
10910  /* not owns any page */
10911  return false;
10912  }
10913 
10914  for (holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list; holder != NULL; holder = holder->thrd_link)
10915  {
10916  if (holder->bufptr->next_wait_thrd != NULL)
10917  {
10918  /* someone is waiting for the thread */
10919  return true;
10920  }
10921 
10923  {
10924  /* has volume header */
10925  return true;
10926  }
10927  if (holder->bufptr->iopage_buffer->iopage.prv.ptype == PAGE_FTAB)
10928  {
10929  /* holds a file header page */
10930  return true;
10931  }
10932  if (holder->bufptr->iopage_buffer->iopage.prv.ptype == PAGE_BTREE
10935  {
10936  /* holds b-tree root */
10937  return true;
10938  }
10939  if (holder->bufptr->iopage_buffer->iopage.prv.ptype == PAGE_HEAP
10940  && heap_is_page_header (thread_p, holder->bufptr->iopage_buffer->iopage.page))
10941  {
10942  /* heap file header */
10943  return true;
10944  }
10945  }
10946 
10947  return false;
10948 }
10949 #endif /* SERVER_MODE */
10950 
10951 enum
10952 {
10954 
10957 
10960 
10964 };
10965 
10966 /*
10967  * pgbuf_flush_page_and_neighbors_fb () - Flush page pointed to by the supplied BCB and also flush neighbor pages
10968  *
10969  * return : error code or NO_ERROR
10970  * thread_p (in) : thread entry
10971  * bufptr (in) : BCB to flush
10972  * flushed_pages(out): actual number of flushed pages
10973  *
10974  * todo: too big to be inlined. maybe we can optimize it.
10975  */
10976 static int
10977 pgbuf_flush_page_and_neighbors_fb (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, int *flushed_pages)
10978 {
10979 #define PGBUF_PAGES_COUNT_THRESHOLD 4
10980  int error = NO_ERROR, i;
10981  int save_first_error = NO_ERROR;
10982  LOG_LSA log_newest_oldest_unflush_lsa;
10983  VPID first_vpid, vpid;
10984  PGBUF_BUFFER_HASH *hash_anchor;
10986  bool prev_page_dirty = true;
10987  int dirty_pages_cnt = 0;
10988  int pos;
10989  bool forward;
10990  bool search_nondirty;
10991  int written_pages;
10992  int abort_reason;
10993  bool was_page_flushed = false;
10994 #if defined(ENABLE_SYSTEMTAP)
10995  QUERY_ID query_id = -1;
10996  bool monitored = false;
10997 #endif /* ENABLE_SYSTEMTAP */
10998 
10999 #if defined(ENABLE_SYSTEMTAP)
11000  query_id = qmgr_get_current_query_id (thread_p);
11001  if (query_id != NULL_QUERY_ID)
11002  {
11003  monitored = true;
11004  CUBRID_IO_WRITE_START (query_id);
11005  }
11006 #endif /* ENABLE_SYSTEMTAP */
11007 
11008  /* init */
11009  helper->npages = 0;
11010  helper->fwd_offset = 0;
11011  helper->back_offset = 0;
11012 
11013  /* add bufptr as middle page */
11014  pgbuf_add_bufptr_to_batch (bufptr, 0);
11015  VPID_COPY (&first_vpid, &bufptr->vpid);
11016  LSA_COPY (&log_newest_oldest_unflush_lsa, &bufptr->oldest_unflush_lsa);
11017  PGBUF_BCB_UNLOCK (bufptr);
11018 
11019  VPID_COPY (&vpid, &first_vpid);
11020 
11021  /* Now search around bufptr->vpid for neighbors. */
11022  forward = true;
11023  search_nondirty = false;
11024  abort_reason = 0;
11025  for (i = 1; i < PGBUF_NEIGHBOR_PAGES;)
11026  {
11027  if (forward == true)
11028  {
11029  if (first_vpid.pageid <= PAGEID_MAX - (helper->fwd_offset + 1))
11030  {
11031  vpid.pageid = first_vpid.pageid + helper->fwd_offset + 1;
11032  }
11033  else
11034  {
11035  abort_reason = NEIGHBOR_ABORT_RANGE;
11036  break;
11037  }
11038  }
11039  else
11040  {
11041  if (first_vpid.pageid >= helper->back_offset + 1)
11042  {
11043  vpid.pageid = first_vpid.pageid - helper->back_offset - 1;
11044  }
11045  else if (PGBUF_NEIGHBOR_FLUSH_NONDIRTY == false || search_nondirty == true)
11046  {
11047  abort_reason = NEIGHBOR_ABORT_RANGE;
11048  break;
11049  }
11050  else
11051  {
11052  search_nondirty = true;
11053  forward = true;
11054  continue;
11055  }
11056  }
11057 
11058  hash_anchor = &pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (&vpid)];
11059 
11060  bufptr = pgbuf_search_hash_chain (thread_p, hash_anchor, &vpid);
11061  if (bufptr == NULL)
11062  {
11063  /* Page not found: change direction or abandon batch */
11064  pthread_mutex_unlock (&hash_anchor->hash_mutex);
11065  if (search_nondirty == true)
11066  {
11067  if (forward == false)
11068  {
11070  break;
11071  }
11072  else
11073  {
11074  forward = false;
11075  continue;
11076  }
11077  }
11078  else
11079  {
11080  if (forward == true)
11081  {
11082  forward = false;
11083  continue;
11084  }
11085  else if (PGBUF_NEIGHBOR_FLUSH_NONDIRTY == true)
11086  {
11087  search_nondirty = true;
11088  forward = true;
11089  continue;
11090  }
11091  else
11092  {
11093  abort_reason = NEIGHBOR_ABORT_NOTFOUND_DIRTY_BACK;
11094  break;
11095  }
11096  }
11097  }
11098 
11099  /* Abandon batch for: fixed pages, latched pages or with 'avoid_victim' */
11100  if (pgbuf_bcb_is_flushing (bufptr) || bufptr->latch_mode > PGBUF_LATCH_READ)
11101  {
11102  PGBUF_BCB_UNLOCK (bufptr);
11103  if (search_nondirty == true)
11104  {
11105  if (forward == false)
11106  {
11107  abort_reason = NEIGHBOR_ABORT_LATCH_NONDIRTY_BACK;
11108  break;
11109  }
11110  else
11111  {
11112  forward = false;
11113  continue;
11114  }
11115  }
11116  else
11117  {
11118  if (forward == true)
11119  {
11120  forward = false;
11121  continue;
11122  }
11123  else if (PGBUF_NEIGHBOR_FLUSH_NONDIRTY == true)
11124  {
11125  search_nondirty = true;
11126  forward = true;
11127  continue;
11128  }
11129  else
11130  {
11131  abort_reason = NEIGHBOR_ABORT_LATCH_DIRTY_BACK;
11132  break;
11133  }
11134  }
11135  }
11136 
11137  if (!pgbuf_bcb_is_dirty (bufptr))
11138  {
11139  if (search_nondirty == false)
11140  {
11141  PGBUF_BCB_UNLOCK (bufptr);
11142  if (forward == true)
11143  {
11144  forward = false;
11145  continue;
11146  }
11147  else if (PGBUF_NEIGHBOR_FLUSH_NONDIRTY == true)
11148  {
11149  search_nondirty = true;
11150  forward = true;
11151  continue;
11152  }
11153  abort_reason = NEIGHBOR_ABORT_NONDIRTY_NOT_ALLOWED;
11154  break;
11155  }
11156 
11157  if (prev_page_dirty == false)
11158  {
11159  /* two consecutive non-dirty pages */
11160  PGBUF_BCB_UNLOCK (bufptr);
11162  break;
11163  }
11164  }
11165  else
11166  {
11167  if (LSA_LT (&log_newest_oldest_unflush_lsa, &bufptr->oldest_unflush_lsa))
11168  {
11169  LSA_COPY (&log_newest_oldest_unflush_lsa, &bufptr->oldest_unflush_lsa);
11170  }
11171  dirty_pages_cnt++;
11172  }
11173 
11174  if (helper->npages > PGBUF_PAGES_COUNT_THRESHOLD && ((2 * dirty_pages_cnt) < helper->npages))
11175  {
11176  /* too many non dirty pages */
11177  PGBUF_BCB_UNLOCK (bufptr);
11178  helper->npages = 1;
11179  abort_reason = NEIGHBOR_ABORT_TOO_MANY_NONDIRTIES;
11180  break;
11181  }
11182 
11183  prev_page_dirty = pgbuf_bcb_is_dirty (bufptr);
11184 
11185  /* add bufptr to batch */
11186  pgbuf_add_bufptr_to_batch (bufptr, vpid.pageid - first_vpid.pageid);
11187  PGBUF_BCB_UNLOCK (bufptr);
11188  i++;
11189  }
11190 
11191  if (prev_page_dirty == true)
11192  {
11193  if (helper->fwd_offset > 0 && !pgbuf_bcb_is_dirty (helper->pages_bufptr[PGBUF_NEIGHBOR_POS (helper->fwd_offset)]))
11194  {
11195  helper->fwd_offset--;
11196  helper->npages--;
11197  }
11198  if (helper->back_offset > 0
11200  {
11201  helper->back_offset--;
11202  helper->npages--;
11203  }
11204  }
11205 
11206  if (helper->npages <= 1)
11207  {
11208  /* flush only first page */
11209  pos = PGBUF_NEIGHBOR_POS (0);
11210  bufptr = helper->pages_bufptr[pos];
11211 
11212  error = pgbuf_flush_neighbor_safe (thread_p, bufptr, &helper->vpids[pos], &was_page_flushed);
11213  if (error != NO_ERROR)
11214  {
11215  ASSERT_ERROR ();
11216  return error;
11217  }
11218  if (was_page_flushed)
11219  {
11220  *flushed_pages = 1;
11221  }
11222  return NO_ERROR;
11223  }
11224 
11225  /* WAL protocol: force log record to disk */
11226  logpb_flush_log_for_wal (thread_p, &log_newest_oldest_unflush_lsa);
11227 
11228  written_pages = 0;
11229  for (pos = PGBUF_NEIGHBOR_POS (-helper->back_offset); pos <= PGBUF_NEIGHBOR_POS (helper->fwd_offset); pos++)
11230  {
11231  bufptr = helper->pages_bufptr[pos];
11232 
11233  error = pgbuf_flush_neighbor_safe (thread_p, bufptr, &helper->vpids[pos], &was_page_flushed);
11234  if (error != NO_ERROR)
11235  {
11236  ASSERT_ERROR ();
11237  if (save_first_error == NO_ERROR)
11238  {
11239  save_first_error = error;
11240  }
11241  continue;
11242  }
11243  if (was_page_flushed)
11244  {
11245  written_pages++;
11246  }
11247  }
11248 
11250  {
11252  "pgbuf_flush_page_and_neighbors_fb: collected_pages:%d, written:%d, back_offset:%d, fwd_offset%d, "
11253  "abort_reason:%d", helper->npages, written_pages, helper->back_offset, helper->fwd_offset,
11254  abort_reason);
11255  }
11256 
11257  *flushed_pages = written_pages;
11258  helper->npages = 0;
11259 
11260  return save_first_error;
11261 #undef PGBUF_PAGES_COUNT_THRESHOLD
11262 }
11263 
11264 /*
11265  * pgbuf_add_bufptr_to_batch () - Add a page to the flush helper
11266  * return : void
11267  * bufptr (in) : BCB of page to add
11268  */
11269 STATIC_INLINE void
11271 {
11273  int pos;
11274 
11275  assert (bufptr->latch_mode == PGBUF_NO_LATCH || bufptr->latch_mode == PGBUF_LATCH_READ
11276  || bufptr->latch_mode == PGBUF_LATCH_WRITE);
11277 
11279  pos = PGBUF_NEIGHBOR_POS (idx);
11280 
11281  VPID_COPY (&helper->vpids[pos], &bufptr->vpid);
11282  helper->pages_bufptr[pos] = bufptr;
11283 
11284  helper->npages++;
11285  if (idx > 0)
11286  {
11287  helper->fwd_offset++;
11288  }
11289  else if (idx < 0)
11290  {
11291  helper->back_offset++;
11292  }
11293 }
11294 
11295 /*
11296  * pgbuf_flush_neighbor_safe () - Flush collected page for neighbor flush if it's safe:
11297  * 1. VPID of bufptr has not changed.
11298  * 2. Page has no latch or is only latched for read.
11299  *
11300  * return : Error code.
11301  * thread_p (in) : Thread entry.
11302  * bufptr (in) : Buffered page collected for neighbor flush.
11303  * expected_vpid (in) : Expected VPID for bufptr.
11304  * flushed (out) : Output true if page was flushed.
11305  */
11306 STATIC_INLINE int
11307 pgbuf_flush_neighbor_safe (THREAD_ENTRY * thread_p, PGBUF_BCB * bufptr, VPID * expected_vpid, bool * flushed)
11308 {
11309  int error = NO_ERROR;
11310  bool is_bcb_locked = true;
11311 
11312  assert (bufptr != NULL);
11313  assert (expected_vpid != NULL && !VPID_ISNULL (expected_vpid));
11314  assert (flushed != NULL);
11315 
11316  *flushed = false;
11317 
11318  PGBUF_BCB_LOCK (bufptr);
11319  if (!VPID_EQ (&bufptr->vpid, expected_vpid))
11320  {
11321  PGBUF_BCB_UNLOCK (bufptr);
11322  return NO_ERROR;
11323  }
11324 
11325  if (pgbuf_bcb_is_flushing (bufptr) || bufptr->latch_mode > PGBUF_LATCH_READ)
11326  {
11327  PGBUF_BCB_UNLOCK (bufptr);
11328  return NO_ERROR;
11329  }
11330 
11331  /* flush even if it is not dirty. todo: is this necessary? */
11332  error = pgbuf_bcb_flush_with_wal (thread_p, bufptr, true, &is_bcb_locked);
11333  if (is_bcb_locked)
11334  {
11335  PGBUF_BCB_UNLOCK (bufptr);
11336  }
11337  if (error == NO_ERROR)
11338  {
11339  *flushed = true;
11340  }
11341  else
11342  {
11343  ASSERT_ERROR ();
11344  }
11345  return error;
11346 }
11347 
11348 /*
11349  * pgbuf_compare_hold_vpid_for_sort () - Compare the vpid for sort
11350  * return: p1 - p2
11351  * p1(in): victim candidate list 1
11352  * p2(in): victim candidate list 2
11353  */
11354 static int
11355 pgbuf_compare_hold_vpid_for_sort (const void *p1, const void *p2)
11356 {
11357  PGBUF_HOLDER_INFO *h1, *h2;
11358  int diff;
11359 
11360  h1 = (PGBUF_HOLDER_INFO *) p1;
11361  h2 = (PGBUF_HOLDER_INFO *) p2;
11362 
11363  if (h1 == h2)
11364  {
11365  return 0;
11366  }
11367 
11368  /* Pages with NULL GROUP sort last */
11369  if (VPID_ISNULL (&h1->group_id) && !VPID_ISNULL (&h2->group_id))
11370  {
11371  return 1;
11372  }
11373  else if (!VPID_ISNULL (&h1->group_id) && VPID_ISNULL (&h2->group_id))
11374  {
11375  return -1;
11376  }
11377 
11378  diff = h1->group_id.volid - h2->group_id.volid;
11379  if (diff != 0)
11380  {
11381  return diff;
11382  }
11383 
11384  diff = h1->group_id.pageid - h2->group_id.pageid;
11385  if (diff != 0)
11386  {
11387  return diff;
11388  }
11389 
11390  diff = h1->rank - h2->rank;
11391  if (diff != 0)
11392  {
11393  return diff;
11394  }
11395 
11396  diff = h1->vpid.volid - h2->vpid.volid;
11397  if (diff != 0)
11398  {
11399  return diff;
11400  }
11401 
11402  diff = h1->vpid.pageid - h2->vpid.pageid;
11403  if (diff != 0)
11404  {
11405  return diff;
11406  }
11407 
11408  return diff;
11409 }
11410 
11411 /*
11412  * pgbuf_ordered_fix () - Fix page in VPID order; other previously fixed pages may be unfixed and re-fixed again.
11413  * return: error code
11414  * thread_p(in):
11415  * req_vpid(in):
11416  * fetch_mode(in): old or new page
11417  * request_mode(in): latch mode
11418  * req_watcher(in/out): page watcher object, also holds output page pointer
11419  *
11420  * Note: If fails to re-fix previously fixed pages (unfixed with this request), the requested page is unfixed
11421  * (if fixed) and error is returned. In such case, older some pages may be re-fixed, other not : the caller
11422  * should check page pointer of watchers before using them in case of error.
11423  *
11424  * Note2: If any page re-fix occurs for previously fixed pages, their 'unfix' flag in their watcher is set.
11425  * (caller is responsible to check this flag)
11426  *
11427  */
11428 #if !defined(NDEBUG)
11429 int
11430 pgbuf_ordered_fix_debug (THREAD_ENTRY * thread_p, const VPID * req_vpid, PAGE_FETCH_MODE fetch_mode,
11431  const PGBUF_LATCH_MODE request_mode, PGBUF_WATCHER * req_watcher, const char *caller_file,
11432  int caller_line)
11433 #else /* NDEBUG */
11434 int
11435 pgbuf_ordered_fix_release (THREAD_ENTRY * thread_p, const VPID * req_vpid, PAGE_FETCH_MODE fetch_mode,
11436  const PGBUF_LATCH_MODE request_mode, PGBUF_WATCHER * req_watcher)
11437 #endif /* NDEBUG */
11438 {
11439  int er_status = NO_ERROR;
11440  PGBUF_HOLDER *holder, *next_holder;
11441  PAGE_PTR pgptr, ret_pgptr;
11442  int i, thrd_idx;
11443  int saved_pages_cnt = 0;
11444  PGBUF_LATCH_MODE curr_request_mode;
11445  PAGE_FETCH_MODE curr_fetch_mode;
11446  PGBUF_HOLDER_INFO ordered_holders_info[PGBUF_MAX_PAGE_FIXED_BY_TRAN];
11447  PGBUF_HOLDER_INFO req_page_holder_info;
11448  bool req_page_has_watcher;
11449  bool req_page_has_group = false;
11450  int er_status_get_hfid = NO_ERROR;
11451  VPID req_page_groupid;
11452  bool has_dealloc_prevent_flag = false;
11453  PGBUF_LATCH_CONDITION latch_condition;
11454  PGBUF_BCB *bufptr = NULL;
11455 #if defined(PGBUF_ORDERED_DEBUG)
11456  static unsigned int global_ordered_fix_id = 0;
11457  unsigned int ordered_fix_id;
11458 #endif
11459 
11460  assert (req_watcher != NULL);
11461 
11462 #if defined(PGBUF_ORDERED_DEBUG)
11463  ordered_fix_id = global_ordered_fix_id++;
11464 #endif
11465 
11466 #if !defined(NDEBUG)
11467  assert (req_watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
11468 #endif
11469 
11470  ret_pgptr = NULL;
11471 
11472  req_page_has_watcher = false;
11473  if (req_watcher->pgptr != NULL)
11474  {
11475  assert_release (false);
11476  er_status = ER_FAILED_ASSERTION;
11477  goto exit;
11478  }
11479 
11480  /* set or promote current page rank */
11481  if (VPID_EQ (&req_watcher->group_id, req_vpid))
11482  {
11483  req_watcher->curr_rank = PGBUF_ORDERED_HEAP_HDR;
11484  }
11485  else
11486  {
11487  req_watcher->curr_rank = req_watcher->initial_rank;
11488  }
11489 
11490  req_page_has_group = VPID_ISNULL (&req_watcher->group_id) ? false : true;
11491  if (req_page_has_group == false)
11492  {
11493  VPID_SET_NULL (&req_page_groupid);
11494  }
11495 
11496  VPID_COPY (&req_page_holder_info.group_id, &req_watcher->group_id);
11497  req_page_holder_info.rank = req_watcher->curr_rank;
11498  VPID_COPY (&req_page_holder_info.vpid, req_vpid);
11499  req_page_holder_info.watch_count = 1;
11500  req_page_holder_info.watcher[0] = req_watcher;
11501 
11502  thrd_idx = thread_get_entry_index (thread_p);
11503  holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list;
11504  if ((holder == NULL) || ((holder->thrd_link == NULL) && (VPID_EQ (req_vpid, &(holder->bufptr->vpid)))))
11505  {
11506  /* There are no other fixed pages or only the requested page was already fixed */
11507  latch_condition = PGBUF_UNCONDITIONAL_LATCH;
11508  }
11509  else
11510  {
11511  latch_condition = PGBUF_CONDITIONAL_LATCH;
11512  }
11513 
11514 #if !defined(NDEBUG)
11515  ret_pgptr = pgbuf_fix_debug (thread_p, req_vpid, fetch_mode, request_mode, latch_condition, caller_file, caller_line);
11516 #else
11517  ret_pgptr = pgbuf_fix_release (thread_p, req_vpid, fetch_mode, request_mode, latch_condition);
11518 #endif
11519 
11520  if (ret_pgptr != NULL)
11521  {
11522  for (holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list; holder != NULL; holder = holder->thrd_link)
11523  {
11524  CAST_BFPTR_TO_PGPTR (ret_pgptr, holder->bufptr);
11525 
11526  if (VPID_EQ (req_vpid, &(holder->bufptr->vpid)))
11527  {
11529 
11530  if (req_page_has_group == false && holder->first_watcher != NULL)
11531  {
11532  /* special case : already have fix on this page with an watcher; get group id from existing watcher */
11533  assert (holder->watch_count > 0);
11534  assert (!VPID_ISNULL (&holder->first_watcher->group_id));
11535  VPID_COPY (&req_watcher->group_id, &holder->first_watcher->group_id);
11536  }
11537  else if (req_page_has_group == false && pgbuf_get_page_ptype (thread_p, ret_pgptr) == PAGE_HEAP)
11538  {
11539  er_status = pgbuf_get_groupid_and_unfix (thread_p, req_vpid, &ret_pgptr, &req_page_groupid, false);
11540  if (er_status != NO_ERROR)
11541  {
11542  er_status_get_hfid = er_status;
11543  goto exit;
11544  }
11545  assert (!VPID_ISNULL (&req_page_groupid));
11546  VPID_COPY (&req_watcher->group_id, &req_page_groupid);
11547  }
11548 #if !defined(NDEBUG)
11549  pgbuf_add_watch_instance_internal (holder, ret_pgptr, req_watcher, request_mode, true, caller_file,
11550  caller_line);
11551 #else
11552  pgbuf_add_watch_instance_internal (holder, ret_pgptr, req_watcher, request_mode, true);
11553 #endif
11554  req_page_has_watcher = true;
11555  goto exit;
11556  }
11557  }
11558 
11559  assert_release (false);
11560 
11561  er_status = ER_FAILED_ASSERTION;
11562  goto exit;
11563  }
11564  else
11565  {
11566  int wait_msecs;
11567 
11568  assert (ret_pgptr == NULL);
11569 
11570  er_status = er_errid_if_has_error ();
11571  if (er_status == ER_PB_BAD_PAGEID || er_status == ER_INTERRUPTED)
11572  {
11573  goto exit;
11574  }
11575 
11576  wait_msecs = pgbuf_find_current_wait_msecs (thread_p);
11577  if (wait_msecs == LK_ZERO_WAIT || wait_msecs == LK_FORCE_ZERO_WAIT)
11578  {
11579  /* attempts to unfix-refix old page may fail since CONDITIONAL latch will be enforced; just return page
11580  * cannot be fixed */
11581  if (er_status == NO_ERROR)
11582  {
11583  /* LK_FORCE_ZERO_WAIT is used in some page scan functions (e.g. heap_stats_find_page_in_bestspace) to
11584  * skip busy pages; here we return an error code (which means the page was not fixed), however no error
11585  * is set : this allows scan of pages to continue */
11586  assert (wait_msecs == LK_FORCE_ZERO_WAIT);
11587  er_status = ER_LK_PAGE_TIMEOUT;
11588  }
11589  goto exit;
11590  }
11591 
11592  if (latch_condition == PGBUF_UNCONDITIONAL_LATCH)
11593  {
11594  /* continue */
11595  er_status = er_errid ();
11596  if (er_status == NO_ERROR)
11597  {
11598  er_status = ER_FAILED;
11599  }
11600  goto exit;
11601  }
11602 
11603  /* to proceed ordered fix the pages, forget any underlying error. */
11604  er_status = NO_ERROR;
11605  }
11606 
11607  if (fetch_mode == OLD_PAGE_PREVENT_DEALLOC)
11608  {
11609  has_dealloc_prevent_flag = true;
11610  fetch_mode = OLD_PAGE;
11611  }
11612 
11613  holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list;
11614  while (holder != NULL)
11615  {
11616  next_holder = holder->thrd_link;
11617  if (holder->watch_count <= 0)
11618  {
11619  /* cannot perform unfix-ordered fix without watcher; we assume that this holder's page will not trigger a
11620  * latch deadlock and ignore it */
11621  holder = next_holder;
11622  continue;
11623  }
11624 
11626 
11627  if (saved_pages_cnt >= PGBUF_MAX_PAGE_FIXED_BY_TRAN)
11628  {
11629  assert_release (false);
11630 
11631  er_status = ER_FAILED_ASSERTION;
11632  goto exit;
11633  }
11634  else if (VPID_EQ (req_vpid, &(holder->bufptr->vpid)))
11635  {
11636  /* already have a fix on this page, should not be here */
11637  if (pgbuf_is_valid_page (thread_p, req_vpid, false, NULL, NULL) != DISK_VALID)
11638  {
11639 #if defined(PGBUF_ORDERED_DEBUG)
11640  _er_log_debug (__FILE__, __LINE__,
11641  "ORDERED_FIX(%u): page VPID:(%d,%d) (GROUP:%d,%d; rank:%d/%d) "
11642  "invalid, while having holder: %X ", ordered_fix_id, req_vpid->volid, req_vpid->pageid,
11643  req_watcher->group_id.volid, req_watcher->group_id.pageid, req_watcher->curr_rank,
11644  req_watcher->initial_rank, holder);
11645 #endif
11646  er_status = er_errid ();
11647  }
11648  else
11649  {
11650  er_status = ER_FAILED_ASSERTION;
11651  }
11652  assert_release (false);
11653 
11654  goto exit;
11655  }
11656  else
11657  {
11658  int holder_fix_cnt;
11659  int j, diff;
11660  PAGE_PTR save_page_ptr = NULL;
11661  PGBUF_WATCHER *pg_watcher;
11662  int page_rank;
11663  PGBUF_ORDERED_GROUP group_id;
11664 
11665  page_rank = PGBUF_ORDERED_RANK_UNDEFINED;
11666  VPID_SET_NULL (&group_id);
11667  holder_fix_cnt = holder->fix_count;
11668 
11669  if (holder_fix_cnt != holder->watch_count)
11670  {
11671  /* this page was fixed without watcher, without being unfixed before another page fix ; we do not allow
11672  * this */
11673  assert_release (false);
11674 
11675  er_status = ER_FAILED_ASSERTION;
11676  goto exit;
11677  }
11678 
11680 
11681  ordered_holders_info[saved_pages_cnt].latch_mode = PGBUF_LATCH_READ;
11682  pg_watcher = holder->first_watcher;
11683  j = 0;
11684  ordered_holders_info[saved_pages_cnt].prevent_dealloc = false;
11685 
11686  /* add all watchers */
11687  while (pg_watcher != NULL)
11688  {
11689 #if !defined(NDEBUG)
11690  CAST_BFPTR_TO_PGPTR (pgptr, holder->bufptr);
11691 
11692  assert (pg_watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
11693  assert (pg_watcher->pgptr == pgptr);
11695  assert (!VPID_ISNULL (&pg_watcher->group_id));
11696 #endif
11697  if (page_rank == PGBUF_ORDERED_RANK_UNDEFINED)
11698  {
11699  page_rank = pg_watcher->curr_rank;
11700  }
11701  else if (page_rank != pg_watcher->curr_rank)
11702  {
11703  /* all watchers on this page should have the same rank */
11704  char additional_msg[128];
11705  snprintf (additional_msg, sizeof (additional_msg) - 1, "different page ranks:%d,%d", page_rank,
11706  pg_watcher->curr_rank);
11707 
11708  er_status = ER_PB_ORDERED_INCONSISTENCY;
11709  er_set (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 5, req_vpid->volid, req_vpid->pageid,
11710  holder->bufptr->vpid.volid, holder->bufptr->vpid.pageid, additional_msg);
11711  goto exit;
11712  }
11713 
11714  if (VPID_ISNULL (&group_id))
11715  {
11716  VPID_COPY (&group_id, &pg_watcher->group_id);
11717  }
11718  else if (!VPID_EQ (&group_id, &pg_watcher->group_id))
11719  {
11720  char additional_msg[128];
11721  snprintf (additional_msg, sizeof (additional_msg) - 1, "different GROUP_ID : (%d,%d) and (%d,%d)",
11722  group_id.volid, group_id.pageid, pg_watcher->group_id.volid, pg_watcher->group_id.pageid);
11723 
11724  /* all watchers on this page should have the same group id */
11725  er_status = ER_PB_ORDERED_INCONSISTENCY;
11726  er_set (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 5, req_vpid->volid, req_vpid->pageid,
11727  holder->bufptr->vpid.volid, holder->bufptr->vpid.pageid, additional_msg);
11728  goto exit;
11729  }
11730 
11731  if (save_page_ptr == NULL)
11732  {
11733  save_page_ptr = pg_watcher->pgptr;
11734  }
11735  else
11736  {
11737  assert (save_page_ptr == pg_watcher->pgptr);
11738  }
11739 
11740  ordered_holders_info[saved_pages_cnt].watcher[j] = pg_watcher;
11741  if (pg_watcher->latch_mode == PGBUF_LATCH_WRITE)
11742  {
11743  ordered_holders_info[saved_pages_cnt].latch_mode = PGBUF_LATCH_WRITE;
11744  }
11745  j++;
11746 
11747 #if defined(PGBUF_ORDERED_DEBUG)
11748  _er_log_debug (__FILE__, __LINE__,
11749  "ordered_fix(%u): check_watcher: pgptr:%X, VPID:(%d,%d), GROUP:%d,%d, rank:%d/%d, "
11750  "holder_fix_count:%d, holder_watch_count:%d, holder_fixed_at:%s", ordered_fix_id,
11751  pg_watcher->pgptr, holder->bufptr->vpid.volid, holder->bufptr->vpid.pageid,
11752  pg_watcher->group_id.volid, pg_watcher->group_id.pageid, pg_watcher->curr_rank,
11753  pg_watcher->initial_rank, holder->fix_count, holder->watch_count, holder->fixed_at);
11754 #endif
11755  pg_watcher = pg_watcher->next;
11756  }
11757 
11758  assert (j == holder->watch_count);
11759 
11760  VPID_COPY (&ordered_holders_info[saved_pages_cnt].group_id, &group_id);
11761  ordered_holders_info[saved_pages_cnt].rank = page_rank;
11762  VPID_COPY (&(ordered_holders_info[saved_pages_cnt].vpid), &(holder->bufptr->vpid));
11763 
11764  if (req_page_has_group == true)
11765  {
11766  diff = pgbuf_compare_hold_vpid_for_sort (&req_page_holder_info, &ordered_holders_info[saved_pages_cnt]);
11767  }
11768  else
11769  {
11770  /* page needs to be unfixed */
11771  diff = -1;
11772  }
11773 
11774  if (diff < 0)
11775  {
11776  ordered_holders_info[saved_pages_cnt].watch_count = holder->watch_count;
11777  ordered_holders_info[saved_pages_cnt].ptype = (PAGE_TYPE) holder->bufptr->iopage_buffer->iopage.prv.ptype;
11778 
11779 #if defined(PGBUF_ORDERED_DEBUG)
11780  _er_log_debug (__FILE__, __LINE__,
11781  "ordered_fix(%u): save_watchers (%d): pgptr:%X, VPID:(%d,%d), "
11782  "GROUP:(%d,%d), rank:%d(page_rank:%d), holder_fix_count:%d, holder_watch_count:%d",
11783  ordered_fix_id, ordered_holders_info[saved_pages_cnt].watch_count, save_page_ptr,
11784  ordered_holders_info[saved_pages_cnt].vpid.volid,
11785  ordered_holders_info[saved_pages_cnt].vpid.pageid,
11786  ordered_holders_info[saved_pages_cnt].group_id.volid,
11787  ordered_holders_info[saved_pages_cnt].group_id.pageid,
11788  ordered_holders_info[saved_pages_cnt].rank, page_rank, holder_fix_cnt,
11789  holder->watch_count);
11790 #endif
11791  saved_pages_cnt++;
11792  }
11793  else if (diff == 0)
11794  {
11795  assert_release (false);
11796 
11797  er_status = ER_FAILED_ASSERTION;
11798  goto exit;
11799  }
11800  else
11801  {
11802  assert (diff > 0);
11803  /* this page is correctly fixed before new requested page, the accumulated watchers are just ignored */
11804 #if defined(PGBUF_ORDERED_DEBUG)
11805  _er_log_debug (__FILE__, __LINE__,
11806  "ordered_fix(%u): ignore: pgptr:%X, VPID:(%d,%d) "
11807  "GROUP:(%d,%d), rank:%d --- ignored", ordered_fix_id, save_page_ptr,
11808  ordered_holders_info[saved_pages_cnt].vpid.volid,
11809  ordered_holders_info[saved_pages_cnt].vpid.pageid,
11810  ordered_holders_info[saved_pages_cnt].group_id.volid,
11811  ordered_holders_info[saved_pages_cnt].group_id.pageid,
11812  ordered_holders_info[saved_pages_cnt].rank);
11813 #endif
11814  }
11815  }
11816  holder = next_holder;
11817  }
11818 
11819  holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list;
11820  /* unfix pages which do not fulfill the VPID order */
11821  for (i = 0; i < saved_pages_cnt; i++)
11822  {
11823  int j, holder_fix_cnt;
11824 #if defined(PGBUF_ORDERED_DEBUG)
11825  int holder_fix_cnt_save;
11826 #endif
11827 
11828  while (holder != NULL && !VPID_EQ (&(ordered_holders_info[i].vpid), &(holder->bufptr->vpid)))
11829  {
11830  holder = holder->thrd_link;
11831  }
11832 
11833  if (holder == NULL)
11834  {
11835  assert_release (false);
11836  er_status = ER_FAILED_ASSERTION;
11837  goto exit;
11838  }
11839 
11840  next_holder = holder->thrd_link;
11841  /* not necessary to remove each watcher since the holder will be removed completely */
11842 
11843  holder->watch_count = 0;
11844  holder->first_watcher = NULL;
11845  holder->last_watcher = NULL;
11846  holder_fix_cnt = holder->fix_count;
11847 #if defined(PGBUF_ORDERED_DEBUG)
11848  holder_fix_cnt_save = holder_fix_cnt;
11849 #endif
11850 
11851  CAST_BFPTR_TO_PGPTR (pgptr, holder->bufptr);
11852  assert (holder_fix_cnt > 0);
11853  /* prevent deallocate. */
11855  ordered_holders_info[i].prevent_dealloc = true;
11856  while (holder_fix_cnt-- > 0)
11857  {
11858  pgbuf_unfix (thread_p, pgptr);
11859  }
11860 
11861  for (j = 0; j < ordered_holders_info[i].watch_count; j++)
11862  {
11863  PGBUF_WATCHER *pg_watcher;
11864 
11865  pg_watcher = ordered_holders_info[i].watcher[j];
11866 
11867  assert (pg_watcher->pgptr == pgptr);
11869 
11870 #if defined(PGBUF_ORDERED_DEBUG)
11871  _er_log_debug (__FILE__, __LINE__,
11872  "ordered_fix(%u): unfix & clear_watcher(%d/%d): pgptr:%X, VPID:(%d,%d), GROUP:%d,%d, "
11873  "rank:%d/%d, latch_mode:%d, holder_fix_cnt:%d", ordered_fix_id, j + 1,
11874  ordered_holders_info[i].watch_count, pg_watcher->pgptr, ordered_holders_info[i].vpid.volid,
11875  ordered_holders_info[i].vpid.pageid, pg_watcher->group_id.volid, pg_watcher->group_id.pageid,
11876  pg_watcher->curr_rank, pg_watcher->initial_rank, pg_watcher->latch_mode, holder_fix_cnt_save);
11877 #endif
11878  PGBUF_CLEAR_WATCHER (pg_watcher);
11879  pg_watcher->page_was_unfixed = true;
11880 
11881 #if !defined(NDEBUG)
11882  pgbuf_watcher_init_debug (pg_watcher, caller_file, caller_line, true);
11883 #endif
11884  }
11885  holder = next_holder;
11886  }
11887 
11888  /* the following code assumes that if the class OID is deleted, after the requested page is unlatched, the HFID page
11889  * is not reassigned to an ordinary page; in such case, a page deadlock may occur in worst case. Example of scenario
11890  * when such situation may occur : We assume an existing latch on VPID1 (0, 90) 1. Fix requested page VPID2 (0, 100),
11891  * get class_oid from page 2. Unfix requested page 3. Get HFID from schema : < between 2 and 3, other threads drop
11892  * the class, and HFID page is reused, along with current page which may be allocated to the HFID of another class >
11893  * 4. Still assuming that HFID is valid, this thread starts latching pages: In order VPID1, VPID2 At the same time,
11894  * another thread, starts latching pages VPID1 and VPID2, but since this later thread knows that VPID2 is a HFID,
11895  * will use the order VPID2, VPID1. */
11896  if (req_page_has_group == false)
11897  {
11898 #if !defined(NDEBUG)
11899  /* all previous pages with watcher have been unfixed */
11900  holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list;
11901  while (holder != NULL)
11902  {
11903  assert (holder->watch_count == 0);
11904  holder = holder->thrd_link;
11905  }
11906  pgptr =
11907  pgbuf_fix_debug (thread_p, req_vpid, fetch_mode, request_mode, PGBUF_UNCONDITIONAL_LATCH, caller_file,
11908  caller_line);
11909 #else
11910  pgptr = pgbuf_fix_release (thread_p, req_vpid, fetch_mode, request_mode, PGBUF_UNCONDITIONAL_LATCH);
11911 #endif
11912  if (pgptr != NULL)
11913  {
11914  if (has_dealloc_prevent_flag == true)
11915  {
11916  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
11918  has_dealloc_prevent_flag = false;
11919  }
11920  if (pgbuf_get_page_ptype (thread_p, pgptr) == PAGE_HEAP)
11921  {
11922  er_status = pgbuf_get_groupid_and_unfix (thread_p, req_vpid, &pgptr, &req_page_groupid, true);
11923  if (er_status != NO_ERROR)
11924  {
11925  er_status_get_hfid = er_status;
11926  /* continue (re-latch old pages) */
11927  }
11928  }
11929  }
11930  else
11931  {
11932  /* continue */
11933  er_status_get_hfid = er_errid ();
11934  if (er_status_get_hfid == NO_ERROR)
11935  {
11936  er_status_get_hfid = ER_FAILED;
11937  }
11938  }
11939  }
11940 
11941 #if defined(PGBUF_ORDERED_DEBUG)
11942  _er_log_debug (__FILE__, __LINE__,
11943  "ordered_fix(%u) : restore_pages: %d, req_VPID(%d,%d), GROUP(%d,%d), rank:%d/%d", ordered_fix_id,
11944  saved_pages_cnt, req_vpid->volid, req_vpid->pageid, req_watcher->group_id.volid,
11945  req_watcher->group_id.pageid, req_watcher->curr_rank, req_watcher->initial_rank);
11946 #endif
11947 
11948  /* add requested page, watch instance is added after page is fixed */
11949  if (req_page_has_group == true || er_status_get_hfid == NO_ERROR)
11950  {
11951  if (req_page_has_group)
11952  {
11953  VPID_COPY (&(ordered_holders_info[saved_pages_cnt].group_id), &req_watcher->group_id);
11954  }
11955  else
11956  {
11957  assert (!VPID_ISNULL (&req_page_groupid));
11958  VPID_COPY (&req_watcher->group_id, &req_page_groupid);
11959  VPID_COPY (&(ordered_holders_info[saved_pages_cnt].group_id), &req_page_groupid);
11960  }
11961  VPID_COPY (&(ordered_holders_info[saved_pages_cnt].vpid), req_vpid);
11962  if (req_page_has_group)
11963  {
11964  ordered_holders_info[saved_pages_cnt].rank = req_watcher->curr_rank;
11965  }
11966  else
11967  {
11968  if (VPID_EQ (&(ordered_holders_info[saved_pages_cnt].group_id), req_vpid))
11969  {
11970  ordered_holders_info[saved_pages_cnt].rank = PGBUF_ORDERED_HEAP_HDR;
11971  }
11972  else
11973  {
11974  /* leave rank set by user */
11975  ordered_holders_info[saved_pages_cnt].rank = req_watcher->curr_rank;
11976  }
11977  }
11978  ordered_holders_info[saved_pages_cnt].prevent_dealloc = false;
11979  saved_pages_cnt++;
11980  }
11981 
11982  if (saved_pages_cnt > 1)
11983  {
11984  qsort (ordered_holders_info, saved_pages_cnt, sizeof (ordered_holders_info[0]), pgbuf_compare_hold_vpid_for_sort);
11985  }
11986 
11987  /* restore fixes on previously unfixed pages and fix the requested page */
11988  for (i = 0; i < saved_pages_cnt; i++)
11989  {
11990  if (VPID_EQ (req_vpid, &(ordered_holders_info[i].vpid)))
11991  {
11992  curr_request_mode = request_mode;
11993  curr_fetch_mode = fetch_mode;
11994  }
11995  else
11996  {
11997  curr_request_mode = ordered_holders_info[i].latch_mode;
11998  curr_fetch_mode = OLD_PAGE;
11999  }
12000 
12001 #if !defined(NDEBUG)
12002  pgptr =
12003  pgbuf_fix_debug (thread_p, &(ordered_holders_info[i].vpid), curr_fetch_mode, curr_request_mode,
12004  PGBUF_UNCONDITIONAL_LATCH, caller_file, caller_line);
12005 #else
12006  pgptr =
12007  pgbuf_fix_release (thread_p, &(ordered_holders_info[i].vpid), curr_fetch_mode, curr_request_mode,
12009 #endif
12010 
12011  if (pgptr == NULL)
12012  {
12013  er_status = er_errid ();
12014  if (er_status == ER_INTERRUPTED)
12015  {
12016  /* this is expected */
12017  goto exit;
12018  }
12019  if (er_status == ER_PB_BAD_PAGEID)
12020  {
12021  /* page was probably deallocated? so has the impossible indeed happen?? */
12022  assert (false);
12023  er_log_debug (ARG_FILE_LINE, "pgbuf_ordered_fix: page %d|%d was deallocated an we told it not to!\n",
12024  VPID_AS_ARGS (&ordered_holders_info[i].vpid));
12025  }
12026  if (!VPID_EQ (req_vpid, &(ordered_holders_info[i].vpid)))
12027  {
12028  int prev_er_status = er_status;
12029  er_status = ER_PB_ORDERED_REFIX_FAILED;
12030  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 3, ordered_holders_info[i].vpid.volid,
12031  ordered_holders_info[i].vpid.pageid, prev_er_status);
12032  }
12033  goto exit;
12034  }
12035 
12036  /* get holder of last fix: last fixed pages is in top of holder list, we use parse code just for safety */
12037  for (holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list; holder != NULL; holder = holder->thrd_link)
12038  {
12039  if (VPID_EQ (&(holder->bufptr->vpid), &(ordered_holders_info[i].vpid)))
12040  {
12041  break;
12042  }
12043  }
12044 
12045  assert (holder != NULL);
12046 
12047  if (VPID_EQ (req_vpid, &(ordered_holders_info[i].vpid)))
12048  {
12049  ret_pgptr = pgptr;
12050 
12051  if (has_dealloc_prevent_flag == true)
12052  {
12053  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
12055  has_dealloc_prevent_flag = false;
12056  }
12057 
12058  if (req_watcher != NULL)
12059  {
12060 #if !defined(NDEBUG)
12061  pgbuf_add_watch_instance_internal (holder, pgptr, req_watcher, request_mode, true, caller_file,
12062  caller_line);
12063 #else
12064  pgbuf_add_watch_instance_internal (holder, pgptr, req_watcher, request_mode, true);
12065 #endif
12066  req_page_has_watcher = true;
12067 
12068 #if defined(PGBUF_ORDERED_DEBUG)
12069  _er_log_debug (__FILE__, __LINE__,
12070  "ordered_fix(%u) : fixed req page, VPID:(%d,%d), GROUP:%d,%d, "
12071  "rank:%d, pgptr:%X, holder_fix_count:%d, holder_watch_count:%d, holder_fixed_at:%s, ",
12072  ordered_fix_id, ordered_holders_info[i].vpid.volid, ordered_holders_info[i].vpid.pageid,
12073  ordered_holders_info[i].group_id.volid, ordered_holders_info[i].group_id.pageid,
12074  ordered_holders_info[i].rank, pgptr, holder->fix_count, holder->watch_count,
12075  holder->fixed_at);
12076 #endif
12077  }
12078  }
12079  else
12080  {
12081  int j;
12082 
12083  /* page is fixed, therefore avoiding deallocate is no longer necessary */
12084  assert (ordered_holders_info[i].prevent_dealloc);
12085  ordered_holders_info[i].prevent_dealloc = false;
12086  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
12088 
12089  /* page after re-fix should have the same type as before unfix */
12090  (void) pgbuf_check_page_ptype (thread_p, pgptr, ordered_holders_info[i].ptype);
12091 
12092 #if defined(PGBUF_ORDERED_DEBUG)
12093  _er_log_debug (__FILE__, __LINE__,
12094  "ordered_fix(%u) : restore_holder:%X, VPID:(%d,%d), pgptr:%X, holder_fix_count:%d, "
12095  "holder_watch_count:%d, holder_fixed_at:%s, saved_fix_cnt:%d, saved_watch_cnt:%d",
12096  ordered_fix_id, holder, ordered_holders_info[i].vpid.volid,
12097  ordered_holders_info[i].vpid.pageid, pgptr, holder->fix_count, holder->watch_count,
12098  holder->fixed_at, ordered_holders_info[i].fix_cnt, ordered_holders_info[i].watch_count);
12099 #endif
12100 
12101  /* restore number of fixes for previously fixed page: just use pgbuf_fix since it is safer */
12102  for (j = 1; j < ordered_holders_info[i].watch_count; j++)
12103  {
12104 #if !defined(NDEBUG)
12105  pgptr =
12106  pgbuf_fix_debug (thread_p, &(ordered_holders_info[i].vpid), curr_fetch_mode, curr_request_mode,
12107  PGBUF_UNCONDITIONAL_LATCH, caller_file, caller_line);
12108 #else
12109  pgptr =
12110  pgbuf_fix_release (thread_p, &(ordered_holders_info[i].vpid), curr_fetch_mode, curr_request_mode,
12112 #endif
12113  if (pgptr == NULL)
12114  {
12115  assert_release (false);
12116  er_status = ER_FAILED_ASSERTION;
12117  goto exit;
12118  }
12119  }
12120 
12121  for (j = 0; j < ordered_holders_info[i].watch_count; j++)
12122  {
12123 #if !defined(NDEBUG)
12124  pgbuf_add_watch_instance_internal (holder, pgptr, ordered_holders_info[i].watcher[j],
12125  (PGBUF_LATCH_MODE) ordered_holders_info[i].watcher[j]->latch_mode,
12126  false, caller_file, caller_line);
12127 #else
12128  pgbuf_add_watch_instance_internal (holder, pgptr, ordered_holders_info[i].watcher[j],
12129  (PGBUF_LATCH_MODE) ordered_holders_info[i].watcher[j]->latch_mode,
12130  false);
12131 #endif
12132 #if defined(PGBUF_ORDERED_DEBUG)
12133  _er_log_debug (__FILE__, __LINE__,
12134  "ordered_fix(%u) : restore_watcher:%X, GROUP:%d,%d, rank:%d/%d,"
12135  " pgptr:%X, holder_fix_count:%d, holder_watch_count:%d, holder_fixed_at:%s",
12136  ordered_fix_id, ordered_holders_info[i].watcher[j],
12137  ordered_holders_info[i].watcher[j]->group_id.volid,
12138  ordered_holders_info[i].watcher[j]->group_id.pageid,
12139  ordered_holders_info[i].watcher[j]->curr_rank,
12140  ordered_holders_info[i].watcher[j]->initial_rank,
12141  ordered_holders_info[i].watcher[j]->pgptr, holder->fix_count, holder->watch_count,
12142  holder->fixed_at);
12143 #endif /* PGBUF_ORDERED_DEBUG */
12144  }
12145  }
12146  }
12147 
12148 exit:
12149  if (er_status_get_hfid != NO_ERROR && er_status == NO_ERROR)
12150  {
12151  er_status = er_status_get_hfid;
12152  }
12153 
12154  assert (er_status != NO_ERROR || !VPID_ISNULL (&(req_watcher->group_id)));
12155 
12156  if (ret_pgptr != NULL && er_status != NO_ERROR)
12157  {
12158  if (req_page_has_watcher)
12159  {
12160  pgbuf_ordered_unfix_and_init (thread_p, ret_pgptr, req_watcher);
12161  }
12162  else
12163  {
12164  pgbuf_unfix_and_init (thread_p, ret_pgptr);
12165  }
12166  }
12167 
12168  if (req_page_has_group == false && ret_pgptr != NULL && req_watcher->curr_rank != PGBUF_ORDERED_HEAP_HDR
12169  && VPID_EQ (&req_watcher->group_id, req_vpid))
12170  {
12171  req_watcher->curr_rank = PGBUF_ORDERED_HEAP_HDR;
12172  }
12173 
12174  for (i = 0; i < saved_pages_cnt; i++)
12175  {
12176  if (ordered_holders_info[i].prevent_dealloc)
12177  {
12178  /* we need to remove prevent deallocate. */
12179  PGBUF_BUFFER_HASH *hash_anchor = &pgbuf_Pool.buf_hash_table[PGBUF_HASH_VALUE (&ordered_holders_info[i].vpid)];
12180  bufptr = pgbuf_search_hash_chain (thread_p, hash_anchor, &ordered_holders_info[i].vpid);
12181 
12182  if (bufptr == NULL)
12183  {
12184  /* oops... no longer in buffer?? */
12185  assert (false);
12186  pthread_mutex_unlock (&hash_anchor->hash_mutex);
12187  continue;
12188  }
12190  {
12191  /* oops... deallocate not prevented */
12192  assert (false);
12193  }
12194  else
12195  {
12197  }
12198  PGBUF_BCB_UNLOCK (bufptr);
12199  }
12200  }
12201 
12202  return er_status;
12203 }
12204 
12205 /*
12206  * pgbuf_get_groupid_and_unfix () - retrieves group identifier of page and performs unlatch if requested.
12207  * return: error code
12208  * req_vpid(in): id of page for which the group is needed (for debug)
12209  * pgptr(in): page (already latched); only heap page allowed
12210  * groupid(out): group identifier (VPID of HFID)
12211  * do_unfix(in): if true, it unfixes the page.
12212  *
12213  * Note : helper function of ordered fix.
12214  */
12215 static int
12216 pgbuf_get_groupid_and_unfix (THREAD_ENTRY * thread_p, const VPID * req_vpid, PAGE_PTR * pgptr, VPID * groupid,
12217  bool do_unfix)
12218 {
12219  OID cls_oid;
12220  HFID hfid;
12221  int er_status = NO_ERROR;
12222  int thrd_idx;
12223 
12224  assert (pgptr != NULL && *pgptr != NULL);
12225  assert (groupid != NULL);
12226 
12227  VPID_SET_NULL (groupid);
12228 
12229  thrd_idx = thread_get_entry_index (thread_p);
12230 
12231  /* get class oid and hfid */
12232  er_status = heap_get_class_oid_from_page (thread_p, *pgptr, &cls_oid);
12233 
12234  if (do_unfix == true)
12235  {
12236  /* release requested page to avoid deadlocks with catalog pages */
12237  pgbuf_unfix_and_init (thread_p, *pgptr);
12238  }
12239 
12240  if (er_status != NO_ERROR)
12241  {
12242  return er_status;
12243  }
12244 
12245  assert (do_unfix == false || *pgptr == NULL);
12246 
12247  if (OID_IS_ROOTOID (&cls_oid))
12248  {
12249  boot_find_root_heap (&hfid);
12250  }
12251  else
12252  {
12253  er_status = heap_get_class_info (thread_p, &cls_oid, &hfid, NULL, NULL);
12254  }
12255 
12256  if (er_status == NO_ERROR)
12257  {
12258  if (HFID_IS_NULL (&hfid))
12259  {
12260  /* the requested page does not belong to a heap */
12261  er_status = ER_PB_ORDERED_NO_HEAP;
12262  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 2, req_vpid->volid, req_vpid->pageid);
12263  }
12264  else
12265  {
12266  groupid->volid = hfid.vfid.volid;
12267  groupid->pageid = hfid.hpgid;
12268  assert (!VPID_ISNULL (groupid));
12269  }
12270  }
12271 
12272  return er_status;
12273 }
12274 
12275 /*
12276  * pgbuf_ordered_unfix () - Unfix a page which was previously fixed with ordered_fix (has a page watcher)
12277  * return: void
12278  * thread_p(in):
12279  * watcher_object(in/out): page watcher
12280  *
12281  */
12282 #if !defined (NDEBUG)
12283 void
12284 pgbuf_ordered_unfix_debug (THREAD_ENTRY * thread_p, PGBUF_WATCHER * watcher_object, const char *caller_file,
12285  int caller_line)
12286 #else /* NDEBUG */
12287 void
12288 pgbuf_ordered_unfix (THREAD_ENTRY * thread_p, PGBUF_WATCHER * watcher_object)
12289 #endif /* NDEBUG */
12290 {
12291  PGBUF_HOLDER *holder;
12292  PAGE_PTR pgptr;
12293  PGBUF_WATCHER *watcher;
12294 
12295  assert (watcher_object != NULL);
12296 
12297 #if !defined(NDEBUG)
12298  assert (watcher_object->magic == PGBUF_WATCHER_MAGIC_NUMBER);
12299 #endif
12300 
12301  if (watcher_object->pgptr == NULL)
12302  {
12303  assert_release (false);
12304  return;
12305  }
12306 
12307  pgptr = watcher_object->pgptr;
12308 
12309  assert (pgptr != NULL);
12310 
12311  holder = pgbuf_get_holder (thread_p, pgptr);
12312 
12313  assert_release (holder != NULL);
12314 
12315  watcher = holder->last_watcher;
12316  while (watcher != NULL)
12317  {
12318  if (watcher == watcher_object)
12319  {
12320  /* found */
12321  break;
12322  }
12323  watcher = watcher->prev;
12324  }
12325 
12326  assert_release (watcher != NULL);
12327 
12328  assert (holder->fix_count >= holder->watch_count);
12329 
12330  pgbuf_remove_watcher (holder, watcher_object);
12331 
12332 #if !defined(NDEBUG)
12333  pgbuf_watcher_init_debug (watcher_object, caller_file, caller_line, false);
12334  pgbuf_unfix_debug (thread_p, pgptr, caller_file, caller_line);
12335 #else
12336  pgbuf_unfix (thread_p, pgptr);
12337 #endif
12338 }
12339 
12340 /*
12341  * pgbuf_add_watch_instance_internal () - Adds a page watcher for a fixed page
12342  * holder(in): holder object
12343  * pgptr(in): holder object
12344  * watcher(in/out): page watcher
12345  * latch_mode(in): latch mode used for fixing the page
12346  * clear_unfix_flag(in): True to reset page_was_unfixed flag, false otherwise.
12347  *
12348  */
12349 #if !defined(NDEBUG)
12350 STATIC_INLINE void
12352  const PGBUF_LATCH_MODE latch_mode, const bool clear_unfix_flag,
12353  const char *caller_file, const int caller_line)
12354 #else
12355 STATIC_INLINE void
12357  const PGBUF_LATCH_MODE latch_mode, const bool clear_unfix_flag)
12358 #endif
12359 {
12360 #if !defined(NDEBUG)
12361  char *p;
12362 #endif
12363  assert (watcher != NULL);
12364  assert (pgptr != NULL);
12365  assert (holder != NULL);
12366 
12368 
12369  assert (watcher->pgptr == NULL);
12370  assert (watcher->next == NULL);
12371  assert (watcher->prev == NULL);
12372 
12373  if (holder->last_watcher == NULL)
12374  {
12375  assert (holder->first_watcher == NULL);
12376  holder->first_watcher = watcher;
12377  holder->last_watcher = watcher;
12378  }
12379  else
12380  {
12381  watcher->prev = holder->last_watcher;
12382  (holder->last_watcher)->next = watcher;
12383  holder->last_watcher = watcher;
12384  }
12385 
12386  watcher->pgptr = pgptr;
12387  watcher->latch_mode = latch_mode;
12388  if (clear_unfix_flag)
12389  {
12390  watcher->page_was_unfixed = false;
12391  }
12392 
12393  holder->watch_count += 1;
12394 
12395 #if !defined(NDEBUG)
12396  p = (char *) caller_file + strlen (caller_file);
12397  while (p)
12398  {
12399  if (p == caller_file)
12400  {
12401  break;
12402  }
12403 
12404  if (*p == '/' || *p == '\\')
12405  {
12406  p++;
12407  break;
12408  }
12409 
12410  p--;
12411  }
12412 
12413  snprintf (watcher->watched_at, sizeof (watcher->watched_at) - 1, "%s:%d", p, caller_line);
12414 #endif
12415 }
12416 
12417 /*
12418  * pgbuf_attach_watcher () - Add a watcher to a fixed page.
12419  *
12420  * return : Void.
12421  * thread_p (in) : Thread entry.
12422  * pgptr (in) : Fixed page pointer.
12423  * latch_mode (in) : Latch mode.
12424  * hfid (in) : Heap file identifier.
12425  * watcher (out) : Page water.
12426  */
12427 #if !defined (NDEBUG)
12428 void
12430  PGBUF_WATCHER * watcher, const char *caller_file, const int caller_line)
12431 #else /* NDEBUG */
12432 void
12433 pgbuf_attach_watcher (THREAD_ENTRY * thread_p, PAGE_PTR pgptr, PGBUF_LATCH_MODE latch_mode, HFID * hfid,
12434  PGBUF_WATCHER * watcher)
12435 #endif /* NDEBUG */
12436 {
12437  PGBUF_HOLDER *holder = NULL;
12438  VPID header_vpid = VPID_INITIALIZER;
12439  PGBUF_ORDERED_RANK rank;
12440 
12441  assert (pgptr != NULL);
12442  assert (watcher != NULL);
12443  assert (hfid != NULL && !HFID_IS_NULL (hfid));
12444 
12445  header_vpid.volid = hfid->vfid.volid;
12446  header_vpid.pageid = hfid->hpgid;
12447 
12448  /* Set current rank based on page being heap header or not. */
12449  if (VPID_EQ (&header_vpid, pgbuf_get_vpid_ptr (pgptr)))
12450  {
12451  rank = PGBUF_ORDERED_HEAP_HDR;
12452  }
12453  else
12454  {
12456  }
12457 
12458  PGBUF_INIT_WATCHER (watcher, rank, hfid);
12459  watcher->curr_rank = rank;
12460 
12461  holder = pgbuf_get_holder (thread_p, pgptr);
12462  assert (holder != NULL);
12463 
12464 #if !defined (NDEBUG)
12465  pgbuf_add_watch_instance_internal (holder, pgptr, watcher, latch_mode, true, caller_file, caller_line);
12466 #else
12467  pgbuf_add_watch_instance_internal (holder, pgptr, watcher, latch_mode, true);
12468 #endif
12469 }
12470 
12471 /*
12472  * pgbuf_get_holder () - Searches holder of fixed page
12473  * Return : holder object or NULL if not found
12474  * thread_p(in):
12475  * pgptr(in): pgptr
12476  */
12477 static PGBUF_HOLDER *
12479 {
12480  int thrd_idx;
12481  PGBUF_BCB *bufptr;
12482  PGBUF_HOLDER *holder;
12483 
12484  assert (pgptr != NULL);
12485  thrd_idx = thread_get_entry_index (thread_p);
12486 
12487  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
12488 
12489  for (holder = pgbuf_Pool.thrd_holder_info[thrd_idx].thrd_hold_list; holder != NULL; holder = holder->thrd_link)
12490  {
12491  if (bufptr == holder->bufptr)
12492  {
12493  return holder;
12494  }
12495  }
12496 
12497  return NULL;
12498 }
12499 
12500 /*
12501  * pgbuf_remove_watcher () - Removes a page watcher
12502  * holder(in): holder object
12503  * watcher_object(in): watcher object
12504  */
12505 static void
12507 {
12508  PAGE_PTR pgptr;
12509 
12510  assert (watcher_object != NULL);
12511  assert (holder != NULL);
12512 
12513 #if !defined(NDEBUG)
12514  assert (watcher_object->magic == PGBUF_WATCHER_MAGIC_NUMBER);
12515 #endif
12516 
12517  pgptr = watcher_object->pgptr;
12518 
12519  if (holder->first_watcher == watcher_object)
12520  {
12521  assert (watcher_object->prev == NULL);
12522  holder->first_watcher = watcher_object->next;
12523  }
12524  else if (watcher_object->prev != NULL)
12525  {
12526  (watcher_object->prev)->next = watcher_object->next;
12527  }
12528 
12529  if (holder->last_watcher == watcher_object)
12530  {
12531  assert (watcher_object->next == NULL);
12532  holder->last_watcher = watcher_object->prev;
12533  }
12534  else if (watcher_object->next != NULL)
12535  {
12536  (watcher_object->next)->prev = watcher_object->prev;
12537  }
12538  watcher_object->next = NULL;
12539  watcher_object->prev = NULL;
12540  watcher_object->pgptr = NULL;
12541  watcher_object->curr_rank = PGBUF_ORDERED_RANK_UNDEFINED;
12542  holder->watch_count -= 1;
12543 }
12544 
12545 /*
12546  * pgbuf_replace_watcher () - Replaces a page watcher with another page watcher
12547  * thread_p(in):
12548  * old_watcher(in/out): current page watcher to replace
12549  * new_watcher(in/out): new page watcher to use
12550  *
12551  */
12552 #if !defined(NDEBUG)
12553 void
12554 pgbuf_replace_watcher_debug (THREAD_ENTRY * thread_p, PGBUF_WATCHER * old_watcher, PGBUF_WATCHER * new_watcher,
12555  const char *caller_file, const int caller_line)
12556 #else
12557 void
12558 pgbuf_replace_watcher (THREAD_ENTRY * thread_p, PGBUF_WATCHER * old_watcher, PGBUF_WATCHER * new_watcher)
12559 #endif
12560 {
12561  PGBUF_HOLDER *holder;
12562  PAGE_PTR page_ptr;
12563  PGBUF_LATCH_MODE latch_mode;
12564 
12565  assert (old_watcher != NULL);
12566  assert (PGBUF_IS_CLEAN_WATCHER (new_watcher));
12567 
12568 #if !defined(NDEBUG)
12569  assert (old_watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
12570  assert (new_watcher->magic == PGBUF_WATCHER_MAGIC_NUMBER);
12571 #endif
12572 
12573  assert (old_watcher->pgptr != NULL);
12574 
12575  holder = pgbuf_get_holder (thread_p, old_watcher->pgptr);
12576 
12577  assert_release (holder != NULL);
12578 
12579  page_ptr = old_watcher->pgptr;
12580  latch_mode = (PGBUF_LATCH_MODE) old_watcher->latch_mode;
12581  new_watcher->initial_rank = old_watcher->initial_rank;
12582  new_watcher->curr_rank = old_watcher->curr_rank;
12583  VPID_COPY (&new_watcher->group_id, &old_watcher->group_id);
12584 
12585  pgbuf_remove_watcher (holder, old_watcher);
12586 
12587 #if !defined(NDEBUG)
12588  pgbuf_watcher_init_debug (old_watcher, caller_file, caller_line, false);
12589  pgbuf_add_watch_instance_internal (holder, page_ptr, new_watcher, latch_mode, true, caller_file, caller_line);
12590 #else
12591  pgbuf_add_watch_instance_internal (holder, page_ptr, new_watcher, latch_mode, true);
12592 #endif
12593 }
12594 
12595 /*
12596  * pgbuf_ordered_set_dirty_and_free () - Mark as modified the buffer associated and unfixes the page
12597  * (previously fixed with ordered fix)
12598  * return: void
12599  * thread_p(in):
12600  * pg_watcher(in): page watcher holding the page to dirty and unfix
12601  */
12602 void
12604 {
12605  pgbuf_set_dirty (thread_p, pg_watcher->pgptr, DONT_FREE);
12606  pgbuf_ordered_unfix (thread_p, pg_watcher);
12607 }
12608 
12609 /*
12610  * pgbuf_get_condition_for_ordered_fix () - returns the condition which should
12611  * be used to latch (vpid_new_page) knowing that we already have a latch on
12612  * (vpid_fixed_page)
12613  *
12614  * return: latch condition (PGBUF_LATCH_CONDITION)
12615  * vpid_new_page(in):
12616  * vpid_fixed_page(in):
12617  * hfid(in): HFID of both pages
12618  *
12619  * Note: This is intended only for HEAP/HEAP_OVERFLOW pages.
12620  * The user should make sure both pages belong to the same heap.
12621  * To be used when pgbuf_ordered_fix is not possible:
12622  * In vacuum context, unfixing a older page to prevent deadlatch,
12623  * requires flushing of the old page first - this is not possible with
12624  * pgbuf_ordered_fix.
12625  */
12626 int
12627 pgbuf_get_condition_for_ordered_fix (const VPID * vpid_new_page, const VPID * vpid_fixed_page, const HFID * hfid)
12628 {
12629  PGBUF_HOLDER_INFO new_page_holder_info;
12630  PGBUF_HOLDER_INFO fixed_page_holder_info;
12631 
12632  new_page_holder_info.group_id.volid = hfid->vfid.volid;
12633  new_page_holder_info.group_id.pageid = hfid->hpgid;
12634  fixed_page_holder_info.group_id.volid = hfid->vfid.volid;
12635  fixed_page_holder_info.group_id.pageid = hfid->hpgid;
12636 
12637  VPID_COPY (&new_page_holder_info.vpid, vpid_new_page);
12638  VPID_COPY (&fixed_page_holder_info.vpid, vpid_fixed_page);
12639 
12640  if (VPID_EQ (&new_page_holder_info.group_id, &new_page_holder_info.vpid))
12641  {
12642  new_page_holder_info.rank = PGBUF_ORDERED_HEAP_HDR;
12643  }
12644  else
12645  {
12646  new_page_holder_info.rank = PGBUF_ORDERED_HEAP_NORMAL;
12647  }
12648 
12649  if (VPID_EQ (&fixed_page_holder_info.group_id, &fixed_page_holder_info.vpid))
12650  {
12651  fixed_page_holder_info.rank = PGBUF_ORDERED_HEAP_HDR;
12652  }
12653  else
12654  {
12655  fixed_page_holder_info.rank = PGBUF_ORDERED_HEAP_NORMAL;
12656  }
12657 
12658  if (pgbuf_compare_hold_vpid_for_sort (&new_page_holder_info, &fixed_page_holder_info) < 0)
12659  {
12660  return PGBUF_CONDITIONAL_LATCH;
12661  }
12662 
12664 }
12665 
12666 #if !defined(NDEBUG)
12667 /*
12668  * pgbuf_watcher_init_debug () -
12669  * return: void
12670  * watcher(in/out):
12671  * add(in): if add or reset the "init" field
12672  */
12673 void
12674 pgbuf_watcher_init_debug (PGBUF_WATCHER * watcher, const char *caller_file, const int caller_line, bool add)
12675 {
12676  char *p;
12677 
12678  p = (char *) caller_file + strlen (caller_file);
12679  while (p)
12680  {
12681  if (p == caller_file)
12682  {
12683  break;
12684  }
12685 
12686  if (*p == '/' || *p == '\\')
12687  {
12688  p++;
12689  break;
12690  }
12691 
12692  p--;
12693  }
12694 
12695  if (add)
12696  {
12697  char prev_init[256];
12698  strncpy (prev_init, watcher->init_at, sizeof (watcher->init_at) - 1);
12699  prev_init[sizeof (prev_init) - 1] = '\0';
12700  snprintf_dots_truncate (watcher->init_at, sizeof (watcher->init_at) - 1, "%s:%d %s", p, caller_line, prev_init);
12701  }
12702  else
12703  {
12704  snprintf (watcher->init_at, sizeof (watcher->init_at) - 1, "%s:%d", p, caller_line);
12705  }
12706 }
12707 
12708 /*
12709  * pgbuf_is_page_fixed_by_thread () -
12710  * return: true if page is already fixed, false otherwise
12711  * thread_p(in): thread entry
12712  * vpid_p(in): virtual page id
12713  */
12714 bool
12716 {
12717  int thrd_index;
12718  PGBUF_HOLDER_ANCHOR *thrd_holder_info;
12719  PGBUF_HOLDER *thrd_holder;
12720  assert (vpid_p != NULL);
12721 
12722  /* walk holders and try to find page */
12723  thrd_index = thread_get_entry_index (thread_p);
12724  thrd_holder_info = &(pgbuf_Pool.thrd_holder_info[thrd_index]);
12725  for (thrd_holder = thrd_holder_info->thrd_hold_list; thrd_holder != NULL; thrd_holder = thrd_holder->next_holder)
12726  {
12727  if (VPID_EQ (&thrd_holder->bufptr->vpid, vpid_p))
12728  {
12729  return true;
12730  }
12731  }
12732  return false;
12733 }
12734 #endif
12735 
12736 /*
12737  * pgbuf_initialize_page_quota_parameters () - Initializes page quota parameters
12738  *
12739  * return: NO_ERROR, or ER_code
12740  *
12741  * Note: Call this before any LRU initialization
12742  */
12743 static int
12745 {
12746  PGBUF_PAGE_QUOTA *quota;
12747 
12748  quota = &(pgbuf_Pool.quota);
12749  memset (quota, 0, sizeof (PGBUF_PAGE_QUOTA));
12750 
12751  tsc_getticks (&quota->last_adjust_time);
12752  quota->adjust_age = 0;
12753  quota->is_adjusting = 0;
12754 
12755 #if defined (SERVER_MODE)
12757  if (quota->num_private_LRU_list == -1)
12758  {
12759  /* set value automatically to maximum number of workers (active and vacuum). */
12761  }
12762  else if (quota->num_private_LRU_list == 0)
12763  {
12764  /* disabled */
12765  }
12766  else
12767  {
12768  /* set number of workers to the number desired by user (or to minimum accepted) */
12770  {
12771  /* set to minimum count */
12773  }
12774  }
12775 #else /* !SERVER_MODE */ /* SA_MODE */
12776  /* stand-alone quota is disabled */
12777  quota->num_private_LRU_list = 0;
12778 #endif /* SA_MODE */
12779 
12780  return NO_ERROR;
12781 }
12782 
12783 /*
12784  * pgbuf_initialize_page_quota () - Initializes page quota
12785  * return: NO_ERROR, or ER_code
12786  */
12787 static int
12789 {
12790  PGBUF_PAGE_QUOTA *quota;
12791  int i;
12792  int error_status = NO_ERROR;
12793 
12794  quota = &(pgbuf_Pool.quota);
12795 
12797  (float *) malloc (PGBUF_TOTAL_LRU_COUNT * sizeof (quota->lru_victim_flush_priority_per_lru[0]));
12799  {
12800  error_status = ER_OUT_OF_VIRTUAL_MEMORY;
12802  1, (PGBUF_TOTAL_LRU_COUNT * sizeof (quota->lru_victim_flush_priority_per_lru[0])));
12803  goto exit;
12804  }
12805 
12806  quota->private_lru_session_cnt =
12807  (int *) malloc (PGBUF_PRIVATE_LRU_COUNT * sizeof (quota->private_lru_session_cnt[0]));
12808  if (quota->private_lru_session_cnt == NULL)
12809  {
12810  error_status = ER_OUT_OF_VIRTUAL_MEMORY;
12812  1, (PGBUF_TOTAL_LRU_COUNT * sizeof (quota->private_lru_session_cnt[0])));
12813  goto exit;
12814  }
12815 
12816  /* initialize the quota data for each LRU */
12817  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
12818  {
12820 
12822  {
12824  }
12825  }
12826 
12828  {
12829  quota->private_pages_ratio = 1.0f;
12830  }
12831  else
12832  {
12833  quota->private_pages_ratio = 0;
12834  }
12835 
12836  quota->add_shared_lru_idx = 0;
12837  quota->avoid_shared_lru_idx = -1;
12838 
12839 exit:
12840  return error_status;
12841 }
12842 
12843 /*
12844  * pgbuf_initialize_page_monitor () - Initializes page monitor
12845  * return: NO_ERROR, or ER_code
12846  */
12847 static int
12849 {
12850  PGBUF_PAGE_MONITOR *monitor;
12851  int i;
12852  int error_status = NO_ERROR;
12853 #if defined (SERVER_MODE)
12854  size_t count_threads = thread_num_total_threads ();
12855 #endif /* SERVER_MODE */
12856 
12857  monitor = &(pgbuf_Pool.monitor);
12858 
12859  memset (monitor, 0, sizeof (PGBUF_PAGE_MONITOR));
12860 
12861  monitor->lru_hits = (int *) malloc (PGBUF_TOTAL_LRU_COUNT * sizeof (monitor->lru_hits[0]));
12862  if (monitor->lru_hits == NULL)
12863  {
12864  error_status = ER_OUT_OF_VIRTUAL_MEMORY;
12866  1, (PGBUF_TOTAL_LRU_COUNT * sizeof (monitor->lru_hits[0])));
12867  goto exit;
12868  }
12869 
12870  monitor->lru_activity = (int *) malloc (PGBUF_TOTAL_LRU_COUNT * sizeof (monitor->lru_activity[0]));
12871  if (monitor->lru_activity == NULL)
12872  {
12873  error_status = ER_OUT_OF_VIRTUAL_MEMORY;
12875  1, (PGBUF_TOTAL_LRU_COUNT * sizeof (monitor->lru_activity[0])));
12876  goto exit;
12877  }
12878 
12879  /* initialize the monitor data for each LRU */
12880  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
12881  {
12882  monitor->lru_hits[i] = 0;
12883  monitor->lru_activity[i] = 0;
12884  }
12885 
12886  monitor->lru_victim_req_cnt = 0;
12887  monitor->fix_req_cnt = 0;
12888  monitor->pg_unfix_cnt = 0;
12889  monitor->lru_shared_pgs_cnt = 0;
12890 
12891 #if defined (SERVER_MODE)
12892  if (pgbuf_Monitor_locks)
12893  {
12894  monitor->bcb_locks = (PGBUF_MONITOR_BCB_MUTEX *) calloc (count_threads, sizeof (PGBUF_MONITOR_BCB_MUTEX));
12895  if (monitor->bcb_locks == NULL)
12896  {
12897  error_status = ER_OUT_OF_VIRTUAL_MEMORY;
12899  count_threads * sizeof (PGBUF_MONITOR_BCB_MUTEX));
12900  goto exit;
12901  }
12902  }
12903 #endif /* SERVER_MDOE */
12904 
12905  /* no bcb's, no victims */
12906  monitor->victim_rich = false;
12907 
12908 exit:
12909  return error_status;
12910 }
12911 
12912 /*
12913  * pgbuf_compute_lru_vict_target () -
12914  *
12915  * lru_sum_flush_priority(out) : sum of all flush priorities of all LRUs
12916  * return : void
12917  */
12918 static void
12919 pgbuf_compute_lru_vict_target (float *lru_sum_flush_priority)
12920 {
12921  int i;
12922 
12923  float prv_quota;
12924  float prv_real_ratio;
12925  float diff;
12926  float prv_flush_ratio;
12927  float shared_flush_ratio;
12928 
12929  bool use_prv_size = false;
12930 
12931  int total_prv_target = 0;
12932  int this_prv_target = 0;
12933 
12934  PGBUF_LRU_LIST *lru_list;
12935 
12936  assert (lru_sum_flush_priority != NULL);
12937 
12938  *lru_sum_flush_priority = 0;
12939 
12940  prv_quota = pgbuf_Pool.quota.private_pages_ratio;
12941  assert (pgbuf_Pool.monitor.lru_shared_pgs_cnt >= 0
12942  && pgbuf_Pool.monitor.lru_shared_pgs_cnt <= pgbuf_Pool.num_buffers);
12943 
12944  prv_real_ratio = 1.0f - ((float) pgbuf_Pool.monitor.lru_shared_pgs_cnt / pgbuf_Pool.num_buffers);
12945  diff = prv_quota - prv_real_ratio;
12946 
12947  prv_flush_ratio = prv_real_ratio * (1.0f - diff);
12948  prv_flush_ratio = MIN (1.0f, prv_flush_ratio);
12949 
12950  for (i = PGBUF_LRU_INDEX_FROM_PRIVATE (0); i < PGBUF_TOTAL_LRU_COUNT; i++)
12951  {
12952  lru_list = PGBUF_GET_LRU_LIST (i);
12953 
12954  /* note: we target especially over quota private lists or close to quota. we cannot target only over quota lists
12955  * (I tried), because you may find yourself in the peculiar case where quota's are on par with list size, while
12956  * shared are right below minimum desired size... and flush will not find anything.
12957  */
12958  this_prv_target = PGBUF_LRU_LIST_COUNT (lru_list) - (int) (lru_list->quota * 0.9);
12959  this_prv_target = MIN (this_prv_target, lru_list->count_lru3);
12960  if (this_prv_target > 0)
12961  {
12962  total_prv_target += this_prv_target;
12963  }
12964  }
12965  if (total_prv_target == 0)
12966  {
12967  /* can we victimize from shared? */
12968  if (pgbuf_Pool.monitor.lru_shared_pgs_cnt
12969  <= (int) (pgbuf_Pool.num_LRU_list * PGBUF_MIN_SHARED_LIST_ADJUST_SIZE
12970  * (pgbuf_Pool.ratio_lru1 + pgbuf_Pool.ratio_lru2)))
12971  {
12972  /* we won't be able to victimize from shared. this is a backup hack, I don't like to rely on it. let's
12973  * find smarter ways to avoid the case. */
12974  /* right now, considering we target all bcb's beyond 90% of quota, but total_prv_target is 0, that means all
12975  * private bcb's must be less than 90% of buffer. that means shared bcb's have to be 10% or more of buffer.
12976  * PGBUF_MIN_SHARED_LIST_ADJUST_SIZE is currently set to 50, which is 5% to targeted 1k shared list size.
12977  * we shouldn't be here unless I messed up the calculus. */
12978  if (pgbuf_Pool.buf_invalid_list.invalid_cnt > 0)
12979  {
12980  /* This is not really an interesting case.
12981  * Probably both shared and private are small and most of buffers in invalid list.
12982  * We don't really need flush for the case, since BCB could be allocated from invalid list.
12983  */
12984  return;
12985  }
12986 
12987  assert (false);
12988  use_prv_size = true;
12989  prv_flush_ratio = 1.0f;
12990  /* we can compute the zone 3 total size (for privates, zones 1 & 2 are both set to minimum ratio). */
12991  total_prv_target =
12992  (int) ((pgbuf_Pool.num_buffers - pgbuf_Pool.monitor.lru_shared_pgs_cnt)
12993  * (1.0f - 2 * PGBUF_LRU_ZONE_MIN_RATIO));
12994  }
12995  }
12996  shared_flush_ratio = 1.0f - prv_flush_ratio;
12997 
12998  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
12999  {
13000  lru_list = PGBUF_GET_LRU_LIST (i);
13001  if (PGBUF_IS_SHARED_LRU_INDEX (i))
13002  {
13003  pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[i] = shared_flush_ratio / (float) PGBUF_SHARED_LRU_COUNT;
13004  }
13005  else if (PGBUF_IS_PRIVATE_LRU_INDEX (i))
13006  {
13007  if (prv_flush_ratio == 0.0f)
13008  {
13009  pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[i] = 0.0f;
13010  }
13011  else
13012  {
13013  if (use_prv_size)
13014  {
13015  /* back plan: use zone 3 size instead of computed target based on quota. */
13016  this_prv_target = lru_list->count_lru3;
13017  }
13018  else
13019  {
13020  /* use bcb's over 90% of quota as flush target */
13021  this_prv_target = PGBUF_LRU_LIST_COUNT (lru_list) - (int) (lru_list->quota * 0.9);
13022  this_prv_target = MIN (this_prv_target, lru_list->count_lru3);
13023  }
13024  if (this_prv_target > 0)
13025  {
13027  prv_flush_ratio * ((float) this_prv_target / (float) total_prv_target);
13028  }
13029  else
13030  {
13031  pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[i] = 0.0f;
13032  }
13033  }
13034  }
13035  else
13036  {
13037  pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[i] = 0.0f;
13038  }
13039  *lru_sum_flush_priority += pgbuf_Pool.quota.lru_victim_flush_priority_per_lru[i];
13040  }
13041 }
13042 
13043 /*
13044  * pgbuf_adjust_quotas () - Adjusts the quotas for private LRU's. The quota's are decided based on thread activities on
13045  * private and shared lists. Activity is counted as number of accessed pages.
13046  * Based on quota's, the thread also sets zone thresholds for each LRU.
13047  *
13048  * return : void
13049  * thread_p (in) : thread entry
13050  */
13051 void
13053 {
13054 #define MAX_PRIVATE_RATIO 0.998f
13055 #define MIN_PRIVATE_RATIO 0.01f
13056 
13057  PGBUF_PAGE_QUOTA *quota;
13058  PGBUF_PAGE_MONITOR *monitor;
13059  int i;
13060  int all_private_quota;
13061  int sum_private_lru_activity_total = 0;
13062  TSC_TICKS curr_tick;
13063  INT64 diff_usec;
13064  int lru_hits;
13065  int lru_shared_hits = 0;
13066  int lru_private_hits = 0;
13067  float private_ratio;
13068  int avg_shared_lru_size;
13069  int shared_threshold_lru1;
13070  int shared_threshold_lru2;
13071  int new_quota;
13072  float new_lru_ratio;
13073  const INT64 onesec_usec = 1000000LL;
13074  const INT64 tensec_usec = 10 * onesec_usec;
13075  int total_victims = 0;
13076  bool low_overall_activity = false;
13077 
13078  PGBUF_LRU_LIST *lru_list;
13079 
13080  if (thread_p == NULL)
13081  {
13082  assert (thread_p != NULL);
13083  thread_p = thread_get_thread_entry_info ();
13084  }
13085 
13086  quota = &(pgbuf_Pool.quota);
13087  monitor = &(pgbuf_Pool.monitor);
13088 
13090  {
13091  return;
13092  }
13093 
13094  quota->is_adjusting = 1;
13095 
13096  tsc_getticks (&curr_tick);
13097  diff_usec = tsc_elapsed_utime (curr_tick, quota->last_adjust_time);
13098  if (diff_usec < 1000LL)
13099  {
13100  /* less than 1 msec. stop */
13101  quota->is_adjusting = 0;
13102  return;
13103  }
13104 
13105  /* quota adjust if :
13106  * - or more than 500 msec since last adjustment and activity is more than threshold
13107  * - or more than 5 min since last adjustment and activity is more 1% of threshold
13108  * Activity of page buffer is measured in number of page unfixes
13109  */
13110  if (pgbuf_Pool.monitor.pg_unfix_cnt < PGBUF_TRAN_THRESHOLD_ACTIVITY && diff_usec < 500000LL)
13111  {
13112  quota->is_adjusting = 0;
13113  return;
13114  }
13115  if (ATOMIC_TAS_32 (&monitor->pg_unfix_cnt, 0) < PGBUF_TRAN_THRESHOLD_ACTIVITY / 100)
13116  {
13117  low_overall_activity = true;
13118  }
13119 
13120  quota->last_adjust_time = curr_tick;
13121 
13122  (void) ATOMIC_INC_32 (&quota->adjust_age, 1);
13123 
13124  /* process hits since last adjust:
13125  * 1. collect lru_private_hits and lru_shared_hits.
13126  * 2. update each private list activity.
13127  * 3. collect total activity.
13128  */
13129  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
13130  {
13131  /* get hits since last adjust and reset */
13132  lru_hits = ATOMIC_TAS_32 (&monitor->lru_hits[i], 0);
13133  /* compute hits per second */
13134  lru_hits = (int) (onesec_usec * lru_hits / diff_usec);
13135 
13137  {
13138  /* adjust private lru activity. for convenience reasons, we consider that previous lru_activity value was same
13139  * for 10 seconds minus the time since last adjustment. if previous adjustment is more than 10 seconds old
13140  * then we set new activity. */
13141  if (diff_usec >= tensec_usec)
13142  {
13143  /* set current activity */
13144  monitor->lru_activity[i] = lru_hits;
13145  }
13146  else
13147  {
13148  /* interpolate old activity with new activity */
13149  monitor->lru_activity[i] =
13150  (int) (((tensec_usec - diff_usec) * monitor->lru_activity[i] + diff_usec * lru_hits) / tensec_usec);
13151  }
13152  /* collect to total activity */
13153  sum_private_lru_activity_total += monitor->lru_activity[i];
13154 
13155  /* collect to total private hits */
13156  lru_private_hits += lru_hits;
13157  }
13158  else
13159  {
13160  /* collect to total shared hits */
13161  lru_shared_hits += lru_hits;
13162  }
13163 
13164  lru_list = PGBUF_GET_LRU_LIST (i);
13165  total_victims += lru_list->count_vict_cand;
13166  }
13167 
13168  /* compute private ratio */
13169  if (low_overall_activity)
13170  {
13171  private_ratio = MIN_PRIVATE_RATIO;
13172  }
13173  else
13174  {
13175  /* avoid division by 0 */
13176  lru_shared_hits = MAX (1, lru_shared_hits);
13177  private_ratio = (float) (lru_private_hits) / (float) (lru_private_hits + lru_shared_hits);
13178  private_ratio = MIN (MAX_PRIVATE_RATIO, private_ratio);
13179  private_ratio = MAX (MIN_PRIVATE_RATIO, private_ratio);
13180  }
13181  if (diff_usec >= tensec_usec)
13182  {
13183  quota->private_pages_ratio = private_ratio;
13184  }
13185  else
13186  {
13187  quota->private_pages_ratio =
13188  ((quota->private_pages_ratio * (float) (tensec_usec - diff_usec) + private_ratio * (float) diff_usec)
13189  / (float) tensec_usec);
13190  }
13191 
13192  if (sum_private_lru_activity_total == 0)
13193  {
13194  /* no private activity */
13195  /* well I guess we can just set all quota's to 0. */
13196  all_private_quota = 0;
13197  for (i = PGBUF_SHARED_LRU_COUNT; i < PGBUF_TOTAL_LRU_COUNT; i++)
13198  {
13199  lru_list = PGBUF_GET_LRU_LIST (i);
13200 
13201  lru_list->quota = 0;
13202  lru_list->threshold_lru1 = 0;
13203  lru_list->threshold_lru2 = 0;
13204  if (lru_list->count_lru1 + lru_list->count_lru2 > 0)
13205  {
13206  pthread_mutex_lock (&lru_list->mutex);
13207  pgbuf_lru_adjust_zones (thread_p, lru_list, false);
13208  pthread_mutex_unlock (&lru_list->mutex);
13210  }
13211  if (lru_list->count_vict_cand > 0 && PGBUF_LRU_LIST_IS_OVER_QUOTA (lru_list))
13212  {
13213  /* make sure this is added to victim list */
13214  if (pgbuf_lfcq_add_lru_with_victims (lru_list)
13216  {
13217  /* added to queue of lru lists having victims. */
13218  }
13219  }
13220  }
13221  }
13222  else
13223  {
13224  /* compute all_private_quota in number of bcb's */
13225  all_private_quota =
13226  (int) ((pgbuf_Pool.num_buffers - pgbuf_Pool.buf_invalid_list.invalid_cnt) * quota->private_pages_ratio);
13227 
13228  /* split private bcb's quota's based on activity */
13229  for (i = PGBUF_SHARED_LRU_COUNT; i < PGBUF_TOTAL_LRU_COUNT; i++)
13230  {
13231  if (monitor->lru_activity[i] > 0)
13232  {
13233  new_lru_ratio = (float) monitor->lru_activity[i] / (float) sum_private_lru_activity_total;
13234  }
13235  else
13236  {
13237  new_lru_ratio = 0.0f;
13238  }
13239 
13240  new_quota = (int) (new_lru_ratio * all_private_quota);
13241  new_quota = MIN (new_quota, PGBUF_PRIVATE_LRU_MAX_HARD_QUOTA);
13242  new_quota = MIN (new_quota, pgbuf_Pool.num_buffers / 2);
13243 
13244  lru_list = PGBUF_GET_LRU_LIST (i);
13245  lru_list->quota = new_quota;
13246  lru_list->threshold_lru1 = (int) (new_quota * PGBUF_LRU_ZONE_MIN_RATIO);
13247  lru_list->threshold_lru2 = (int) (new_quota * PGBUF_LRU_ZONE_MIN_RATIO);
13248 
13249  if (PGBUF_LRU_LIST_IS_ONE_TWO_OVER_QUOTA (lru_list))
13250  {
13251  pthread_mutex_lock (&lru_list->mutex);
13252  pgbuf_lru_adjust_zones (thread_p, lru_list, false);
13253  pthread_mutex_unlock (&lru_list->mutex);
13254 
13256  }
13257  if (lru_list->count_vict_cand > 0 && PGBUF_LRU_LIST_IS_OVER_QUOTA (lru_list))
13258  {
13259  /* make sure this is added to victim list */
13260  if (pgbuf_lfcq_add_lru_with_victims (lru_list)
13262  {
13263  /* added to queue of lru lists having victims. */
13264  }
13265  }
13266  }
13267  }
13268 
13269  /* set shared target size */
13270  avg_shared_lru_size = (pgbuf_Pool.num_buffers - all_private_quota) / pgbuf_Pool.num_LRU_list;
13271  avg_shared_lru_size = MAX (avg_shared_lru_size, PGBUF_MIN_SHARED_LIST_ADJUST_SIZE);
13272  shared_threshold_lru1 = (int) (avg_shared_lru_size * pgbuf_Pool.ratio_lru1);
13273  shared_threshold_lru2 = (int) (avg_shared_lru_size * pgbuf_Pool.ratio_lru2);
13274  for (i = 0; i < PGBUF_SHARED_LRU_COUNT; i++)
13275  {
13276  lru_list = PGBUF_GET_LRU_LIST (i);
13277  lru_list->threshold_lru1 = shared_threshold_lru1;
13278  lru_list->threshold_lru2 = shared_threshold_lru2;
13279 
13281  {
13282  pthread_mutex_lock (&lru_list->mutex);
13283  pgbuf_lru_adjust_zones (thread_p, lru_list, false);
13284  pthread_mutex_unlock (&lru_list->mutex);
13285  }
13286 
13287  if (lru_list->count_vict_cand > 0)
13288  {
13289  /* make sure this is added to victim list */
13290  if (pgbuf_lfcq_add_lru_with_victims (lru_list)
13292  {
13293  /* added to queue of lru lists having victims. */
13294  }
13295  }
13296  }
13297 
13298  /* is pool victim rich? we consider this true if the victim count is more than 10% of page buffer. I think we could
13299  * lower the bar a little bit */
13300  pgbuf_Pool.monitor.victim_rich = total_victims >= (int) (0.1 * pgbuf_Pool.num_buffers);
13301 
13302  quota->is_adjusting = 0;
13303 }
13304 
13305 /*
13306  * pgbuf_assign_private_lru_id () -
13307  * return: NO_ERROR
13308  * is_vacuum(in): true if client is a vacuum thread
13309  * id(in): id of client (vacuum index or session id)
13310  */
13311 int
13312 pgbuf_assign_private_lru (THREAD_ENTRY * thread_p, bool is_vacuum, const int id)
13313 {
13314  int i;
13315  int min_activitity;
13316  int min_bcbs;
13317  int lru_cand_idx, lru_cand_zero_sessions;
13318  int private_idx;
13319  int cnt_lru;
13320  PGBUF_PAGE_MONITOR *monitor;
13321  PGBUF_PAGE_QUOTA *quota;
13322  int retry_cnt = 0;
13323 
13325  {
13326  return -1;
13327  }
13328 
13329  monitor = &pgbuf_Pool.monitor;
13330  quota = &pgbuf_Pool.quota;
13331 
13332  /* Priority for choosing a private list :
13333  * 1. the list with zero sessions having the least number of pages
13334  * 2. the list having least activity */
13335 
13336 retry:
13337  lru_cand_zero_sessions = -1;
13338  lru_cand_idx = -1;
13339  min_bcbs = pgbuf_Pool.num_buffers;
13340  min_activitity = PGBUF_TRAN_MAX_ACTIVITY;
13341  for (i = PGBUF_SHARED_LRU_COUNT; i < PGBUF_TOTAL_LRU_COUNT; i++)
13342  {
13344  {
13345  cnt_lru = PGBUF_LRU_LIST_COUNT (PGBUF_GET_LRU_LIST (i));
13346  if (cnt_lru < min_bcbs)
13347  {
13348  min_bcbs = cnt_lru;
13349  lru_cand_zero_sessions = i;
13350 
13351  if (min_bcbs <= 0)
13352  {
13353  break;
13354  }
13355  }
13356  }
13357  if (monitor->lru_activity[i] < min_activitity)
13358  {
13359  min_activitity = monitor->lru_activity[i];
13360  lru_cand_idx = i;
13361  }
13362  }
13363 
13364  if (lru_cand_zero_sessions != -1)
13365  {
13366  lru_cand_idx = lru_cand_zero_sessions;
13367  }
13368 
13369  assert (lru_cand_idx != -1);
13370 
13371  cnt_lru = PGBUF_LRU_LIST_COUNT (PGBUF_GET_LRU_LIST (lru_cand_idx));
13372 
13373  private_idx = PGBUF_PRIVATE_LIST_FROM_LRU_INDEX (lru_cand_idx);
13374 
13375  if (lru_cand_zero_sessions != -1)
13376  {
13377  if (ATOMIC_INC_32 (&quota->private_lru_session_cnt[private_idx], 1) > 1)
13378  {
13379  /* another thread stole this lru, retry */
13380  if (retry_cnt++ < 5)
13381  {
13382  ATOMIC_INC_32 (&quota->private_lru_session_cnt[private_idx], -1);
13383  goto retry;
13384  }
13385  }
13386  }
13387  else
13388  {
13389  ATOMIC_INC_32 (&quota->private_lru_session_cnt[private_idx], 1);
13390  }
13391 
13392  /* TODO: is this necessary? */
13393  pgbuf_adjust_quotas (thread_p);
13394 
13395  return private_idx;
13396 }
13397 
13398 /*
13399  * pgbuf_release_private_lru () -
13400  * return: NO_ERROR
13401  * bufptr(in): pointer to buffer page
13402  *
13403  * Note: This function puts BCB to the bottom of the LRU list.
13404  */
13405 int
13406 pgbuf_release_private_lru (THREAD_ENTRY * thread_p, const int private_idx)
13407 {
13408  if (PGBUF_PAGE_QUOTA_IS_ENABLED && private_idx >= 0 && private_idx < PGBUF_PRIVATE_LRU_COUNT
13409  && pgbuf_Pool.num_buffers > 0)
13410  {
13411  if (ATOMIC_INC_32 (&pgbuf_Pool.quota.private_lru_session_cnt[private_idx], -1) <= 0)
13412  {
13413  ATOMIC_TAS_32 (&pgbuf_Pool.monitor.lru_activity[PGBUF_LRU_INDEX_FROM_PRIVATE (private_idx)], 0);
13414  /* TODO: is this necessary? */
13415  pgbuf_adjust_quotas (thread_p);
13416  }
13417  }
13418  return NO_ERROR;
13419 }
13420 
13421 /*
13422  * pgbuf_initialize_seq_flusher () - Initializes sequential flusher on a list of pages to be flushed
13423  *
13424  * return: error code
13425  * seq_flusher(in/out):
13426  * f_list(in/out): flush list to use or NULL if needs to be allocated
13427  * cnt(in/out): size of flush list
13428  */
13429 static int
13431 {
13432  int alloc_size;
13433 
13434  memset (seq_flusher, 0, sizeof (*seq_flusher));
13435  seq_flusher->flush_max_size = cnt;
13436 
13437  if (f_list != NULL)
13438  {
13439  seq_flusher->flush_list = f_list;
13440  }
13441  else
13442  {
13443  alloc_size = seq_flusher->flush_max_size * sizeof (seq_flusher->flush_list[0]);
13444  seq_flusher->flush_list = (PGBUF_VICTIM_CANDIDATE_LIST *) malloc (alloc_size);
13445  if (seq_flusher->flush_list == NULL)
13446  {
13448  return ER_OUT_OF_VIRTUAL_MEMORY;
13449  }
13450  }
13451  seq_flusher->flush_cnt = 0;
13452  seq_flusher->flush_idx = 0;
13453  seq_flusher->burst_mode = true;
13454 
13455  seq_flusher->control_intervals_cnt = 0;
13456  seq_flusher->control_flushed = 0;
13457 
13458  return NO_ERROR;
13459 }
13460 
13461 /*
13462  * pgbuf_has_any_waiters () - Quick check if page has any waiters.
13463  *
13464  * return : True if page has any waiters, false otherwise.
13465  * pgptr (in) : Page pointer.
13466  */
13467 bool
13469 {
13470 #if defined (SERVER_MODE)
13471  PGBUF_BCB *bufptr = NULL;
13472  bool has_waiter;
13473 
13474  /* note: we rule out flush waiters here */
13475 
13476  assert (pgptr != NULL);
13477  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
13478 
13479  PGBUF_BCB_LOCK (bufptr);
13480  has_waiter = pgbuf_is_exist_blocked_reader_writer (bufptr);
13481  PGBUF_BCB_UNLOCK (bufptr);
13482  return has_waiter;
13483 #else
13484  return false;
13485 #endif
13486 }
13487 
13488 /*
13489  * pgbuf_has_any_non_vacuum_waiters () - Check if page has any non-vacuum waiters.
13490  *
13491  * return : True if page has waiters, false otherwise.
13492  * pgptr (in) : Page pointer.
13493  */
13494 bool
13496 {
13497 #if defined (SERVER_MODE)
13498  PGBUF_BCB *bufptr = NULL;
13499  THREAD_ENTRY *thread_entry_p;
13500 
13501  assert (pgptr != NULL);
13502  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
13503 
13504  thread_entry_p = bufptr->next_wait_thrd;
13505  while (thread_entry_p != NULL)
13506  {
13507  if (thread_entry_p->type != TT_VACUUM_WORKER)
13508  {
13509  return true;
13510  }
13511  thread_entry_p = thread_entry_p->next_wait_thrd;
13512  }
13513 
13514  return false;
13515 #else
13516  return false;
13517 #endif
13518 }
13519 
13520 /*
13521  * pgbuf_has_prevent_dealloc () - Quick check if page has any scanners.
13522  *
13523  * return : True if page has any waiters, false otherwise.
13524  * pgptr (in) : Page pointer.
13525  */
13526 bool
13528 {
13529 #if defined (SERVER_MODE)
13530  PGBUF_BCB *bufptr = NULL;
13531 
13532  assert (pgptr != NULL);
13533  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
13534 
13535  return pgbuf_bcb_should_avoid_deallocation (bufptr);
13536 #else
13537  return false;
13538 #endif
13539 }
13540 
13541 void
13542 pgbuf_peek_stats (UINT64 * fixed_cnt, UINT64 * dirty_cnt, UINT64 * lru1_cnt, UINT64 * lru2_cnt, UINT64 * lru3_cnt,
13543  UINT64 * victim_candidates, UINT64 * avoid_dealloc_cnt, UINT64 * avoid_victim_cnt,
13544  UINT64 * private_quota, UINT64 * private_cnt, UINT64 * alloc_bcb_waiter_high,
13545  UINT64 * alloc_bcb_waiter_med, UINT64 * flushed_bcbs_waiting_direct_assign,
13546  UINT64 * lfcq_big_prv_num, UINT64 * lfcq_prv_num, UINT64 * lfcq_shr_num)
13547 {
13548  PGBUF_BCB *bufptr;
13549  int i;
13550  int bcb_flags;
13551  PGBUF_ZONE zone;
13552 
13553  *fixed_cnt = 0;
13554  *dirty_cnt = 0;
13555  *lru1_cnt = 0;
13556  *lru2_cnt = 0;
13557  *lru3_cnt = 0;
13558  *avoid_dealloc_cnt = 0;
13559  *avoid_victim_cnt = 0;
13560  *private_cnt = 0;
13561  *victim_candidates = 0;
13562 
13563  for (i = 0; i < pgbuf_Pool.num_buffers; i++)
13564  {
13565  bufptr = PGBUF_FIND_BCB_PTR (i);
13566  if (bufptr->fcnt > 0)
13567  {
13568  *fixed_cnt = *fixed_cnt + 1;
13569  }
13570 
13571  /* copy flags. we do not lock the bcb and we can be affected by concurrent changes. */
13572  bcb_flags = bufptr->flags;
13573  if (bcb_flags & PGBUF_BCB_DIRTY_FLAG)
13574  {
13575  *dirty_cnt = *dirty_cnt + 1;
13576  }
13577 
13578  zone = PGBUF_GET_ZONE (bcb_flags);
13579  if (zone == PGBUF_LRU_1_ZONE)
13580  {
13581  *lru1_cnt = *lru1_cnt + 1;
13582  }
13583  else if (zone == PGBUF_LRU_2_ZONE)
13584  {
13585  *lru2_cnt = *lru2_cnt + 1;
13586  }
13587  else if (zone == PGBUF_LRU_3_ZONE)
13588  {
13589  *lru3_cnt = *lru3_cnt + 1;
13590  }
13591 
13593  {
13594  *avoid_dealloc_cnt = *avoid_dealloc_cnt + 1;
13595  }
13596 
13597  if (bcb_flags & PGBUF_BCB_FLUSHING_TO_DISK_FLAG)
13598  {
13599  *avoid_victim_cnt = *avoid_victim_cnt + 1;
13600  }
13601 
13602  if (zone & PGBUF_LRU_ZONE_MASK)
13603  {
13605  {
13606  *private_cnt = *private_cnt + 1;
13607  }
13608  }
13609  }
13610  for (i = 0; i < PGBUF_TOTAL_LRU_COUNT; i++)
13611  {
13612  *victim_candidates = *victim_candidates + pgbuf_Pool.buf_LRU_list[i].count_vict_cand;
13613  }
13614 
13615  *private_quota = (UINT64) (pgbuf_Pool.quota.private_pages_ratio * pgbuf_Pool.num_buffers);
13616 
13617 #if defined (SERVER_MODE)
13618  *alloc_bcb_waiter_high = pgbuf_Pool.direct_victims.waiter_threads_high_priority->size ();
13619  *alloc_bcb_waiter_med = pgbuf_Pool.direct_victims.waiter_threads_low_priority->size ();
13620  *flushed_bcbs_waiting_direct_assign = pgbuf_Pool.flushed_bcbs->size ();
13621 #else /* !SERVER_MODE */
13622  *alloc_bcb_waiter_high = 0;
13623  *alloc_bcb_waiter_med = 0;
13624  *flushed_bcbs_waiting_direct_assign = 0;
13625 #endif /* !SERVER_MODE */
13626 
13627  if (pgbuf_Pool.big_private_lrus_with_victims != NULL)
13628  {
13629  *lfcq_big_prv_num = pgbuf_Pool.big_private_lrus_with_victims->size ();
13630  }
13631 
13632  if (pgbuf_Pool.private_lrus_with_victims != NULL)
13633  {
13634  *lfcq_prv_num = pgbuf_Pool.private_lrus_with_victims->size ();
13635  }
13636 
13637  *lfcq_shr_num = pgbuf_Pool.shared_lrus_with_victims->size ();
13638 }
13639 
13640 /*
13641  * pgbuf_flush_control_from_dirty_ratio () - Try to control adaptive flush aggressiveness based on the
13642  * page buffer "dirtiness".
13643  *
13644  * return : Suggested number to increase flush rate.
13645  */
13646 int
13648 {
13649  static int prev_dirties_cnt = 0;
13650  int crt_dirties_cnt = (int) pgbuf_Pool.monitor.dirties_cnt;
13651  int desired_dirty_cnt = pgbuf_Pool.num_buffers / 2;
13652  int adapt_flush_rate = 0;
13653 
13654  /* If the dirty ratio is now above the desired level, try to suggest a more aggressive flush to bring it back. */
13655  if (crt_dirties_cnt > desired_dirty_cnt)
13656  {
13657  /* Try to get dirties count back to dirty desired ratio. */
13658  /* Accelerate the rate when dirties count is higher. */
13659  int dirties_above_desired_cnt = crt_dirties_cnt - desired_dirty_cnt;
13660  int total_above_desired_cnt = pgbuf_Pool.num_buffers - desired_dirty_cnt;
13661 
13662  adapt_flush_rate = dirties_above_desired_cnt * dirties_above_desired_cnt / total_above_desired_cnt;
13663  }
13664 
13665  /* Now consider dirty growth rate. Even if page buffer dirty ratio is not yet reached, try to avoid a sharp growth.
13666  * Flush may be not be aggressive enough and may require time to get there. In the meantime, the dirty ratio could go
13667  * well beyond the desired ratio. */
13668  if (crt_dirties_cnt > prev_dirties_cnt)
13669  {
13670  int diff = crt_dirties_cnt - prev_dirties_cnt;
13671 
13672  /* Set a weight on the difference based on the dirty rate of buffer. */
13673  adapt_flush_rate += diff * crt_dirties_cnt / pgbuf_Pool.num_buffers;
13674 
13675  prev_dirties_cnt = crt_dirties_cnt;
13676  }
13677 
13678  return adapt_flush_rate;
13679 }
13680 
13681 /*
13682  * pgbuf_rv_flush_page () - Flush page during recovery. Some changes must be flushed immediately to provide
13683  * consistency, in case server crashes again during recovery.
13684  *
13685  * return : Error code.
13686  * thread_p (in) : Thread entry.
13687  * rcv (in) : Recovery data (VPID of page to flush).
13688  */
13689 int
13691 {
13692  PAGE_PTR page_to_flush = NULL;
13693  VPID vpid_to_flush = VPID_INITIALIZER;
13695 
13696  assert (rcv->pgptr == NULL);
13697  assert (rcv->length == sizeof (VPID));
13698 
13699  VPID_COPY (&vpid_to_flush, (VPID *) rcv->data);
13700  page_to_flush =
13702  if (page_to_flush == NULL)
13703  {
13704  /* Page no longer exist. */
13705  er_clear ();
13706  return NO_ERROR;
13707  }
13708  /* Flush page and unfix. */
13709  /* add a log or else the end of logical system operation will complain */
13710  log_append_empty_record (thread_p, LOG_DUMMY_GENERIC, &addr);
13711  pgbuf_set_dirty (thread_p, page_to_flush, DONT_FREE);
13712  pgbuf_flush (thread_p, page_to_flush, DONT_FREE);
13713  pgbuf_unfix (thread_p, page_to_flush);
13714 
13715  return NO_ERROR;
13716 }
13717 
13718 /*
13719  * pgbuf_rv_flush_page_dump () - Dump data for recovery page flush.
13720  *
13721  * return : Void.
13722  * fp (in) : Output target.
13723  * length (in) : Length of recovery data.
13724  * data (in) : Recovery data (VPID of page to flush).
13725  */
13726 void
13727 pgbuf_rv_flush_page_dump (FILE * fp, int length, void *data)
13728 {
13729  VPID vpid_to_flush = VPID_INITIALIZER;
13730 
13731  assert (length == sizeof (VPID));
13732 
13733  VPID_COPY (&vpid_to_flush, (VPID *) data);
13734  fprintf (fp, "Page to flush: %d|%d. \n", vpid_to_flush.volid, vpid_to_flush.pageid);
13735 }
13736 
13737 /*
13738  * pgbuf_latch_mode_str () - print latch_mode
13739  *
13740  * return : const char *
13741  * latch_mode (in) :
13742  */
13743 static const char *
13745 {
13746  const char *latch_mode_str;
13747 
13748  switch (latch_mode)
13749  {
13750  case PGBUF_NO_LATCH:
13751  latch_mode_str = "No Latch";
13752  break;
13753  case PGBUF_LATCH_READ:
13754  latch_mode_str = "Read";
13755  break;
13756  case PGBUF_LATCH_WRITE:
13757  latch_mode_str = "Write";
13758  break;
13759  case PGBUF_LATCH_FLUSH:
13760  latch_mode_str = "Flush";
13761  break;
13762  default:
13763  latch_mode_str = "Fault";
13764  break;
13765  }
13766 
13767  return latch_mode_str;
13768 }
13769 
13770 /*
13771  * pgbuf_zone_str () - print zone info
13772  *
13773  * return : const char *
13774  * zone (in) :
13775  */
13776 static const char *
13778 {
13779  const char *zone_str;
13780 
13781  switch (zone)
13782  {
13783  case PGBUF_LRU_1_ZONE:
13784  zone_str = "LRU_1_Zone";
13785  break;
13786  case PGBUF_LRU_2_ZONE:
13787  zone_str = "LRU_2_Zone";
13788  break;
13789  case PGBUF_LRU_3_ZONE:
13790  zone_str = "LRU_3_Zone";
13791  break;
13792  case PGBUF_INVALID_ZONE:
13793  zone_str = "INVALID_Zone";
13794  break;
13795  default:
13796  zone_str = "VOID_Zone";
13797  break;
13798  }
13799 
13800  return zone_str;
13801 }
13802 
13803 /*
13804  * pgbuf_consistent_str () - print consistent info
13805  *
13806  * return : const char *
13807  * consistent (in) :
13808  */
13809 static const char *
13810 pgbuf_consistent_str (int consistent)
13811 {
13812  const char *consistent_str;
13813 
13814  switch (consistent)
13815  {
13816  case PGBUF_CONTENT_GOOD:
13817  consistent_str = "GOOD";
13818  break;
13819  case PGBUF_CONTENT_BAD:
13820  consistent_str = "BAD";
13821  break;
13822  default:
13823  consistent_str = "LIKELY BAD";
13824  break;
13825  }
13826 
13827  return consistent_str;
13828 }
13829 
13830 /*
13831  * pgbuf_get_fix_count () - Get page fix count.
13832  *
13833  * return : Fix count.
13834  * pgptr (in) : Page pointer.
13835  */
13836 int
13838 {
13839  PGBUF_BCB *bufptr = NULL;
13840 
13841  assert (pgptr != NULL);
13842 
13843  CAST_PGPTR_TO_BFPTR (bufptr, pgptr);
13844 
13845  return bufptr->fcnt;
13846 }
13847 
13848 /*
13849  * pgbuf_get_hold_count () - Get hold count for current thread.
13850  *
13851  * return : Hold count
13852  * thread_p (in) : Thread entry
13853  */
13854 int
13856 {
13857  int me = thread_get_entry_index (thread_p);
13858  return pgbuf_Pool.thrd_holder_info[me].num_hold_cnt;
13859 }
13860 
13861 /*
13862  * pgbuf_get_page_type_for_stat () - Return the page type for current page
13863  *
13864  * return : page type
13865  * pgptr (in) : pointer to a page
13866  */
13869 {
13870  PERF_PAGE_TYPE perf_page_type;
13871  FILEIO_PAGE *io_pgptr;
13872 
13873  CAST_PGPTR_TO_IOPGPTR (io_pgptr, pgptr);
13874  if ((io_pgptr->prv.ptype == PAGE_BTREE)
13876  {
13877  perf_page_type = btree_get_perf_btree_page_type (thread_p, pgptr);
13878  }
13879  else
13880  {
13881  perf_page_type = (PERF_PAGE_TYPE) io_pgptr->prv.ptype;
13882  }
13883 
13884  return perf_page_type;
13885 }
13886 
13887 /*
13888  * pgbuf_log_new_page () - log new page being created
13889  *
13890  * return : error code
13891  * thread_p (in) : thread entry
13892  * page_new (in) : new page
13893  * data_size (in) : size of page data
13894  * ptype_new (in) : new page type
13895  */
13896 void
13897 pgbuf_log_new_page (THREAD_ENTRY * thread_p, PAGE_PTR page_new, int data_size, PAGE_TYPE ptype_new)
13898 {
13899  assert (ptype_new != PAGE_UNKNOWN);
13900  assert (page_new != NULL);
13901  assert (data_size > 0);
13902 
13903  log_append_undoredo_data2 (thread_p, RVPGBUF_NEW_PAGE, NULL, page_new, (PGLENGTH) ptype_new, 0, data_size, NULL,
13904  page_new);
13905  pgbuf_set_dirty (thread_p, page_new, DONT_FREE);
13906 }
13907 
13908 void
13909 pgbuf_log_redo_new_page (THREAD_ENTRY * thread_p, PAGE_PTR page_new, int data_size, PAGE_TYPE ptype_new)
13910 {
13911  assert (ptype_new != PAGE_UNKNOWN);
13912  assert (page_new != NULL);
13913  assert (data_size > 0);
13914 
13915  log_append_redo_data2 (thread_p, RVPGBUF_NEW_PAGE, NULL, page_new, (PGLENGTH) ptype_new, data_size, page_new);
13916  pgbuf_set_dirty (thread_p, page_new, DONT_FREE);
13917 }
13918 
13919 /*
13920  * log_redo_page () - Apply redo for changing entire page (or at least its first part).
13921  *
13922  * return : NO_ERROR.
13923  * thread_p (in) : Thread entry.
13924  * rcv (in) : Recovery data.
13925  */
13926 int
13928 {
13929  PAGE_TYPE set_page_type;
13930  assert (rcv->pgptr != NULL);
13931  assert (rcv->length >= 0);
13932  assert (rcv->length <= DB_PAGESIZE);
13933 
13934  if (rcv->length > 0)
13935  {
13936  memcpy (rcv->pgptr, rcv->data, rcv->length);
13937  }
13938 
13939  set_page_type = (PAGE_TYPE) rcv->offset;
13940  if (set_page_type != PAGE_UNKNOWN)
13941  {
13942  pgbuf_set_page_ptype (thread_p, rcv->pgptr, set_page_type);
13943  }
13944  else
13945  {
13946  assert (false);
13947  }
13948 
13949  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
13950  return NO_ERROR;
13951 }
13952 
13953 /*
13954  * pgbuf_rv_new_page_undo () - undo new page (by resetting its page type to PAGE_UNKNOWN)
13955  *
13956  * return : NO_ERROR
13957  * thread_p (in) : thread entry
13958  * rcv (in) : recovery data
13959  */
13960 int
13962 {
13963  pgbuf_set_page_ptype (thread_p, rcv->pgptr, PAGE_UNKNOWN);
13964  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
13965  return NO_ERROR;
13966 }
13967 
13968 /*
13969  * pgbuf_dealloc_page () - deallocate a page
13970  *
13971  * return : error code
13972  * thread_p (in) : thread entry
13973  * page (in) : page to deallocate
13974  */
13975 void
13976 pgbuf_dealloc_page (THREAD_ENTRY * thread_p, PAGE_PTR page_dealloc)
13977 {
13978  PGBUF_BCB *bcb = NULL;
13979  PAGE_TYPE ptype;
13980  FILEIO_PAGE_RESERVED *prv;
13982  char undo_data[8]; // pageid(4) + volid(2) + pyte(1) + pflag(1)
13983  int holder_status;
13984 
13985  /* how it works: page is "deallocated" by resetting its type to PAGE_UNKNOWN. also prepare bcb for victimization.
13986  *
13987  * note: the bcb used to be invalidated. but that means flushing page to disk and waiting for IO write. that may be
13988  * too slow. if we add the bcb to the bottom of a lru list, it will be eventually flushed by flush thread and
13989  * victimized. */
13990 
13991  CAST_PGPTR_TO_BFPTR (bcb, page_dealloc);
13992  assert (bcb->fcnt == 1);
13993 
13994  prv = &bcb->iopage_buffer->iopage.prv;
13995  assert (prv->ptype != PAGE_UNKNOWN);
13996 
13997  udata.pageid = prv->pageid;
13998  udata.volid = prv->volid;
13999  udata.ptype = prv->ptype;
14000  udata.pflag = prv->pflag;
14001 
14002  log_append_undoredo_data2 (thread_p, RVPGBUF_DEALLOC, NULL, page_dealloc, 0, sizeof (udata), 0, &udata, NULL);
14003 
14004  PGBUF_BCB_LOCK (bcb);
14005 
14006 #if !defined(NDEBUG)
14008  {
14010  "TDE: pgbuf_dealloc_page(): clear tde bit in pflag, VPID = %d|%d, tde_algorithm = %s\n",
14012  }
14013 #endif /* !NDEBUG */
14014 
14015  /* set unknown type */
14016  bcb->iopage_buffer->iopage.prv.ptype = (char) PAGE_UNKNOWN;
14017  /* clear page flags (now only tde algorithm) */
14018  bcb->iopage_buffer->iopage.prv.pflag = (unsigned char) 0;
14019 
14020  /* set dirty and mark to move to the bottom of lru */
14022 
14023  holder_status = pgbuf_unlatch_thrd_holder (thread_p, bcb, NULL);
14024 
14025 #if !defined (NDEBUG)
14026  thread_p->get_pgbuf_tracker ().decrement (page_dealloc);
14027 #endif // !NDEBUG
14028  (void) pgbuf_unlatch_bcb_upon_unfix (thread_p, bcb, holder_status);
14029  /* bufptr->mutex has been released in above function. */
14030 }
14031 
14032 /*
14033  * pgbuf_rv_dealloc_redo () - redo page deallocate (by resetting page type to unknown).
14034  *
14035  * return : NO_ERROR
14036  * thread_p (in) : thread entry
14037  * rcv (in) : recovery data
14038  */
14039 int
14041 {
14042  pgbuf_set_page_ptype (thread_p, rcv->pgptr, PAGE_UNKNOWN);
14043  pgbuf_set_tde_algorithm (thread_p, rcv->pgptr, TDE_ALGORITHM_NONE, true);
14044  pgbuf_set_dirty (thread_p, rcv->pgptr, DONT_FREE);
14045  return NO_ERROR;
14046 }
14047 
14048 /*
14049  * pgbuf_rv_dealloc_undo () - undo page deallocation. the page is validated by setting its page type back.
14050  *
14051  * return : error code
14052  * thread_p (in) : thread entry
14053  * rcv (in) : recovery data
14054  *
14055  * note: we had to make this function logical, because if a page is deallocated, it cannot be fixed, unless we use
14056  * fetch type OLD_PAGE_DEALLOCATED.
14057  */
14058 int
14060 {
14061  PAGE_PTR page_deallocated = NULL;
14063  VPID vpid;
14064  FILEIO_PAGE *iopage;
14065 
14066  vpid.pageid = udata->pageid;
14067  vpid.volid = udata->volid;
14068 
14069  assert (rcv->length == sizeof (PGBUF_DEALLOC_UNDO_DATA));
14070  assert (udata->ptype > PAGE_UNKNOWN && udata->ptype <= PAGE_LAST);
14071 
14072  /* fix deallocated page */
14073  page_deallocated = pgbuf_fix (thread_p, &vpid, OLD_PAGE_DEALLOCATED, PGBUF_LATCH_WRITE, PGBUF_UNCONDITIONAL_LATCH);
14074  if (page_deallocated == NULL)
14075  {
14076  assert_release (false);
14077  return ER_FAILED;
14078  }
14079  assert (pgbuf_get_page_ptype (thread_p, page_deallocated) == PAGE_UNKNOWN);
14080  pgbuf_set_page_ptype (thread_p, page_deallocated, (PAGE_TYPE) udata->ptype);
14081 
14082  CAST_PGPTR_TO_IOPGPTR (iopage, page_deallocated);
14083  iopage->prv.pflag = udata->pflag;
14084 
14085 #if !defined(NDEBUG)
14087  {
14089  "TDE: pgbuf_rv_dealloc_page(): reset tde bit in pflag, VPID = %d|%d, tde_algorithm = %s\n",
14090  VPID_AS_ARGS (&vpid), tde_get_algorithm_name (pgbuf_get_tde_algorithm (page_deallocated)));
14091  }
14092 #endif /* !NDEBUG */
14093 
14094  log_append_compensate_with_undo_nxlsa (thread_p, RVPGBUF_COMPENSATE_DEALLOC, &vpid, 0, page_deallocated,
14095  sizeof (PGBUF_DEALLOC_UNDO_DATA), udata, LOG_FIND_CURRENT_TDES (thread_p),
14096  &rcv->reference_lsa);
14097  pgbuf_set_dirty_and_free (thread_p, page_deallocated);
14098  return NO_ERROR;
14099 }
14100 
14101 /*
14102  * pgbuf_rv_dealloc_undo_compensate () - compensation for undo page deallocation. the page is validated by setting its page type back.
14103  *
14104  * return : error code
14105  * thread_p (in) : thread entry
14106  * rcv (in) : recovery data
14107  *
14108  */
14109 int
14111 {
14113  VPID vpid;
14114  FILEIO_PAGE *iopage;
14115 
14116  assert (rcv->pgptr != NULL);
14117  assert (rcv->length == sizeof (PGBUF_DEALLOC_UNDO_DATA));
14118  assert (udata->ptype > PAGE_UNKNOWN && udata->ptype <= PAGE_LAST);
14119 
14120  CAST_PGPTR_TO_IOPGPTR (iopage, rcv->pgptr);
14121 
14122  pgbuf_set_page_ptype (thread_p, rcv->pgptr, (PAGE_TYPE) udata->ptype);
14123  iopage->prv.pflag = udata->pflag;
14124 
14125 #if !defined(NDEBUG)
14127  {
14129  "TDE: pgbuf_rv_dealloc_page(): reset tde bit in pflag, VPID = %d|%d, tde_algorithm = %s\n",
14131  }
14132 #endif /* !NDEBUG */
14133 
14134  return NO_ERROR;
14135 }
14136 
14137 /*
14138  * pgbuf_fix_if_not_deallocated () - fix a page if it is not deallocated. the difference compared to regulat page fix
14139  * finding deallocated pages is expected. if the page is indeed deallocated, it will
14140  * not fix it
14141  *
14142  * return : error code
14143  * thread_p (in) : thread entry
14144  * vpid (in) : page identifier
14145  * latch_mode (in) : latch mode
14146  * latch_condition (in) : latch condition
14147  * page (out) : output fixed page if not deallocated. output NULL if deallocated.
14148  * caller_file (in) : caller file name
14149  * caller_line (in) : caller line
14150  */
14151 int
14153  PGBUF_LATCH_CONDITION latch_condition, PAGE_PTR * page,
14154  const char *caller_file, int caller_line)
14155 {
14156  DISK_ISVALID isvalid;
14157  int error_code = NO_ERROR;
14158 
14159  assert (vpid != NULL && !VPID_ISNULL (vpid));
14160  assert (page != NULL);
14161  *page = NULL;
14162 
14163  /* First, checks whether the file was destroyed. Such check may create performance issues.
14164  * This function must be adapted. Thus, if the transaction has a lock on table, we can skip
14165  * the code that checks whether the file was destroyed.
14166  */
14167  isvalid = disk_is_page_sector_reserved (thread_p, vpid->volid, vpid->pageid);
14168  if (isvalid == DISK_INVALID)
14169  {
14170  /* deallocated */
14171  return NO_ERROR;
14172  }
14173  else if (isvalid == DISK_ERROR)
14174  {
14175  ASSERT_ERROR_AND_SET (error_code);
14176  return error_code;
14177  }
14178  assert (isvalid == DISK_VALID);
14179 
14180  /* is reserved */
14181 #if defined (NDEBUG)
14182  *page = pgbuf_fix_release (thread_p, vpid, OLD_PAGE_MAYBE_DEALLOCATED, latch_mode, latch_condition);
14183 #else /* !NDEBUG */
14184  *page =
14185  pgbuf_fix_debug (thread_p, vpid, OLD_PAGE_MAYBE_DEALLOCATED, latch_mode, latch_condition, caller_file, caller_line);
14186 #endif /* !NDEBUG */
14188  {
14189  ASSERT_ERROR_AND_SET (error_code);
14190  if (error_code == ER_PB_BAD_PAGEID)
14191  {
14192  /* deallocated */
14193  er_clear ();
14194  error_code = NO_ERROR;
14195  }
14196  }
14197  return error_code;
14198 }
14199 
14200 #if defined (SERVER_MODE)
14201 /*
14202  * pgbuf_keep_victim_flush_thread_running () - keep flush thread running
14203  *
14204  * return : true to keep flush thread running, false otherwise
14205  */
14206 bool
14207 pgbuf_keep_victim_flush_thread_running (void)
14208 {
14209  return (pgbuf_is_any_thread_waiting_for_direct_victim () || pgbuf_is_hit_ratio_low ());
14210 }
14211 #endif /* SERVER_MDOE */
14212 
14213 /*
14214  * pgbuf_assign_direct_victim () - try to assign bcb directly to a thread waiting for victim. bcb must be a valid victim
14215  * candidate
14216  *
14217  * return : true if bcb was assigned directly as victim, false otherwise
14218  * thread_p (in) : thread entry
14219  * bcb (in) : bcb to assign as victim
14220  */
14221 STATIC_INLINE bool
14223 {
14224 #if defined (SERVER_MODE)
14225  THREAD_ENTRY *waiter_thread = NULL;
14226 
14227  PERF_UTIME_TRACKER timetr;
14228 
14229  PERF_UTIME_TRACKER_START (thread_p, &timetr);
14230 
14231  /* must hold bcb mutex and victimization should be possible. the only victim-candidate invalidating flag allowed here
14232  * is PGBUF_BCB_FLUSHING_TO_DISK_FLAG (because flush also calls this). */
14235  assert (!pgbuf_bcb_is_dirty (bcb));
14236  assert (!pgbuf_is_bcb_fixed_by_any (bcb, true));
14237 
14238  PGBUF_BCB_CHECK_OWN (bcb);
14239 
14240  /* is flushing is expected, since this is called from flush too. caller should make sure no other case should get
14241  * here with is flushing true. */
14242  /* if marked as victim candidate, we are sorry for the one that marked it. we'll override the flag. */
14243 
14244  /* do we have any waiter threads? */
14245  while (pgbuf_get_thread_waiting_for_direct_victim (waiter_thread))
14246  {
14247  assert (waiter_thread != NULL);
14248 
14249  thread_lock_entry (waiter_thread);
14250 
14251  if (waiter_thread->resume_status != THREAD_ALLOC_BCB_SUSPENDED)
14252  {
14253  /* it is not waiting for us anymore */
14254  thread_unlock_entry (waiter_thread);
14255  continue;
14256  }
14257 
14258  /* wakeup suspended thread */
14260 
14261  /* assign bcb to thread */
14263 
14264  pgbuf_Pool.direct_victims.bcb_victims[waiter_thread->index] = bcb;
14265 
14266  thread_unlock_entry (waiter_thread);
14267 
14268  PERF_UTIME_TRACKER_TIME (thread_p, &timetr, PSTAT_PB_ASSIGN_DIRECT_BCB);
14269 
14270  /* bcb was assigned */
14271  return true;
14272  }
14273  PERF_UTIME_TRACKER_TIME (thread_p, &timetr, PSTAT_PB_ASSIGN_DIRECT_BCB);
14274 #endif /* SERVER_MODE */
14275 
14276  /* no waiting threads */
14277  return false;
14278 }
14279 
14280 #if defined (SERVER_MODE)
14281 
14282 /*
14283  * pgbuf_assign_flushed_pages () - assign flushed pages directly. or just mark them as flushed if it cannot be assigned.
14284  *
14285  * return : void
14286  * thread_p (in) : thread entry
14287  */
14288 bool
14289 pgbuf_assign_flushed_pages (THREAD_ENTRY * thread_p)
14290 {
14291  PGBUF_BCB *bcb_flushed = NULL;
14293  bool not_empty = false;
14294  /* invalidation flag for direct victim assignment: any flag invalidating victim candidates, except is flushing flag */
14296 
14297  /* consume all flushed bcbs queue */
14298  while (pgbuf_Pool.flushed_bcbs->consume (bcb_flushed))
14299  {
14300  not_empty = true;
14301 
14302  /* we need to lock mutex */
14303  PGBUF_BCB_LOCK (bcb_flushed);
14304 
14305  if ((bcb_flushed->flags & invalidate_flag) != 0)
14306  {
14307  /* dirty bcb is not a valid victim */
14308  }
14309  else if (pgbuf_is_bcb_fixed_by_any (bcb_flushed, true))
14310  {
14311  /* bcb is fixed. we cannot assign it as victim */
14312  }
14313  else if (!PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bcb_flushed))
14314  {
14315  /* bcb is hot. don't assign it as victim */
14316  }
14317  else if (PGBUF_IS_PRIVATE_LRU_INDEX (pgbuf_bcb_get_lru_index (bcb_flushed))
14319  {
14320  /* bcb belongs to a private list under quota. give it a chance. */
14321  }
14322  else if (pgbuf_assign_direct_victim (thread_p, bcb_flushed))
14323  {
14324  /* assigned directly */
14325  if (detailed_perf)
14326  {
14328  }
14329  }
14330  else
14331  {
14332  /* not assigned directly */
14333  assert (!pgbuf_bcb_is_direct_victim (bcb_flushed));
14334  /* could not assign it directly. there must be no waiters */
14335  }
14336 
14337  /* make sure bcb is no longer marked as flushing */
14338  pgbuf_bcb_mark_was_flushed (thread_p, bcb_flushed);
14339 
14340  /* wakeup thread waiting for flush */
14341  if (bcb_flushed->next_wait_thrd != NULL)
14342  {
14343  pgbuf_wake_flush_waiters (thread_p, bcb_flushed);
14344  }
14345 
14346  PGBUF_BCB_UNLOCK (bcb_flushed);
14347  }
14348 
14349  return not_empty;
14350 }
14351 
14352 /*
14353  * pgbuf_get_thread_waiting_for_direct_victim () - get one of the threads waiting
14354  *
14355  * return : true if got thread, false otherwise
14356  * waiting_thread_out (out) : output thread waiting for victim
14357  */
14358 STATIC_INLINE bool
14359 pgbuf_get_thread_waiting_for_direct_victim (REFPTR (THREAD_ENTRY, waiting_thread_out))
14360 {
14361  static INT64 count = 0;
14362  INT64 my_count = ATOMIC_INC_64 (&count, 1);
14363 
14364  /* every now and then, force getting waiting threads from queues with lesser priority */
14365  if (my_count % 4 == 0)
14366  {
14367  if (pgbuf_Pool.direct_victims.waiter_threads_low_priority->consume (waiting_thread_out))
14368  {
14369  return true;
14370  }
14371  }
14372  /* try queue in their priority order */
14373  if (pgbuf_Pool.direct_victims.waiter_threads_high_priority->consume (waiting_thread_out))
14374  {
14375  return true;
14376  }
14377  if (pgbuf_Pool.direct_victims.waiter_threads_low_priority->consume (waiting_thread_out))
14378  {
14379  return true;
14380  }
14381  return false;
14382 }
14383 
14384 /*
14385  * pgbuf_get_direct_victim () - get victim assigned directly.
14386  *
14387  * return : pointer to victim bcb
14388  * thread_p (in) : thread entry
14389  */
14391 pgbuf_get_direct_victim (THREAD_ENTRY * thread_p)
14392 {
14393  PGBUF_BCB *bcb =
14394  (PGBUF_BCB *) ATOMIC_TAS_ADDR (&pgbuf_Pool.direct_victims.bcb_victims[thread_p->index], (PGBUF_BCB *) NULL);
14395  int lru_idx;
14396 
14397  assert (bcb != NULL);
14398 
14399  PGBUF_BCB_LOCK (bcb);
14400 
14402  {
14403  /* somebody fixed the page again. */
14405  PGBUF_BCB_UNLOCK (bcb);
14406  return NULL;
14407  }
14408 
14410 
14411  /* clear direct victim flag */
14413 
14414  if (!pgbuf_is_bcb_victimizable (bcb, true))
14415  {
14416  /* should not happen */
14417  assert (false);
14418  PGBUF_BCB_UNLOCK (bcb);
14419  return NULL;
14420  }
14421 
14422  switch (pgbuf_bcb_get_zone (bcb))
14423  {
14424  case PGBUF_VOID_ZONE:
14425  break;
14426  case PGBUF_INVALID_ZONE:
14427  /* should not be here */
14428  assert (false);
14429  break;
14430  default:
14431  /* lru zones */
14432  assert (PGBUF_IS_BCB_IN_LRU (bcb));
14433  lru_idx = pgbuf_bcb_get_lru_index (bcb);
14434 
14435  /* remove bcb from lru list */
14436  pgbuf_lru_remove_bcb (thread_p, bcb);
14437 
14438  /* add to AOUT */
14439  pgbuf_add_vpid_to_aout_list (thread_p, &bcb->vpid, lru_idx);
14440  break;
14441  }
14442 
14444  return bcb;
14445 }
14446 
14447 /*
14448  * pgbuf_is_any_thread_waiting_for_direct_victim () - is any thread waiting to allocate bcb?
14449  *
14450  * return : true/false
14451  */
14452 STATIC_INLINE bool
14453 pgbuf_is_any_thread_waiting_for_direct_victim (void)
14454 {
14455  return (!pgbuf_Pool.direct_victims.waiter_threads_high_priority->is_empty ()
14456  || !pgbuf_Pool.direct_victims.waiter_threads_low_priority->is_empty ());
14457 }
14458 #endif /* SERVER_MODE */
14459 
14460 /*
14461  * pgbuf_lru_increment_victim_candidates () - increment lru list victim candidate counter
14462  *
14463  * return : void
14464  * lru_list (in) : lru list
14465  */
14466 STATIC_INLINE void
14468 {
14469  PGBUF_BCB *old_victim_hint;
14470  int list_tick;
14471 
14472  /* first, let's update the victim hint. */
14473  /* We don't own the LRU mutex here, so after we read the victim_hint, another thread may change that BCB,
14474  * or the victim_hint pointer itself.
14475  * All changes of lru_list->victim_hint, must be precedeed by changing the new hint BCB to LRU3 zone, the checks must
14476  * be repetead here in the same sequence:
14477  * 1. read lru_list->victim_hint
14478  * 2. stop if old_victim_hint is still in LRU3 and is older than proposed to be hint
14479  * 3. atomically change the hint
14480  * (old_victim_hint may suffer other changes including relocating to another LRU, this is protected by the atomic op)
14481  */
14482  do
14483  {
14484  /* replace current victim hint only if this candidate is better. that is if its age in zone 3 is greater that of
14485  * current hint's */
14486  old_victim_hint = lru_list->victim_hint;
14487  list_tick = lru_list->tick_lru3;
14488  if (old_victim_hint != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (old_victim_hint)
14489  && (PGBUF_AGE_DIFF (old_victim_hint->tick_lru3, list_tick) > PGBUF_AGE_DIFF (bcb->tick_lru3, list_tick)))
14490  {
14491  /* current hint is older. */
14492  break;
14493  }
14494 
14495  /* compare & swap. if it fails, the hint must have been updated by someone else (it is possible even if we hold
14496  * lru and bcb mutexes, see pgbuf_set_dirty). we try until we succeed changing the hint or until the current hint
14497  * is better. */
14498  }
14499  while (!ATOMIC_CAS_ADDR (&lru_list->victim_hint, old_victim_hint, bcb));
14500 
14501  /* update victim counter. */
14502  /* add to lock-free circular queue so victimizers can find it... if this is not a private list under quota. */
14503  ATOMIC_INC_32 (&lru_list->count_vict_cand, 1);
14504  if (PGBUF_IS_SHARED_LRU_INDEX (lru_list->index) || PGBUF_LRU_LIST_IS_OVER_QUOTA (lru_list))
14505  {
14506  if (pgbuf_lfcq_add_lru_with_victims (lru_list)
14508  {
14509  /* added to queue of lru lists having victims. */
14510  }
14511  }
14512 }
14513 
14514 /*
14515  * pgbuf_lru_decrement_victim_candidates () - decrement lru list victim candidate counter
14516  *
14517  * return : void
14518  * lru_list (in) : lru list
14519  */
14520 STATIC_INLINE void
14522 {
14523  /* first update victim counter */
14524  if (ATOMIC_INC_32 (&lru_list->count_vict_cand, -1) == 0)
14525  {
14526  /* we cannot remove an entry from lock-free circular queue easily. we just hope that this does not happen too
14527  * often. do nothing here. */
14528  }
14529 }
14530 
14531 /*
14532  * pgbuf_lru_advance_victim_hint () - invalidate bcb_prev_hint as victim hint and advance to bcb_new_hint (if possible).
14533  * in the case we'd reset hint to NULL, but we know victim candidates still exist,
14534  * hint is set to list bottom.
14535  *
14536  * return : void
14537  * thread_p (in) : thread entry
14538  * lru_list (in) : LRU list
14539  * bcb_prev_hint (in) : bcb being invalidated as hint
14540  * bcb_new_hint (in) : new desired hint (can be adjusted to NULL or bottom)
14541  * was_vict_count_updated (in) : was victim count updated? (false if bcb_prev_hint is still counted as victim candidate)
14542  */
14543 STATIC_INLINE void
14544 pgbuf_lru_advance_victim_hint (THREAD_ENTRY * thread_p, PGBUF_LRU_LIST * lru_list, PGBUF_BCB * bcb_prev_hint,
14545  PGBUF_BCB * bcb_new_hint, bool was_vict_count_updated)
14546 {
14547  PGBUF_BCB *new_victim_hint = NULL;
14548 
14549  /* note: caller must have lock on lru list! */
14550  /* todo: add watchers on lru list mutexes */
14551 
14552  /* new victim hint should be either NULL or in the victimization zone */
14553  new_victim_hint = (bcb_new_hint != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (bcb_new_hint)) ? bcb_new_hint : NULL;
14554 
14555  /* restart from bottom if hint is NULL but we have victim candidates */
14556  new_victim_hint = ((new_victim_hint == NULL && lru_list->count_vict_cand > (was_vict_count_updated ? 0 : 1))
14557  ? lru_list->bottom : new_victim_hint);
14558 
14559  new_victim_hint = ((new_victim_hint != NULL && PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (new_victim_hint))
14560  ? new_victim_hint : NULL);
14561 
14562  /* update hint (if it was not already updated) */
14563  assert (new_victim_hint == NULL || pgbuf_bcb_get_lru_index (new_victim_hint) == lru_list->index);
14564  if (ATOMIC_CAS_ADDR (&lru_list->victim_hint, bcb_prev_hint, new_victim_hint))
14565  {
14566  /* updated hint */
14567  }
14568 
14569  assert (lru_list->victim_hint == NULL || PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE (lru_list->victim_hint));
14570 }
14571 
14572 /*
14573  * pgbuf_bcb_update_flags () - update bcb flags (not zone and not lru index)
14574  *
14575  * return : void
14576  * bcb (in) : bcb
14577  * set_flags (in) : flags to set
14578  * clear_flags (in) : flags to clear
14579  *
14580  * note: this makes sure the bcb flags field (which is actually flags + zone + lru index) is modified atomically. it
14581  * also handles changes of victim candidates.
14582  */
14583 STATIC_INLINE void
14584 pgbuf_bcb_update_flags (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int set_flags, int clear_flags)
14585 {
14586  int old_flags;
14587  int new_flags;
14588  bool old_dirty, new_dirty;
14589 
14590  /* sanity checks */
14591  assert (bcb != NULL);
14592  assert ((set_flags & (~PGBUF_BCB_FLAGS_MASK)) == 0);
14593  assert ((clear_flags & (~PGBUF_BCB_FLAGS_MASK)) == 0);
14594 
14595  /* update flags by making sure that other flags + zone + lru_index are not modified. */
14596  do
14597  {
14598  old_flags = bcb->flags;
14599  new_flags = old_flags | set_flags;
14600  new_flags = new_flags & (~clear_flags);
14601 
14602  if (old_flags == new_flags)
14603  {
14604  /* no changes are required. */
14605  return;
14606  }
14607  }
14608  while (!ATOMIC_CAS_32 (&bcb->flags, old_flags, new_flags));
14609 
14610  if (PGBUF_GET_ZONE (old_flags) == PGBUF_LRU_3_ZONE)
14611  {
14612  /* bcb is in lru zone that can be victimized. some flags invalidate the victimization candidacy of a bcb;
14613  * therefore we need to check if the bcb status regarding victimization is changed. */
14614  bool is_old_invalid_victim_candidate = (old_flags & PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK) != 0;
14615  bool is_new_invalid_victim_candidate = (new_flags & PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK) != 0;
14616  PGBUF_LRU_LIST *lru_list;
14617 
14618  lru_list = pgbuf_lru_list_from_bcb (bcb);
14619 
14620  if (is_old_invalid_victim_candidate && !is_new_invalid_victim_candidate)
14621  {
14622  /* bcb has become a victim candidate */
14623  pgbuf_lru_add_victim_candidate (thread_p, lru_list, bcb);
14624  }
14625  else if (!is_old_invalid_victim_candidate && is_new_invalid_victim_candidate)
14626  {
14627  /* bcb is no longer a victim candidate */
14628  pgbuf_lru_remove_victim_candidate (thread_p, lru_list, bcb);
14629  }
14630  else
14631  {
14632  /* bcb status remains the same */
14633  }
14634  }
14635 
14636  old_dirty = (old_flags & PGBUF_BCB_DIRTY_FLAG) != 0;
14637  new_dirty = (new_flags & PGBUF_BCB_DIRTY_FLAG) != 0;
14638 
14639  if (old_dirty && !new_dirty)
14640  {
14641  /* cleared dirty flag. */
14642  ATOMIC_INC_64 (&pgbuf_Pool.monitor.dirties_cnt, -1);
14643  }
14644  else if (!old_dirty && new_dirty)
14645  {
14646  /* added dirty flag */
14647  ATOMIC_INC_64 (&pgbuf_Pool.monitor.dirties_cnt, 1);
14648  }
14649 
14650  assert (pgbuf_Pool.monitor.dirties_cnt >= 0 && pgbuf_Pool.monitor.dirties_cnt <= pgbuf_Pool.num_buffers);
14651 }
14652 
14653 /*
14654  * pgbuf_bcb_change_zone () - change the zone and lru index of bcb, but keep the bcb flags. also handles the zone
14655  * counters, victim counter and victim hint for lru lists.
14656  *
14657  * return : void
14658  * bcb (in) : bcb
14659  * lru_idx (in) : lru index (0 if not in any lru zone)
14660  * zone (in) : zone
14661  *
14662  * this is called whenever the bcb is moved from a logical zone to another. possible transitions:
14663  *
14664  * FIXME: correct the following description
14665  * 1. get from invalid list invalid => void (bcb is locked)
14666  * 2. get victim lru => void (list & bcb are locked)
14667  * 3. unfix void/lru => lru (list & bcb are locked)
14668  * 4. lru adjust zones lru => lru (list is locked)
14669  *
14670  * note: two simultaneous change zones on the same bcb should not be possible. the only case when bcb is not locked
14671  * is case 4, however list is locked. other possible cases that can call change zone on same bcb must have lock
14672  * on lru mutex.
14673  *
14674  * note: bcb->flags is changed here and simultaneous calls of pgbuf_bcb_update_flags is possible. in some cases, the
14675  * flags may change even with no mutex (pgbuf_set_dirty). since we have to handle victim counter and hint for
14676  * lru lists, we must do atomic operations to modify the zone, and keep previous and new flag values. based on
14677  * these flags, we then update lru zone counters, lru victim counter and lru victim hint. lru zone counters can
14678  * only be modified by other calls pgbuf_bcb_change_zone in same lru and are protected by lru mutex, so they can
14679  * be modified without atomic operations.
14680  */
14681 STATIC_INLINE void
14682 pgbuf_bcb_change_zone (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, int new_lru_idx, PGBUF_ZONE new_zone)
14683 {
14684  int old_flags;
14685  int new_flags;
14686  int new_zone_idx = PGBUF_MAKE_ZONE (new_lru_idx, new_zone);
14687  bool is_valid_victim_candidate;
14688  PGBUF_LRU_LIST *lru_list;
14689 
14690  /* note: make sure the zones from and to are changing are blocked */
14691 
14692  /* sanity checks */
14693  assert (bcb != NULL);
14694  assert (new_lru_idx == 0 || new_zone == PGBUF_LRU_1_ZONE || new_zone == PGBUF_LRU_2_ZONE
14695  || new_zone == PGBUF_LRU_3_ZONE);
14696 
14697  /* update bcb->flags. make sure we are only changing the values for zone and lru index, but we preserve the flags. */
14698  do
14699  {
14700  /* get current value of bcb->flags */
14701  old_flags = bcb->flags;
14702 
14703  /* now set new flags to same bcb flags + new zone & lru index */
14704  new_flags = (old_flags & PGBUF_BCB_FLAGS_MASK) | new_zone_idx;
14705 
14706  /* compare & swap. if we fail, we have to try again. until we succeed. */
14707  }
14708  while (!ATOMIC_CAS_32 (&bcb->flags, old_flags, new_flags));
14709 
14710  /* was bcb a valid victim candidate (we only consider flags, not fix counters or zone)? note that this is still true
14711  * after the change of zone. */
14712  is_valid_victim_candidate = (old_flags & PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK) == 0;
14713 
14714  if (old_flags & PGBUF_LRU_ZONE_MASK)
14715  {
14716  /* bcb was in a lru list. we need to update zone counters. */
14717  int lru_idx = PGBUF_GET_LRU_INDEX (old_flags);
14718  lru_list = PGBUF_GET_LRU_LIST (lru_idx);
14719 
14720  /* hint should have been changed already if the BCB was in LRU3; otherwise (if downgraded, we may expect that
14721  * victim hint is changed by other thread (checkpoint->pgbuf_bcb_update_flags) */
14722  assert (lru_list->victim_hint != bcb || PGBUF_GET_ZONE (old_flags) != PGBUF_LRU_3_ZONE);
14723 
14725  {
14726  ATOMIC_INC_32 (&pgbuf_Pool.monitor.lru_shared_pgs_cnt, -1);
14727  }
14728 
14729  switch (PGBUF_GET_ZONE (old_flags))
14730  {
14731  case PGBUF_LRU_1_ZONE:
14732  lru_list->count_lru1--;
14733  break;
14734  case PGBUF_LRU_2_ZONE:
14735  lru_list->count_lru2--;
14736  break;
14737  case PGBUF_LRU_3_ZONE:
14738  lru_list->count_lru3--;
14739  if (is_valid_victim_candidate)
14740  {
14741  /* bcb was a valid victim and in the zone that could be victimized. update victim counter & hint */
14742  pgbuf_lru_remove_victim_candidate (thread_p, lru_list, bcb);
14743  }
14744  break;
14745  default:
14746  assert (false);
14747  break;
14748  }
14749  }
14750  if (new_zone & PGBUF_LRU_ZONE_MASK)
14751  {
14752  lru_list = PGBUF_GET_LRU_LIST (new_lru_idx);
14753 
14755  {
14756  ATOMIC_INC_32 (&pgbuf_Pool.monitor.lru_shared_pgs_cnt, 1);
14757  }
14758 
14759  switch (new_zone)
14760  {
14761  case PGBUF_LRU_1_ZONE:
14762  lru_list->count_lru1++;
14763  break;
14764  case PGBUF_LRU_2_ZONE:
14765  lru_list->count_lru2++;
14766  break;
14767  case PGBUF_LRU_3_ZONE:
14768  lru_list->count_lru3++;
14769  if (is_valid_victim_candidate)
14770  {
14771  pgbuf_lru_add_victim_candidate (thread_p, lru_list, bcb);
14772  }
14773  break;
14774  default:
14775  assert (false);
14776  break;
14777  }
14778  }
14779 }
14780 
14781 /*
14782  * pgbuf_bcb_get_zone () - get zone of bcb
14783  *
14784  * return : PGBUF_ZONE
14785  * bcb (in) : bcb
14786  */
14789 {
14790  return PGBUF_GET_ZONE (bcb->flags);
14791 }
14792 
14793 /*
14794  * pgbuf_bcb_get_lru_index () - get lru index of bcb. make sure bcb is in lru zones.
14795  *
14796  * return : lru index
14797  * bcb (in) : bcb
14798  */
14799 STATIC_INLINE int
14801 {
14802  assert (PGBUF_IS_BCB_IN_LRU (bcb));
14803  return PGBUF_GET_LRU_INDEX (bcb->flags);
14804 }
14805 
14806 /*
14807  * pgbuf_bcb_is_dirty () - is bcb dirty?
14808  *
14809  * return : true/false
14810  * bcb (in) : bcb
14811  */
14812 STATIC_INLINE bool
14814 {
14815  return (bcb->flags & PGBUF_BCB_DIRTY_FLAG) != 0;
14816 }
14817 
14818 /*
14819  * pgbuf_bcb_set_dirty () - set dirty flag to bcb
14820  *
14821  * return : void
14822  * bcb (in) : bcb
14823  */
14824 STATIC_INLINE void
14826 {
14827  /* set dirty flag and clear none */
14828  /* note: we usually use pgbuf_bcb_update_flags function. we do an exception for pgbuf_bcb_set_dirty to since it is the
14829  * most used case and the code should be as optimal as possible. */
14830  int old_flags;
14831 
14832  do
14833  {
14834  old_flags = bcb->flags;
14835  if (old_flags & PGBUF_BCB_DIRTY_FLAG)
14836  {
14837  /* already dirty */
14838  return;
14839  }
14840  }
14841  while (!ATOMIC_CAS_32 (&bcb->flags, old_flags, old_flags | PGBUF_BCB_DIRTY_FLAG));
14842 
14843  /* was changed to dirty */
14844  ATOMIC_INC_64 (&pgbuf_Pool.monitor.dirties_cnt, 1);
14845  assert (pgbuf_Pool.monitor.dirties_cnt >= 0 && pgbuf_Pool.monitor.dirties_cnt <= pgbuf_Pool.num_buffers);
14846 
14847  if (PGBUF_GET_ZONE (old_flags) == PGBUF_LRU_3_ZONE && (old_flags & PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK) == 0)
14848  {
14849  /* invalidate victim */
14851  }
14852 }
14853 
14854 /*
14855  * pgbuf_bcb_clear_dirty () - clear dirty flag from bcb
14856  *
14857  * return : void
14858  * bcb (in) : bcb
14859  */
14860 STATIC_INLINE void
14862 {
14863  /* set no flag and clear dirty */
14864  pgbuf_bcb_update_flags (thread_p, bcb, 0, PGBUF_BCB_DIRTY_FLAG);
14865 }
14866 
14867 /*
14868  * pgbuf_bcb_mark_is_flushing () - mark page is being flushed. dirty flag is also cleared because while the page is
14869  * flushed to disk, another thread may fix the page and modify it. the new change must
14870  * be tracked.
14871  *
14872  * return : void
14873  * bcb (in) : bcb
14874  */
14875 STATIC_INLINE bool
14877 {
14878  if (pgbuf_bcb_is_dirty (bcb))
14879  {
14880  /* set flushing flag and clear dirty */
14883  return true;
14884  }
14885  else
14886  {
14888  return false;
14889  }
14890 }
14891 
14892 /*
14893  * pgbuf_bcb_mark_was_flushed () - mark page was flushed to disk
14894  *
14895  * return : void
14896  * bcb (in) : bcb
14897  */
14898 STATIC_INLINE void
14900 {
14901  /* set no flag and clear flushing */
14903 }
14904 
14905 /*
14906  * pgbuf_bcb_mark_was_not_flushed () - page flush failed
14907  *
14908  * return : void
14909  * bcb (in) : bcb
14910  * mark_dirty(in): true if BCB needs to be marked dirty
14911  */
14912 STATIC_INLINE void
14913 pgbuf_bcb_mark_was_not_flushed (THREAD_ENTRY * thread_p, PGBUF_BCB * bcb, bool mark_dirty)
14914 {
14915  /* set dirty flag and clear flushing */
14917 }
14918 
14919 /*
14920  * pgbuf_bcb_is_flushing () - is page being flushed to disk?
14921  *
14922  * return : true/false
14923  * bcb (in) : bcb
14924  */
14925 STATIC_INLINE bool
14927 {
14928  return (bcb->flags & PGBUF_BCB_FLUSHING_TO_DISK_FLAG) != 0;
14929 }
14930 
14931 /*
14932  * pgbuf_bcb_is_direct_victim () - is bcb assigned as victim directly?
14933  *
14934  * return : true/false
14935  * bcb (in) : bcb
14936  */
14937 STATIC_INLINE bool
14939 {
14940  return (bcb->flags & PGBUF_BCB_VICTIM_DIRECT_FLAG) != 0;
14941 }
14942 
14943 /*
14944  * pgbuf_bcb_is_invalid_direct_victim () - is bcb assigned as victim directly, but invalidated after?
14945  *
14946  * return : true/false
14947  * bcb (in) : bcb
14948  */
14949 STATIC_INLINE bool
14951 {
14952  return (bcb->flags & PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG) != 0;
14953 }
14954 
14955 /*
14956  * pgbuf_bcb_is_async_flush_request () - is bcb async flush requested?
14957  *
14958  * return : true/false
14959  * bcb (in) : bcb
14960  */
14961 STATIC_INLINE bool
14963 {
14964  return (bcb->flags & PGBUF_BCB_ASYNC_FLUSH_REQ) != 0;
14965 }
14966 
14967 /*
14968  * pgbuf_bcb_should_be_moved_to_bottom_lru () - is bcb supposed to be moved to the bottom of lru?
14969  *
14970  * return : true/false
14971  * bcb (in) : bcb
14972  */
14973 STATIC_INLINE bool
14975 {
14976  return (bcb->flags & PGBUF_BCB_MOVE_TO_LRU_BOTTOM_FLAG) != 0;
14977 }
14978 
14979 /*
14980  * pgbuf_set_to_vacuum () - notify that page will likely be accessed by vacuum
14981  *
14982  * return : void
14983  * thread_p (in) : thread entry
14984  * page (in) : page
14985  */
14986 void
14988 {
14989  PGBUF_BCB *bcb;
14990 
14991  CAST_PGPTR_TO_BFPTR (bcb, page);
14993 }
14994 
14995 /*
14996  * pgbuf_bcb_is_flushing () - is page going to be accessed by vacuum?
14997  *
14998  * return : true/false
14999  * bcb (in) : bcb
15000  */
15001 STATIC_INLINE bool
15003 {
15004  return (bcb->flags & PGBUF_BCB_TO_VACUUM_FLAG) != 0;
15005 }
15006 
15007 /*
15008  * pgbuf_bcb_avoid_victim () - should bcb be avoid for victimization?
15009  *
15010  * return : true/false
15011  * bcb (in) : bcb
15012  *
15013  * note: no flag that invalidates a bcb victim candidacy
15014  */
15015 STATIC_INLINE bool
15017 {
15018  return (bcb->flags & PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK) != 0;
15019 }
15020 
15021 /*
15022  * pgbuf_bcb_get_pool_index () - get bcb pool index
15023  *
15024  * return : pool index
15025  * bcb (in) : BCB
15026  */
15027 STATIC_INLINE int
15029 {
15030  return (int) (bcb - pgbuf_Pool.BCB_table);
15031 }
15032 
15033 /*
15034  * pgbuf_bcb_register_avoid_deallocation () - avoid deallocating bcb's page.
15035  *
15036  * return : void
15037  * bcb (in) : bcb
15038  */
15039 STATIC_INLINE void
15041 {
15042  assert ((bcb->count_fix_and_avoid_dealloc & 0x00008000) == 0);
15043  (void) ATOMIC_INC_32 (&bcb->count_fix_and_avoid_dealloc, 1);
15044 }
15045 
15046 /*
15047  * pgbuf_bcb_unregister_avoid_deallocation () - avoiding page deallocation no longer required
15048  *
15049  * return : void
15050  * bcb (in) : bcb
15051  */
15052 STATIC_INLINE void
15054 {
15055  int count_crt;
15056  do
15057  {
15058  /* get bcb->count_fix_and_avoid_dealloc (volatile) */
15059  count_crt = bcb->count_fix_and_avoid_dealloc;
15060  assert ((count_crt & 0x00008000) == 0);
15061  if ((count_crt & PGBUF_BCB_AVOID_DEALLOC_MASK) > 0)
15062  {
15063  /* we can decrement counter */
15064  }
15065  else
15066  {
15067  /* interestingly enough, this case can happen. how?
15068  *
15069  * well, pgbuf_ordered_fix may be forced to unfix all pages currently held by transaction to fix a new page.
15070  * all pages that are "less" than new page are marked to avoid deallocation and unfixed. then transaction is
15071  * blocked on latching new page, which may take a while, pages previously unfixed can be victimized.
15072  * when pgbuf_ordered_fix tries to fix back these pages, it will load them from disk and tadaa, the avoid
15073  * deallocation count is 0. so we expect the case.
15074  *
15075  * note: avoid deallocation count is supposed to prevent vacuum workers from deallocating these pages.
15076  * so, victimizing a bcb marked to avoid deallocation is not perfectly safe. however, the likelihood of
15077  * page really getting deallocated is ... almost zero. the alternative of avoiding victimization when
15078  * bcb's are marked for deallocation is much more complicated and poses serious risks (what if we leak
15079  * the counter and prevent bcb from being victimized indefinitely?). so, we prefer the existing risks.
15080  */
15082  "pgbuf_bcb_unregister_avoid_deallocation: bcb %p, vpid = %d|%d was probably victimized.\n",
15083  bcb, VPID_AS_ARGS (&bcb->vpid));
15084  break;
15085  }
15086  }
15087  while (!ATOMIC_CAS_32 (&bcb->count_fix_and_avoid_dealloc, count_crt, count_crt - 1));
15088 }
15089 
15090 /*
15091  * pgbuf_bcb_should_avoid_deallocation () - should avoid deallocating page?
15092  *
15093  * return : true/false
15094  * bcb (in) : bcb
15095  */
15096 STATIC_INLINE bool
15098 {
15099  assert (bcb->count_fix_and_avoid_dealloc >= 0);
15100  assert ((bcb->count_fix_and_avoid_dealloc & 0x00008000) == 0);
15102 }
15103 
15104 /*
15105  * pgbuf_bcb_check_and_reset_fix_and_avoid_dealloc () - check avoid deallocation is 0 and reset the whole bcb field.
15106  *
15107  * return : void
15108  * bcb (in) : bcb
15109  * file (in) : caller file
15110  * line (in) : caller line
15111  *
15112  * note: avoid deallocation is allowed to be non-zero due to pgbuf_ordered_fix and the possibility of victimizing its
15113  * bcb. avoid crashing the server and just issue a warning.
15114  */
15115 STATIC_INLINE void
15117 {
15119  {
15120  er_log_debug (file, line, "warning: bcb %p, vpid = %d|%d, should not have avoid deallocation marker.\n",
15121  bcb, VPID_AS_ARGS (&bcb->vpid));
15122  }
15123  bcb->count_fix_and_avoid_dealloc = 0;
15124 }
15125 
15126 /*
15127  * pgbuf_bcb_register_fix () - register page fix
15128  *
15129  * return : void
15130  * bcb (in) : bcb
15131  */
15132 STATIC_INLINE void
15134 {
15135  /* note: we only register to detect hot pages. once we hit the threshold, we are no longer required to fix it. */
15137  {
15138 #if !defined (NDEBUG)
15139  int newval =
15140 #endif /* !NDEBUG */
15141  ATOMIC_INC_32 (&bcb->count_fix_and_avoid_dealloc, 1 << PGBUF_BCB_COUNT_FIX_SHIFT_BITS);
15142  assert (newval >= (1 << PGBUF_BCB_COUNT_FIX_SHIFT_BITS));
15144  }
15145 }
15146 
15147 /*
15148  * pgbuf_bcb_is_hot () - is bcb hot (was fixed more then threshold times?)
15149  *
15150  * return : true/false
15151  * bcb (in) : bcb
15152  */
15153 STATIC_INLINE bool
15155 {
15156  assert (bcb->count_fix_and_avoid_dealloc >= 0);
15158 }
15159 
15160 /*
15161  * pgbuf_lfcq_add_lru_with_victims () - add lru list to queue of lists that can be victimized. this queue was designed
15162  * so victimizers can find a list with victims quickly without iterating through
15163  * many lists that are full.
15164  *
15165  * return : true if list was added, false if it was already added by someone else.
15166  * lru_list (in) : lru list
15167  */
15168 STATIC_INLINE bool
15170 {
15171  int old_flags = lru_list->flags;
15172 
15173  if (old_flags & PGBUF_LRU_VICTIM_LFCQ_FLAG)
15174  {
15175  /* already added. */
15176  return false;
15177  }
15178 
15179  /* use compare & swap because we cannot allow two threads adding same list in queue */
15180  if (ATOMIC_CAS_32 (&lru_list->flags, old_flags, old_flags | PGBUF_LRU_VICTIM_LFCQ_FLAG))
15181  {
15182  /* add to queues. we keep private and shared lists separated. */
15183  if (PGBUF_IS_PRIVATE_LRU_INDEX (lru_list->index))
15184  {
15185  /* private list */
15186  if (pgbuf_Pool.private_lrus_with_victims->produce (lru_list->index))
15187  {
15188  return true;
15189  }
15190  }
15191  else
15192  {
15193  /* shared list */
15194  if (pgbuf_Pool.shared_lrus_with_victims->produce (lru_list->index))
15195  {
15196  return true;
15197  }
15198  }
15199  /* clear the flag */
15200  lru_list->flags &= ~PGBUF_LRU_VICTIM_LFCQ_FLAG;
15201  }
15202 
15203  /* not added */
15204  return false;
15205 }
15206 
15207 /*
15208  * pgbuf_lfcq_get_victim_from_private_lru () - get a victim from a private list in lock-free queues.
15209  *
15210  * return : victim or NULL
15211  * thread_p (in) : thread entry
15212  * restricted (in) : true if victimizing is restricted to big private lists
15213  */
15214 static PGBUF_BCB *
15216 {
15217 #define PERF(id) if (detailed_perf) perfmon_inc_stat (thread_p, id)
15218 
15219  int lru_idx;
15220  PGBUF_LRU_LIST *lru_list;
15221  PGBUF_BCB *victim = NULL;
15223  bool added_back = false;
15224 
15225  if (pgbuf_Pool.private_lrus_with_victims == NULL)
15226  {
15227  return NULL;
15228  }
15229  assert (pgbuf_Pool.big_private_lrus_with_victims != NULL);
15230 
15231  if (pgbuf_Pool.big_private_lrus_with_victims->consume (lru_idx))
15232  {
15233  /* prioritize big lists */
15236  }
15237  else
15238  {
15239  if (restricted)
15240  {
15241  return NULL;
15242  }
15244  if (!pgbuf_Pool.private_lrus_with_victims->consume (lru_idx))
15245  {
15246  /* empty handed */
15248  return NULL;
15249  }
15250  }
15251  assert (PGBUF_IS_PRIVATE_LRU_INDEX (lru_idx));
15252 
15253  lru_list = PGBUF_GET_LRU_LIST (lru_idx);
15255  && PGBUF_LRU_LIST_COUNT (lru_list) > 2 * lru_list->quota && lru_list->count_vict_cand > 1)
15256  {
15257  /* add big private lists back immediately */
15258  if (pgbuf_Pool.big_private_lrus_with_victims->produce (lru_idx))
15259  {
15260  added_back = true;
15261  }
15262  }
15263 
15264  /* get victim from list */
15265  victim = pgbuf_get_victim_from_lru_list (thread_p, lru_idx);
15267 
15268  if (added_back)
15269  {
15270  /* already added back to queue */
15271  return victim;
15272  }
15273 
15274  if (lru_list->count_vict_cand > 0 && PGBUF_LRU_LIST_IS_OVER_QUOTA (lru_list))
15275  {
15276  if (pgbuf_Pool.private_lrus_with_victims->produce (lru_idx))
15277  {
15278  return victim;
15279  }
15280  }
15281 
15282  /* we're not adding the list back to the queue... so we need to reflect that in the list flags. next time when a new
15283  * candidate is added, lru list should also be added to the queue.
15284  *
15285  * note: we can have a race here. candidates are 0 now and incremented before we manage to change the victim
15286  * counter. we should not worry that much, the list will be added by pgbuf_adjust_quotas eventually.
15287  */
15288  assert ((lru_list->flags & PGBUF_LRU_VICTIM_LFCQ_FLAG) != 0);
15289  /* note: we are not using an atomic operation here, because this is the only flag and we are certain no one else
15290  * changes it from set to cleared. however, if more flags are added, or more cases that should clear the flag,
15291  * then consider replacing with some atomic operation. */
15292  lru_list->flags &= ~PGBUF_LRU_VICTIM_LFCQ_FLAG;
15293 
15294  return victim;
15295 
15296 #undef PERF
15297 }
15298 
15299 /*
15300  * pgbuf_lfcq_get_victim_from_shared_lru () - get a victim from a shared list in lock-free queues.
15301  *
15302  * return : victim or NULL
15303  * thread_p (in) : thread entry
15304  * multi_threaded (in) : true if multi-threaded system
15305  */
15306 static PGBUF_BCB *
15307 pgbuf_lfcq_get_victim_from_shared_lru (THREAD_ENTRY * thread_p, bool multi_threaded)
15308 {
15309 #define PERF(id) if (detailed_perf) perfmon_inc_stat (thread_p, id)
15310 
15311  int lru_idx;
15312  PGBUF_LRU_LIST *lru_list;
15313  PGBUF_BCB *victim = NULL;
15315 
15317 
15318  if (!pgbuf_Pool.shared_lrus_with_victims->consume (lru_idx))
15319  {
15320  /* no list has candidates! */
15322  return NULL;
15323  }
15324  /* popped a list with victim candidates from queue */
15325  assert (PGBUF_IS_SHARED_LRU_INDEX (lru_idx));
15326 
15327  lru_list = PGBUF_GET_LRU_LIST (lru_idx);
15328  victim = pgbuf_get_victim_from_lru_list (thread_p, lru_idx);
15330 
15331  /* no victim found in first step, but flush thread ran and candidates can be found, try again */
15332  if (victim == NULL && multi_threaded == false && lru_list->count_vict_cand > 0)
15333  {
15334  victim = pgbuf_get_victim_from_lru_list (thread_p, lru_idx);
15335  PERF (victim != NULL ? PSTAT_PB_VICTIM_SHARED_LRU_SUCCESS : PSTAT_PB_VICTIM_SHARED_LRU_FAIL);
15336  }
15337 
15338  if ((multi_threaded || victim != NULL) && lru_list->count_vict_cand > 0)
15339  {
15340  /* add lru list back to queue */
15341  if (pgbuf_Pool.shared_lrus_with_victims->produce (lru_idx))
15342  {
15343  return victim;
15344  }
15345  else
15346  {
15347  /* we couldn't add to queue. it usually does not happen, but a consumer can be preempted for a long time,
15348  * temporarily creating the impression that queue is full. it will be added later, when a new victim
15349  * candidate shows up or when adjust quota checks it. */
15350  /* fall through */
15351  }
15352  }
15353 
15354  /* we're not adding the list back to the queue... so we need to reflect that in the list flags. next time when a new
15355  * candidate is added, lru list should also be added to the queue.
15356  *
15357  * note: we can have a race here. candidates are 0 now and incremented before we manage to change the victim
15358  * counter. we should not worry that much, the list will be added by pgbuf_adjust_quotas eventually.
15359  */
15360  assert ((lru_list->flags & PGBUF_LRU_VICTIM_LFCQ_FLAG) != 0);
15361  /* note: we are not using an atomic operation here, because this is the only flag and we are certain no one else
15362  * changes it from set to cleared. however, if more flags are added, or more cases that should clear the flag,
15363  * then consider replacing with some atomic operation. */
15364  lru_list->flags &= ~PGBUF_LRU_VICTIM_LFCQ_FLAG;
15365 
15366  return victim;
15367 
15368 #undef PERF
15369 }
15370 
15371 /*
15372  * pgbuf_lru_list_from_bcb () - get lru list of bcb
15373  *
15374  * return : lru list
15375  * bcb (in) : bcb
15376  */
15379 {
15380  assert (PGBUF_IS_BCB_IN_LRU (bcb));
15381 
15383 }
15384 
15385 /*
15386  * pgbuf_bcb_register_hit_for_lru () - register hit when bcb is unfixed for its current lru.
15387  *
15388  * return : void
15389  * bcb (in) : BCB
15390  */
15391 STATIC_INLINE void
15393 {
15394  assert (PGBUF_IS_BCB_IN_LRU (bcb));
15395 
15396  if (bcb->hit_age < pgbuf_Pool.quota.adjust_age)
15397  {
15398  pgbuf_Pool.monitor.lru_hits[pgbuf_bcb_get_lru_index (bcb)]++;
15399  bcb->hit_age = pgbuf_Pool.quota.adjust_age;
15400  }
15401 }
15402 
15403 /*
15404  * pgbuf_is_io_stressful () - is io stressful (are pages waiting for victims?)
15405  *
15406  * return : true/false
15407  */
15408 bool
15410 {
15411 #if defined (SERVER_MODE)
15412  /* we consider the IO stressful if threads end up waiting for victims */
15413  return !pgbuf_Pool.direct_victims.waiter_threads_low_priority->is_empty ();
15414 #else /* !SERVER_MODE */
15415  return false;
15416 #endif /* !SERVER_MODE */
15417 }
15418 
15419 /*
15420  * pgbuf_is_hit_ratio_low () - is page buffer hit ratio low? currently target is set to 99.9%.
15421  *
15422  * return : true/false
15423  */
15424 STATIC_INLINE bool
15426 {
15427 #define PGBUF_MIN_VICTIM_REQ 10 /* set a minimum number of requests */
15428 #define PGBUF_DESIRED_HIT_VS_MISS_RATE 1000 /* 99.9% hit ratio */
15429 
15430  return (pgbuf_Pool.monitor.lru_victim_req_cnt > PGBUF_MIN_VICTIM_REQ
15432 
15433 #undef PGBUF_DESIRED_HIT_VS_MISS_RATE
15434 #undef PGBUF_MIN_VICTIM_REQ
15435 }
15436 
15437 #if defined (SERVER_MODE)
15438 /*
15439  * pgbuf_bcbmon_lock () - monitor and lock bcb mutex
15440  *
15441  * return : void
15442  * bcb (in) : BCB to lock
15443  * caller_line (in) : caller line
15444  */
15445 static void
15446 pgbuf_bcbmon_lock (PGBUF_BCB * bcb, int caller_line)
15447 {
15449  PGBUF_MONITOR_BCB_MUTEX *monitor_bcb_mutex = &pgbuf_Pool.monitor.bcb_locks[index];
15450 
15451  assert_release (pgbuf_Monitor_locks);
15452 
15453  if (monitor_bcb_mutex->bcb != NULL)
15454  {
15455  /* already have a bcb mutex. we cannot lock another one unless try lock is used. */
15457  }
15458  if (monitor_bcb_mutex->bcb_second != NULL)
15459  {
15460  /* already have a bcb mutex. we cannot lock another one unless try lock is used. */
15462  }
15463  if (bcb->owner_mutex == index)
15464  {
15465  /* double lock */
15467  }
15468  /* ok, we can lock */
15469  (void) pthread_mutex_lock (&bcb->mutex);
15470  if (bcb->owner_mutex >= 0)
15471  {
15472  /* somebody else has mutex? */
15474  }
15475  monitor_bcb_mutex->bcb = bcb;
15476  monitor_bcb_mutex->line = caller_line;
15477  bcb->owner_mutex = index;
15478 }
15479 
15480 /*
15481  * pgbuf_bcbmon_trylock () - monitor and try locking bcb mutex. do not wait if it is already locked
15482  *
15483  * return : try lock result
15484  * bcb (in) : BCB to lock
15485  * caller_line (in) : caller line
15486  */
15487 static int
15488 pgbuf_bcbmon_trylock (PGBUF_BCB * bcb, int caller_line)
15489 {
15491  int rv;
15492  PGBUF_MONITOR_BCB_MUTEX *monitor_bcb_mutex = &pgbuf_Pool.monitor.bcb_locks[index];
15493 
15494  assert_release (pgbuf_Monitor_locks);
15495 
15496  if (bcb->owner_mutex == index)
15497  {
15498  /* double lock */
15500  }
15501  if (monitor_bcb_mutex->bcb != NULL && monitor_bcb_mutex->bcb_second != NULL)
15502  {
15503  /* two bcb's are already locked. */
15505  }
15506  if (monitor_bcb_mutex->bcb != NULL && monitor_bcb_mutex->bcb == bcb)
15507  {
15508  /* same bcb is already locked?? */
15510  }
15511  /* try lock */
15512  rv = pthread_mutex_trylock (&bcb->mutex);
15513  if (rv == 0)
15514  {
15515  /* success. monitor it. */
15516  if (monitor_bcb_mutex->bcb == NULL)
15517  {
15518  monitor_bcb_mutex->bcb = bcb;
15519  monitor_bcb_mutex->line = caller_line;
15520  }
15521  else
15522  {
15523  monitor_bcb_mutex->bcb_second = bcb;
15524  monitor_bcb_mutex->line_second = caller_line;
15525  }
15526  bcb->owner_mutex = index;
15527  }
15528  else
15529  {
15530  /* failed */
15531  }
15532  return rv;
15533 }
15534 
15535 /*
15536  * pgbuf_bcbmon_unlock () - monitor and unlock BCB mutex
15537  *
15538  * return : void
15539  * bcb (in) : BCB to unlock
15540  */
15541 static void
15542 pgbuf_bcbmon_unlock (PGBUF_BCB * bcb)
15543 {
15545  PGBUF_MONITOR_BCB_MUTEX *monitor_bcb_mutex = &pgbuf_Pool.monitor.bcb_locks[index];
15546 
15547  assert_release (pgbuf_Monitor_locks);
15548 
15549  /* should be monitored */
15550  if (bcb->owner_mutex != index)
15551  {
15552  /* I did not lock it?? */
15554  }
15555  bcb->owner_mutex = -1;
15556 
15557  if (monitor_bcb_mutex->bcb == bcb)
15558  {
15559  /* remove bcb from monitor. */
15560  monitor_bcb_mutex->bcb = NULL;
15561  }
15562  else if (monitor_bcb_mutex->bcb_second == bcb)
15563  {
15564  /* remove bcb from monitor */
15565  monitor_bcb_mutex->bcb_second = NULL;
15566  }
15567  else
15568  {
15569  /* I did not monitor it?? */
15571  }
15572 
15573  pthread_mutex_unlock (&bcb->mutex);
15574 }
15575 
15576 /*
15577  * pgbuf_bcbmon_check_own () - check current thread owns bcb mutex.
15578  *
15579  * return : void
15580  * bcb (in) : BCB
15581  *
15582  * note: monitoring page buffer locks must be activated
15583  */
15584 static void
15585 pgbuf_bcbmon_check_own (PGBUF_BCB * bcb)
15586 {
15588  PGBUF_MONITOR_BCB_MUTEX *monitor_bcb_mutex = &pgbuf_Pool.monitor.bcb_locks[index];
15589 
15590  assert_release (pgbuf_Monitor_locks);
15591 
15592  if (bcb->owner_mutex != index)
15593  {
15594  /* not owned */
15596  }
15597  if (monitor_bcb_mutex->bcb != bcb && monitor_bcb_mutex->bcb_second != bcb)
15598  {
15599  /* not monitored? */
15601  }
15602 }
15603 
15604 /*
15605  * pgbuf_bcbmon_check_mutex_leaks () - check for mutex leaks. must be called on exit points where no BCB should be
15606  * locked.
15607  *
15608  * note: only works if page buffer lock monitoring is enabled.
15609  */
15610 static void
15611 pgbuf_bcbmon_check_mutex_leaks (void)
15612 {
15614  PGBUF_MONITOR_BCB_MUTEX *monitor_bcb_mutex = &pgbuf_Pool.monitor.bcb_locks[index];
15615 
15616  assert_release (pgbuf_Monitor_locks);
15617 
15618  if (monitor_bcb_mutex->bcb != NULL)
15619  {
15621  }
15622  if (monitor_bcb_mutex->bcb_second != NULL)
15623  {
15625  }
15626 }
15627 #endif /* SERVER_MODE */
15628 
15629 /*
15630  * pgbuf_flags_mask_sanity_check () - check flags mask do not overlap!
15631  *
15632  */
15633 static void
15635 {
15636  /* sanity check: make sure the masks for bcb flags, zone and lru index do not overlap. this should be immediately
15637  * caught, so abort the server whenever happens. */
15639  {
15641  }
15643  {
15645  }
15646  if (PGBUF_ZONE_MASK & PGBUF_LRU_INDEX_MASK)
15647  {
15649  }
15651  {
15653  }
15654 }
15655 
15656 /*
15657  * pgbuf_lru_sanity_check () - check lru list is sane
15658  *
15659  * return : void
15660  * lru (in) : lru list
15661  */
15662 static void
15664 {
15665 #if !defined (NDEBUG)
15666  if (lru->top == NULL)
15667  {
15668  /* empty list */
15669  assert (lru->count_lru1 == 0 && lru->count_lru2 == 0 && lru->count_lru3 == 0 && lru->bottom == NULL
15670  && lru->bottom_1 == NULL && lru->bottom_2 == NULL);
15671  return;
15672  }
15673 
15674  /* not empty */
15675  assert (lru->bottom != NULL);
15676  assert (lru->count_lru1 != 0 || lru->count_lru2 != 0 || lru->count_lru3 != 0);
15677 
15678  /* zone 1 */
15679  assert ((lru->count_lru1 == 0) == (lru->bottom_1 == NULL));
15680  if (lru->bottom_1 != NULL)
15681  {
15684  if (lru->bottom_1->next_BCB != NULL)
15685  {
15687  {
15688  assert (false);
15689  }
15691  {
15692  assert (lru->count_lru2 != 0 && lru->bottom_2 != NULL);
15693  }
15694  else
15695  {
15696  assert (lru->count_lru3 != 0);
15697  }
15698  }
15699  else
15700  {
15701  assert (lru->count_lru2 == 0 && lru->count_lru3 == 0 && lru->bottom_2 == NULL
15702  && lru->bottom == lru->bottom_1);
15703  }
15704  }
15705 
15706  /* zone 2 */
15707  assert ((lru->count_lru2 == 0) == (lru->bottom_2 == NULL));
15708  if (lru->bottom_2 != NULL)
15709  {
15711  assert (lru->bottom_2 != NULL || pgbuf_bcb_get_zone (lru->top) == PGBUF_LRU_2_ZONE);
15712  if (lru->bottom_2->next_BCB != NULL)
15713  {
15715  {
15716  assert (false);
15717  }
15719  {
15720  assert (false);
15721  }
15722  else if (lru->count_lru3 == 0)
15723  {
15724  assert (false);
15725  }
15726  }
15727  else
15728  {
15729  assert (lru->count_lru3 == 0 && lru->bottom == lru->bottom_2);
15730  }
15731  }
15732 #endif /* !NDEBUG */
15733 }
15734 
15735 // TODO: find a better place for this, but not log_impl.h
15736 /*
15737  * pgbuf_find_current_wait_msecs - find waiting times for current transaction
15738  *
15739  * return : wait_msecs...
15740  *
15741  * Note: Find the waiting time for the current transaction.
15742  */
15743 STATIC_INLINE int
15745 {
15746  LOG_TDES *tdes; /* Transaction descriptor */
15747  int tran_index;
15748 
15749  tran_index = LOG_FIND_THREAD_TRAN_INDEX (thread_p);
15750  tdes = LOG_FIND_TDES (tran_index);
15751  if (tdes != NULL)
15752  {
15753  return tdes->wait_msecs;
15754  }
15755  else
15756  {
15757  return 0;
15758  }
15759 }
15760 
15761 /*
15762  * pgbuf_get_page_flush_interval () - setup page flush daemon period based on system parameter
15763  */
15764 void
15766 {
15767  int page_flush_interval_msecs = prm_get_integer_value (PRM_ID_PAGE_BG_FLUSH_INTERVAL_MSECS);
15768 
15769  assert (page_flush_interval_msecs >= 0);
15770 
15771  if (page_flush_interval_msecs > 0)
15772  {
15773  // if page_flush_interval_msecs > 0 (zero) then loop for fixed interval
15774  is_timed_wait = true;
15775  period = std::chrono::milliseconds (page_flush_interval_msecs);
15776  }
15777  else
15778  {
15779  // infinite wait
15780  is_timed_wait = false;
15781  }
15782 }
15783 
15784 // *INDENT-OFF*
15785 #if defined (SERVER_MODE)
15786 static void
15787 pgbuf_page_maintenance_execute (cubthread::entry & thread_ref)
15788 {
15789  if (!BO_IS_SERVER_RESTARTED ())
15790  {
15791  // wait for boot to finish
15792  return;
15793  }
15794 
15795  /* page buffer maintenance thread adjust quota's based on thread activity. */
15796  pgbuf_adjust_quotas (&thread_ref);
15797 
15798  /* search lists and assign victims directly */
15799  pgbuf_direct_victims_maintenance (&thread_ref);
15800 }
15801 #endif /* SERVER_MODE */
15802 
15803 #if defined (SERVER_MODE)
15804 // class pgbuf_page_flush_daemon_task
15805 //
15806 // description:
15807 // page flush daemon task
15808 //
15809 class pgbuf_page_flush_daemon_task : public cubthread::entry_task
15810 {
15811  private:
15812  PERF_UTIME_TRACKER m_perf_track;
15813 
15814  public:
15815  pgbuf_page_flush_daemon_task ()
15816  {
15817  PERF_UTIME_TRACKER_START (NULL, &m_perf_track);
15818  }
15819 
15820  void execute (cubthread::entry & thread_ref) override
15821  {
15822  if (!BO_IS_SERVER_RESTARTED ())
15823  {
15824  // wait for boot to finish
15825  return;
15826  }
15827 
15828  // did not timeout, someone requested flush... run at least once
15829  bool force_one_run = pgbuf_Page_flush_daemon->was_woken_up ();
15830  bool stop_iteration = false;
15831 
15832  /* flush pages as long as necessary */
15833  while (force_one_run || pgbuf_keep_victim_flush_thread_running ())
15834  {
15836  &stop_iteration);
15837  force_one_run = false;
15838  if (stop_iteration)
15839  {
15840  break;
15841  }
15842  }
15843 
15844  /* performance tracking */
15845  if (m_perf_track.is_perf_tracking)
15846  {
15847  /* register sleep time. */
15848  PERF_UTIME_TRACKER_TIME_AND_RESTART (&thread_ref, &m_perf_track, PSTAT_PB_FLUSH_SLEEP);
15849 
15850  /* update is_perf_tracking */
15851  m_perf_track.is_perf_tracking = perfmon_is_perf_tracking ();
15852  }
15853  else
15854  {
15855  /* update is_perf_tracking and start timer if it became true */
15856  PERF_UTIME_TRACKER_START (&thread_ref, &m_perf_track);
15857  }
15858  }
15859 };
15860 #endif /* SERVER_MODE */
15861 
15862 #if defined (SERVER_MODE)
15863 static void
15864 pgbuf_page_post_flush_execute (cubthread::entry & thread_ref)
15865 {
15866  if (!BO_IS_SERVER_RESTARTED ())
15867  {
15868  // wait for boot to finish
15869  return;
15870  }
15871 
15872  /* assign flushed pages */
15873  if (pgbuf_assign_flushed_pages (&thread_ref))
15874  {
15875  /* reset daemon looper and be prepared to start over */
15876  pgbuf_Page_post_flush_daemon->reset_looper ();
15877  }
15878 }
15879 #endif /* SERVER_MODE */
15880 
15881 #if defined (SERVER_MODE)
15882 // class pgbuf_flush_control_daemon_task
15883 //
15884 // description:
15885 // flush control daemon task
15886 //
15887 class pgbuf_flush_control_daemon_task : public cubthread::entry_task
15888 {
15889  private:
15890  struct timeval m_end;
15891  bool m_first_run;
15892 
15893  public:
15894  pgbuf_flush_control_daemon_task ()
15895  : m_end ({0, 0})
15896  , m_first_run (true)
15897  {
15898  }
15899 
15900  int initialize ()
15901  {
15903  }
15904 
15905  void execute (cubthread::entry & thread_ref) override
15906  {
15907  if (!BO_IS_SERVER_RESTARTED ())
15908  {
15909  // wait for boot to finish
15910  return;
15911  }
15912 
15913  if (m_first_run)
15914  {
15915  gettimeofday (&m_end, NULL);
15916  m_first_run = false;
15917  return;
15918  }
15919 
15920  struct timeval begin, diff;
15921  int token_gen, token_consumed;
15922 
15923  gettimeofday (&begin, NULL);
15924  perfmon_diff_timeval (&diff, &m_end, &begin);
15925 
15926  int64_t diff_usec = diff.tv_sec * 1000000LL + diff.tv_usec;
15927  fileio_flush_control_add_tokens (&thread_ref, diff_usec, &token_gen, &token_consumed);
15928 
15929  gettimeofday (&m_end, NULL);
15930  }
15931 
15932  void retire (void) override
15933  {
15935  delete this;
15936  }
15937 };
15938 #endif /* SERVER_MODE */
15939 
15940 #if defined (SERVER_MODE)
15941 /*
15942  * pgbuf_page_maintenance_daemon_init () - initialize page maintenance daemon thread
15943  */
15944 void
15945 pgbuf_page_maintenance_daemon_init ()
15946 {
15947  assert (pgbuf_Page_maintenance_daemon == NULL);
15948 
15949  cubthread::looper looper = cubthread::looper (std::chrono::milliseconds (100));
15950  cubthread::entry_callable_task *daemon_task = new cubthread::entry_callable_task (pgbuf_page_maintenance_execute);
15951 
15952  pgbuf_Page_maintenance_daemon = cubthread::get_manager ()->create_daemon (looper, daemon_task,
15953  "pgbuf_page_maintenance");
15954 }
15955 #endif /* SERVER_MODE */
15956 
15957 #if defined (SERVER_MODE)
15958 /*
15959  * pgbuf_page_flush_daemon_init () - initialize page flush daemon thread
15960  */
15961 void
15962 pgbuf_page_flush_daemon_init ()
15963 {
15964  assert (pgbuf_Page_flush_daemon == NULL);
15965 
15967  pgbuf_page_flush_daemon_task *daemon_task = new pgbuf_page_flush_daemon_task ();
15968 
15969  pgbuf_Page_flush_daemon = cubthread::get_manager ()->create_daemon (looper, daemon_task, "pgbuf_page_flush");
15970 }
15971 #endif /* SERVER_MODE */
15972 
15973 #if defined (SERVER_MODE)
15974 /*
15975  * pgbuf_page_post_flush_daemon_init () - initialize page post flush daemon thread
15976  */
15977 void
15978 pgbuf_page_post_flush_daemon_init ()
15979 {
15980  assert (pgbuf_Page_post_flush_daemon == NULL);
15981 
15982  std::array<cubthread::delta_time, 3> looper_interval {{
15983  std::chrono::milliseconds (1),
15984  std::chrono::milliseconds (10),
15985  std::chrono::milliseconds (100)
15986  }};
15987 
15988  cubthread::looper looper = cubthread::looper (looper_interval);
15989  cubthread::entry_callable_task *daemon_task = new cubthread::entry_callable_task (pgbuf_page_post_flush_execute);
15990 
15991  pgbuf_Page_post_flush_daemon = cubthread::get_manager ()->create_daemon (looper, daemon_task,
15992  "pgbuf_page_post_flush");
15993 }
15994 #endif /* SERVER_MODE */
15995 
15996 #if defined (SERVER_MODE)
15997 /*
15998  * pgbuf_flush_control_daemon_init () - initialize flush control daemon thread
15999  */
16000 void
16001 pgbuf_flush_control_daemon_init ()
16002 {
16003  assert (pgbuf_Flush_control_daemon == NULL);
16004 
16005  pgbuf_flush_control_daemon_task *daemon_task = new pgbuf_flush_control_daemon_task ();
16006 
16007  if (daemon_task->initialize () != NO_ERROR)
16008  {
16009  delete daemon_task;
16010  return;
16011  }
16012 
16013  cubthread::looper looper = cubthread::looper (std::chrono::milliseconds (50));
16014  pgbuf_Flush_control_daemon = cubthread::get_manager ()->create_daemon (looper, daemon_task,
16015  "pgbuf_flush_control");
16016 }
16017 #endif /* SERVER_MODE */
16018 
16019 #if defined (SERVER_MODE)
16020 /*
16021  * pgbuf_daemons_init () - initialize page buffer daemon threads
16022  */
16023 void
16024 pgbuf_daemons_init ()
16025 {
16026  pgbuf_page_maintenance_daemon_init ();
16027  pgbuf_page_flush_daemon_init ();
16028  pgbuf_page_post_flush_daemon_init ();
16029  pgbuf_flush_control_daemon_init ();
16030 }
16031 #endif /* SERVER_MODE */
16032 
16033 #if defined (SERVER_MODE)
16034 /*
16035  * pgbuf_daemons_destroy () - destroy page buffer daemon threads
16036  */
16037 void
16038 pgbuf_daemons_destroy ()
16039 {
16040  cubthread::get_manager ()->destroy_daemon (pgbuf_Page_maintenance_daemon);
16041  cubthread::get_manager ()->destroy_daemon (pgbuf_Page_flush_daemon);
16042  cubthread::get_manager ()->destroy_daemon (pgbuf_Page_post_flush_daemon);
16043  cubthread::get_manager ()->destroy_daemon (pgbuf_Flush_control_daemon);
16044 }
16045 #endif /* SERVER_MODE */
16046 
16047 void
16048 pgbuf_daemons_get_stats (UINT64 * stats_out)
16049 {
16050 #if defined (SERVER_MODE)
16051  UINT64 *statsp = stats_out;
16052 
16053  if (pgbuf_Page_flush_daemon != NULL)
16054  {
16055  pgbuf_Page_flush_daemon->get_stats (statsp);
16056  }
16058 
16059  if (pgbuf_Page_post_flush_daemon != NULL)
16060  {
16061  pgbuf_Page_post_flush_daemon->get_stats (statsp);
16062  }
16064 
16065  if (pgbuf_Flush_control_daemon != NULL)
16066  {
16067  pgbuf_Flush_control_daemon->get_stats (statsp);
16068  }
16070 
16071  if (pgbuf_Page_maintenance_daemon != NULL)
16072  {
16073  pgbuf_Page_maintenance_daemon->get_stats (statsp);
16074  }
16075 #endif
16076 }
16077 // *INDENT-ON*
16078 
16079 /*
16080  * pgbuf_is_page_flush_daemon_available () - check if page flush daemon is available
16081  * return: true if page flush daemon is available, false otherwise
16082  */
16083 static bool
16085 {
16086 #if defined (SERVER_MODE)
16087  return pgbuf_Page_flush_daemon != NULL;
16088 #else
16089  return false;
16090 #endif
16091 }
16092 
16093 static bool
16095 {
16096  return lsa == PGBUF_TEMP_LSA;
16097 }
16098 
16099 static void
16101 {
16102  io_page->prv.lsa = PGBUF_TEMP_LSA;
16103 
16104  FILEIO_PAGE_WATERMARK *prv2 = fileio_get_page_watermark_pos (io_page, page_size);
16105  prv2->lsa = PGBUF_TEMP_LSA;
16106 }
16107 
16108 /*
16109  * pgbuf_scan_bcb_table () - scan bcb table to count snapshot data with no bcb mutex
16110  */
16111 static void
16113 {
16114  int bufid;
16115  int flags;
16116  PGBUF_BCB *bufptr;
16117  PAGE_TYPE page_type;
16118  VPID vpid;
16119  PGBUF_STATUS_SNAPSHOT *show_status_snapshot = &pgbuf_Pool.show_status_snapshot;
16120 
16121  memset (show_status_snapshot, 0, sizeof (PGBUF_STATUS_SNAPSHOT));
16122 
16123  for (bufid = 0; bufid < pgbuf_Pool.num_buffers; bufid++)
16124  {
16125  bufptr = PGBUF_FIND_BCB_PTR (bufid);
16126  page_type = (PAGE_TYPE) (bufptr->iopage_buffer->iopage.prv.ptype);
16127  vpid = bufptr->vpid;
16128  flags = bufptr->flags;
16129 
16130  if ((flags & PGBUF_BCB_DIRTY_FLAG) != 0)
16131  {
16132  show_status_snapshot->dirty_pages++;
16133  }
16134  else
16135  {
16136  show_status_snapshot->clean_pages++;
16137  }
16138 
16139  if ((flags & PGBUF_INVALID_ZONE) != 0)
16140  {
16141  show_status_snapshot->free_pages++;
16142  continue;
16143  }
16144 
16145  if ((PGBUF_GET_ZONE (flags) == PGBUF_LRU_3_ZONE) && (flags & PGBUF_BCB_DIRTY_FLAG) != 0)
16146  {
16147  show_status_snapshot->victim_candidate_pages++;
16148  }
16149 
16150  /* count temporary and permanent pages */
16151  if (pgbuf_is_temporary_volume (vpid.volid) == true)
16152  {
16153  show_status_snapshot->num_temp_pages++;
16154 
16155  assert ((page_type == PAGE_UNKNOWN) || /* dealloc pages, we don't know page type */
16156  (page_type == PAGE_AREA) || (page_type == PAGE_QRESULT) || /* temporary page type */
16157  (page_type == PAGE_EHASH) || (page_type == PAGE_VOLHEADER) /* It can be temporary or permanent pages */
16158  || (page_type == PAGE_VOLBITMAP) || (page_type == PAGE_FTAB)); /* It can be temporary or permanent pages */
16159  }
16160  else
16161  {
16162  switch (page_type)
16163  {
16164  case PAGE_BTREE:
16165  show_status_snapshot->num_index_pages++;
16166  break;
16167  case PAGE_OVERFLOW:
16168  case PAGE_HEAP:
16169  show_status_snapshot->num_data_pages++;
16170  break;
16171  case PAGE_CATALOG:
16172  case PAGE_VOLBITMAP:
16173  case PAGE_VOLHEADER:
16174  case PAGE_FTAB:
16175  case PAGE_EHASH:
16176  case PAGE_VACUUM_DATA:
16177  case PAGE_DROPPED_FILES:
16178  show_status_snapshot->num_system_pages++;
16179  break;
16180  default:
16181  /* dealloc pages, we don't know page type */
16182  assert (page_type == PAGE_UNKNOWN);
16183  break;
16184  }
16185  }
16186  }
16187 }
16188 
16189 /*
16190  * pgbuf_start_scan () - start scan function for show page buffer status
16191  * return: NO_ERROR, or ER_code
16192  *
16193  * thread_p(in):
16194  * type (in):
16195  * arg_values(in):
16196  * arg_cnt(in):
16197  * ptr(in/out):
16198  */
16199 int
16200 pgbuf_start_scan (THREAD_ENTRY * thread_p, int type, DB_VALUE ** arg_values, int arg_cnt, void **ptr)
16201 {
16203  const int num_cols = 19;
16204  time_t cur_time;
16205  int idx, i;
16206  int error = NO_ERROR;
16207  DB_VALUE *vals = NULL, db_val;
16208  unsigned long long delta, hit_delta, request_delta;
16209  double time_delta;
16210  double hit_rate;
16211  DB_DATA_STATUS data_status;
16212  PGBUF_STATUS status_accumulated = { };
16213  PGBUF_STATUS_SNAPSHOT *status_snapshot = &pgbuf_Pool.show_status_snapshot;
16214  PGBUF_STATUS_OLD *status_old = &pgbuf_Pool.show_status_old;
16215 
16216  *ptr = NULL;
16217 
16218 #if defined(SERVER_MODE)
16219  (void) pthread_mutex_lock (&pgbuf_Pool.show_status_mutex);
16220 #endif
16221 
16223 
16224  for (i = 0; i <= MAX_NTRANS; i++)
16225  {
16226  status_accumulated.num_hit += pgbuf_Pool.show_status[i].num_hit;
16227  status_accumulated.num_page_request += pgbuf_Pool.show_status[i].num_page_request;
16228  status_accumulated.num_pages_created += pgbuf_Pool.show_status[i].num_pages_created;
16229  status_accumulated.num_pages_written += pgbuf_Pool.show_status[i].num_pages_written;
16230  status_accumulated.num_pages_read += pgbuf_Pool.show_status[i].num_pages_read;
16231  status_accumulated.num_flusher_waiting_threads += pgbuf_Pool.show_status[i].num_flusher_waiting_threads;
16232  }
16233 
16234  ctx = showstmt_alloc_array_context (thread_p, 1, num_cols);
16235  if (ctx == NULL)
16236  {
16237  error = er_errid ();
16238  return error;
16239  }
16240 
16241  vals = showstmt_alloc_tuple_in_context (thread_p, ctx);
16242  if (vals == NULL)
16243  {
16244  error = er_errid ();
16245  goto exit_on_error;
16246  }
16247 
16248  cur_time = time (NULL);
16249 
16250  time_delta = difftime (cur_time, status_old->print_out_time) + 0.0001; // avoid dividing by 0
16251 
16252  idx = 0;
16253 
16254  hit_rate = (status_accumulated.num_hit - status_old->num_hit) /
16255  ((status_accumulated.num_page_request - status_old->num_page_request) + 0.0000000000001);
16256  hit_rate = hit_rate * 100;
16257 
16258  db_make_double (&db_val, hit_rate);
16259  db_value_domain_init (&vals[idx], DB_TYPE_NUMERIC, 13, 10);
16260  error = numeric_db_value_coerce_to_num (&db_val, &vals[idx], &data_status);
16261  idx++;
16262  if (error != NO_ERROR)
16263  {
16264  goto exit_on_error;
16265  }
16266 
16267  delta = status_accumulated.num_hit - status_old->num_hit;
16268  db_make_bigint (&vals[idx], delta);
16269  idx++;
16270 
16271  delta = status_accumulated.num_page_request - status_old->num_page_request;
16272  db_make_bigint (&vals[idx], delta);
16273  idx++;
16274 
16275  db_make_int (&vals[idx], pgbuf_Pool.num_buffers);
16276  idx++;
16277 
16278  db_make_int (&vals[idx], PGBUF_IOPAGE_BUFFER_SIZE);
16279  idx++;
16280 
16281  db_make_int (&vals[idx], status_snapshot->free_pages);
16282  idx++;
16283 
16284  db_make_int (&vals[idx], status_snapshot->victim_candidate_pages);
16285  idx++;
16286 
16287  db_make_int (&vals[idx], status_snapshot->clean_pages);
16288  idx++;
16289 
16290  db_make_int (&vals[idx], status_snapshot->dirty_pages);
16291  idx++;
16292 
16293  db_make_int (&vals[idx], status_snapshot->num_index_pages);
16294  idx++;
16295 
16296  db_make_int (&vals[idx], status_snapshot->num_data_pages);
16297  idx++;
16298 
16299  db_make_int (&vals[idx], status_snapshot->num_system_pages);
16300  idx++;
16301 
16302  db_make_int (&vals[idx], status_snapshot->num_temp_pages);
16303  idx++;
16304 
16305  delta = status_accumulated.num_pages_created - status_old->num_pages_created;
16306  db_make_bigint (&vals[idx], delta);
16307  idx++;
16308 
16309  delta = status_accumulated.num_pages_written - status_old->num_pages_written;
16310  db_make_bigint (&vals[idx], delta);
16311  idx++;
16312 
16313  db_make_double (&db_val, delta / time_delta);
16314  db_value_domain_init (&vals[idx], DB_TYPE_NUMERIC, 20, 10);
16315  error = numeric_db_value_coerce_to_num (&db_val, &vals[idx], &data_status);
16316  idx++;
16317  if (error != NO_ERROR)
16318  {
16319  goto exit_on_error;
16320  }
16321 
16322  delta = status_accumulated.num_pages_read - status_old->num_pages_read;
16323  db_make_bigint (&vals[idx], delta);
16324  idx++;
16325 
16326  db_make_double (&db_val, delta / time_delta);
16327  db_value_domain_init (&vals[idx], DB_TYPE_NUMERIC, 20, 10);
16328  error = numeric_db_value_coerce_to_num (&db_val, &vals[idx], &data_status);
16329  idx++;
16330  if (error != NO_ERROR)
16331  {
16332  goto exit_on_error;
16333  }
16334 
16335  db_make_int (&vals[idx], status_accumulated.num_flusher_waiting_threads);
16336  idx++;
16337 
16338  assert (idx == num_cols);
16339 
16340  /* set now data to old data */
16341  status_old->num_hit = status_accumulated.num_hit;
16342  status_old->num_page_request = status_accumulated.num_page_request;
16343  status_old->num_pages_created = status_accumulated.num_pages_created;
16344  status_old->num_pages_written = status_accumulated.num_pages_written;
16345  status_old->num_pages_read = status_accumulated.num_pages_read;
16346  status_old->print_out_time = cur_time;
16347 
16348  *ptr = ctx;
16349 
16350 #if defined(SERVER_MODE)
16351  pthread_mutex_unlock (&pgbuf_Pool.show_status_mutex);
16352 #endif
16353 
16354  return NO_ERROR;
16355 
16356 exit_on_error:
16357 
16358  if (ctx != NULL)
16359  {
16360  showstmt_free_array_context (thread_p, ctx);
16361  }
16362 
16363 #if defined(SERVER_MODE)
16364  pthread_mutex_unlock (&pgbuf_Pool.show_status_mutex);
16365 #endif
16366 
16367  return error;
16368 }
#define MAX_PRIVATE_RATIO
PGLENGTH offset
Definition: recovery.h:201
int tick_lru_list
Definition: page_buffer.c:487
static std::size_t get_stats_value_count(void)
static int pgbuf_unlock_page(THREAD_ENTRY *thread_p, PGBUF_BUFFER_HASH *hash_anchor, const VPID *vpid, int need_hash_mutex)
Definition: page_buffer.c:7315
#define PGBUF_NEIGHBOR_POS(idx)
Definition: page_buffer.c:295
static PGBUF_BCB * pgbuf_get_victim_from_lru_list(THREAD_ENTRY *thread_p, const int lru_idx)
Definition: page_buffer.c:8494
char * PAGE_PTR
STATIC_INLINE FILEIO_PAGE_WATERMARK * fileio_get_page_watermark_pos(FILEIO_PAGE *io_page, PGLENGTH page_size)
Definition: file_io.h:202
unsigned int add_shared_lru_idx
Definition: page_buffer.c:684
#define PGBUF_IS_SHARED_LRU_INDEX(lru_idx)
Definition: page_buffer.c:937
unsigned int num_flusher_waiting_threads
Definition: page_buffer.c:376
PAGE_PTR pgbuf_fix_debug(THREAD_ENTRY *thread_p, const VPID *vpid, PAGE_FETCH_MODE fetch_mode, PGBUF_LATCH_MODE request_mode, PGBUF_LATCH_CONDITION condition, const char *caller_file, int caller_line)
Definition: page_buffer.c:1789
#define ER_LK_UNILATERALLY_ABORTED
Definition: error_code.h:130
PGBUF_STATUS * show_status
Definition: page_buffer.c:783
PGBUF_HOLDER * thrd_reserved_holder
Definition: page_buffer.c:747
PGBUF_BUFFER_LOCK * buf_lock_table
Definition: page_buffer.c:721
STATIC_INLINE void pgbuf_lru_advance_victim_hint(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, PGBUF_BCB *bcb_prev_hint, PGBUF_BCB *bcb_new_hint, bool was_vict_count_updated)
#define PGBUF_TRAN_THRESHOLD_ACTIVITY
Definition: page_buffer.c:266
void disk_volheader_check_magic(THREAD_ENTRY *thread_p, const PAGE_PTR page_volheader)
#define ER_PAGE_LATCH_ABORTED
Definition: error_code.h:1074
DISK_ISVALID pgbuf_is_valid_page(THREAD_ENTRY *thread_p, const VPID *vpid, bool no_error, DISK_ISVALID(*fun)(const VPID *vpid, void *args), void *args)
bool logpb_need_wal(const LOG_LSA *lsa)
bool pgbuf_has_perm_pages_fixed(THREAD_ENTRY *thread_p)
#define ER_FAILED_ASSERTION
Definition: error_code.h:695
float * lru_victim_flush_priority_per_lru
Definition: page_buffer.c:678
int page_size
Definition: unloaddb.c:52
#define pgbuf_attach_watcher(...)
Definition: page_buffer.h:394
cubthread::entry * thread_get_thread_entry_info(void)
STATIC_INLINE void pgbuf_remove_from_lru_list(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, PGBUF_LRU_LIST *lru_list)
Definition: page_buffer.c:9530
#define VACUUM_MAX_WORKER_COUNT
#define PGBUF_CHKPT_MAX_FLUSH_RATE
Definition: page_buffer.c:303
#define NO_ERROR
Definition: error_code.h:46
PGBUF_HOLDER_SET * next_set
Definition: page_buffer.c:467
static void pgbuf_lru_boost_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
Definition: page_buffer.c:9299
static PGBUF_BCB * pgbuf_get_victim(THREAD_ENTRY *thread_p)
Definition: page_buffer.c:8248
#define __attribute__(X)
Definition: porting.h:36
#define AOUT_HASH_IDX(vpid, list)
Definition: page_buffer.c:817
int heap_get_class_oid_from_page(THREAD_ENTRY *thread_p, PAGE_PTR page_p, OID *class_oid)
Definition: heap_file.c:18824
void er_stack_push(void)
int tick_lru3
Definition: page_buffer.c:489
#define PGBUF_BCB_COUNT_FIX_SHIFT_BITS
Definition: page_buffer.c:258
#define PGBUF_LRU_LIST_IS_ONE_TWO_OVER_QUOTA(list)
Definition: page_buffer.c:941
int pgbuf_rv_new_page_undo(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
#define PGBUF_AOUT_NOT_FOUND
Definition: page_buffer.c:269
PERF_HOLDER_LATCH
Definition: perf_monitor.h:171
static int pgbuf_put_bcb_into_invalid_list(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr)
Definition: page_buffer.c:8136
void log_append_undoredo_data2(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, PAGE_PTR pgptr, PGLENGTH offset, int undo_length, int redo_length, const void *undo_data, const void *redo_data)
Definition: log_manager.c:1861
STATIC_INLINE bool pgbuf_bcb_avoid_victim(const PGBUF_BCB *bcb)
static int pgbuf_flush_seq_list(THREAD_ENTRY *thread_p, PGBUF_SEQ_FLUSHER *seq_flusher, struct timeval *limit_time, const LOG_LSA *prev_chkpt_redo_lsa, LOG_LSA *chkpt_smallest_lsa, int *time_rem)
Definition: page_buffer.c:3801
bool pgbuf_is_lsa_temporary(PAGE_PTR pgptr)
Definition: page_buffer.c:4885
PERF_PAGE_MODE
Definition: perf_monitor.h:197
#define IO_PAGESIZE
#define BO_IS_SERVER_RESTARTED()
Definition: boot_sr.h:84
static int pgbuf_initialize_bcb_table(void)
Definition: page_buffer.c:4924
#define PGBUF_PRIVATE_LRU_FROM_THREAD(thread_p)
Definition: page_buffer.c:926
STATIC_INLINE void pgbuf_lru_add_new_bcb_to_top(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int lru_idx)
Definition: page_buffer.c:9374
PGBUF_IOPAGE_BUFFER * iopage_table
Definition: page_buffer.c:722
unsigned long long num_page_request
Definition: page_buffer.c:372
#define PGBUF_TOTAL_LRU_COUNT
Definition: page_buffer.c:932
int pgbuf_flush_all_unfixed_and_set_lsa_as_null(THREAD_ENTRY *thread_p, VOLID volid)
Definition: page_buffer.c:3116
#define ASSERT_ERROR()
void pgbuf_log_new_page(THREAD_ENTRY *thread_p, PAGE_PTR page_new, int data_size, PAGE_TYPE ptype_new)
STATIC_INLINE PGBUF_HOLDER * pgbuf_find_thrd_holder(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:5450
void pgbuf_replace_watcher_debug(THREAD_ENTRY *thread_p, PGBUF_WATCHER *old_watcher, PGBUF_WATCHER *new_watcher, const char *caller_file, const int caller_line)
static int pgbuf_remove_vpid_from_aout_list(THREAD_ENTRY *thread_p, const VPID *vpid)
Definition: page_buffer.c:9723
bool pgbuf_has_any_non_vacuum_waiters(PAGE_PTR pgptr)
LOG_LSA chkpt_redo_lsa
Definition: log_impl.h:660
static PGBUF_BCB * pgbuf_lfcq_get_victim_from_private_lru(THREAD_ENTRY *thread_p, bool restricted)
void pgbuf_unfix_debug(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, const char *caller_file, int caller_line)
Definition: page_buffer.c:2444
#define ER_CSS_PTHREAD_COND_TIMEDOUT
Definition: error_code.h:1428
bool LSA_EQ(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:160
#define ER_PB_ORDERED_NO_HEAP
Definition: error_code.h:1529
void showstmt_free_array_context(THREAD_ENTRY *thread_p, SHOWSTMT_ARRAY_CONTEXT *ctx)
Definition: show_scan.c:373
STATIC_INLINE int pgbuf_bcb_get_pool_index(const PGBUF_BCB *bcb)
#define PGBUF_LRU_LIST_IS_OVER_QUOTA(list)
Definition: page_buffer.c:940
static const char * pgbuf_consistent_str(int consistent)
void pgbuf_dealloc_page(THREAD_ENTRY *thread_p, PAGE_PTR page_dealloc)
int pgbuf_rv_dealloc_redo(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
#define PGBUF_MIN_PAGES_IN_SHARED_LIST
Definition: page_buffer.c:909
static PGBUF_BUFFER_POOL pgbuf_Pool
Definition: page_buffer.c:802
unsigned int dirty_pages
Definition: page_buffer.c:385
PGBUF_BCB * bottom_1
Definition: page_buffer.c:550
#define LOG_DATA_ADDR_INITIALIZER
Definition: log_append.hpp:63
PERF_CONDITIONAL_FIX_TYPE perf_cond_type
Definition: page_buffer.c:859
#define PGBUF_FLUSH_VICTIM_BOOST_MULT
Definition: page_buffer.c:286
void LSA_COPY(log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:139
STATIC_INLINE void pgbuf_bcb_update_flags(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int set_flags, int clear_flags)
void pgbuf_attach_watcher_debug(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PGBUF_LATCH_MODE latch_mode, HFID *hfid, PGBUF_WATCHER *watcher, const char *caller_file, const int caller_line)
void pgbuf_ordered_unfix_debug(THREAD_ENTRY *thread_p, PGBUF_WATCHER *watcher_object, const char *caller_file, int caller_line)
#define PGBUF_NUM_ALLOC_HOLDER
Definition: page_buffer.c:87
unsigned long long num_pages_created
Definition: page_buffer.c:373
#define PGBUF_LRU_LIST_COUNT(list)
Definition: page_buffer.c:894
static int pgbuf_remove_private_from_aout_list(const int lru_idx)
Definition: page_buffer.c:9809
time_t print_out_time
Definition: page_buffer.c:399
#define VPID_COPY(dest_ptr, src_ptr)
Definition: dbtype_def.h:909
PAGEID pgbuf_get_page_id(PAGE_PTR pgptr)
Definition: page_buffer.c:4657
void * fileio_read(THREAD_ENTRY *thread_p, int vol_fd, void *io_page_p, PAGEID page_id, size_t page_size)
Definition: file_io.c:3950
int db_make_bigint(DB_VALUE *value, const DB_BIGINT num)
static int pgbuf_latch_idle_page(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, PGBUF_LATCH_MODE request_mode)
Definition: page_buffer.c:5619
static int pgbuf_lock_page(THREAD_ENTRY *thread_p, PGBUF_BUFFER_HASH *hash_anchor, const VPID *vpid)
Definition: page_buffer.c:7202
static int pgbuf_get_groupid_and_unfix(THREAD_ENTRY *thread_p, const VPID *req_vpid, PAGE_PTR *pgptr, VPID *groupid, bool do_unfix)
static API_MUTEX mutex
Definition: api_util.c:72
#define ER_FAILED
Definition: error_code.h:47
unsigned char ptype
Definition: file_io.h:177
STATIC_INLINE void pgbuf_bcb_unregister_avoid_deallocation(PGBUF_BCB *bcb)
int pgbuf_compare_vpid(const void *key_vpid1, const void *key_vpid2)
Definition: page_buffer.c:1287
#define PGBUF_NEIGHBOR_PAGES
Definition: page_buffer.c:292
int pgbuf_start_scan(THREAD_ENTRY *thread_p, int type, DB_VALUE **arg_values, int arg_cnt, void **ptr)
LOG_GLOBAL log_Gl
static PGBUF_HOLDER * pgbuf_get_holder(THREAD_ENTRY *thread_p, PAGE_PTR pgptr)
#define ALWAYS_INLINE
unsigned hold_has_write_latch
Definition: page_buffer.c:421
STATIC_INLINE int pgbuf_flush_neighbor_safe(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, VPID *expected_vpid, bool *flushed)
PGBUF_AOUT_BUF * Aout_top
Definition: page_buffer.c:613
STATIC_INLINE bool pgbuf_assign_direct_victim(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
int pgbuf_promote_read_latch_debug(THREAD_ENTRY *thread_p, PAGE_PTR *pgptr_p, PGBUF_PROMOTE_CONDITION condition, const char *caller_file, int caller_line)
Definition: page_buffer.c:2198
int mht_rem(MHT_TABLE *ht, const void *key, int(*rem_func)(const void *key, void *data, void *args), void *func_args)
Definition: memory_hash.c:1952
bool dwb_is_created(void)
STATIC_INLINE bool pgbuf_is_exist_blocked_reader_writer(PGBUF_BCB *bufptr)
#define PAGE_ADD_REFRESH_STAT
#define PGBUF_PRIVATE_LRU_COUNT
Definition: page_buffer.c:931
#define PGBUF_IS_BCB_IN_LRU(bcb)
Definition: page_buffer.c:883
#define pgbuf_unfix(thread_p, pgptr)
Definition: page_buffer.h:276
TSC_TICKS start_tick
Definition: page_buffer.c:854
PERF_PROMOTE_CONDITION
Definition: perf_monitor.h:189
STATIC_INLINE void pgbuf_lru_add_bcb_to_top(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, PGBUF_LRU_LIST *lru_list)
Definition: page_buffer.c:8873
#define PGBUF_BUFFER_HASH_SIZEOF
Definition: page_buffer.c:115
#define PGBUF_THREAD_SHOULD_IGNORE_UNFIX(th)
Definition: page_buffer.c:275
#define PGBUF_LRU_ZONE_ONE_TWO_COUNT(list)
Definition: page_buffer.c:893
UINT64 tsc_elapsed_utime(TSC_TICKS end_tick, TSC_TICKS start_tick)
Definition: tsc_timer.c:135
#define pgbuf_invalidate(thread_p, pgptr)
Definition: page_buffer.h:290
void pgbuf_unfix_all(THREAD_ENTRY *thread_p)
Definition: page_buffer.c:2656
STATIC_INLINE bool pgbuf_get_check_page_validation_level(int page_validation_level)
void logpb_force_flush_pages(THREAD_ENTRY *thread_p)
#define PAGEID_MAX
char watched_at[128]
Definition: page_buffer.h:232
const void * mht_put(MHT_TABLE *ht, const void *key, void *data)
Definition: memory_hash.c:1778
STATIC_INLINE void fileio_init_lsa_of_page(FILEIO_PAGE *io_page, PGLENGTH page_size)
Definition: file_io.h:208
unsigned int victim_candidate_pages
Definition: page_buffer.c:383
#define ASSERT_ERROR_AND_SET(error_code)
void thread_sleep(double millisec)
#define LSA_INITIALIZER
Definition: log_lsa.hpp:76
#define PGBUF_HOLDER_SET_SIZEOF
Definition: page_buffer.c:123
#define ER_PB_UNKNOWN_PAGEPTR
Definition: error_code.h:70
void get_stats(cubperf::stat_value *stats_out)
unsigned long long num_pages_created
Definition: page_buffer.c:396
#define assert_release(e)
Definition: error_manager.h:96
static int pgbuf_block_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, PGBUF_LATCH_MODE request_mode, int request_fcnt, bool as_promote)
Definition: page_buffer.c:6410
#define PGBUF_AGE_DIFF(bcb_age, list_age)
Definition: page_buffer.c:886
void thread_wakeup_already_had_mutex(cubthread::entry *thread_p, thread_resume_suspend_status resume_reason)
void pgbuf_set_dirty(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, bool free_page)
Definition: page_buffer.c:4280
#define pthread_mutex_destroy(a)
Definition: page_buffer.c:92
unsigned int num_system_pages
Definition: page_buffer.c:388
LOG_LSA * log_get_restart_lsa(void)
Definition: log_manager.c:515
static bool pgbuf_is_temp_lsa(const log_lsa &lsa)
static void pgbuf_wake_flush_waiters(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
#define PGBUF_MAX_PAGE_FIXED_BY_TRAN
Definition: page_buffer.c:300
#define VACUUM_IS_THREAD_VACUUM
Definition: vacuum.h:215
void pgbuf_notify_vacuum_follows(THREAD_ENTRY *thread_p, PAGE_PTR page)
#define ER_LOG_FLUSH_VICTIM_FINISHED
Definition: error_code.h:1243
int thread_get_current_entry_index(void)
#define PGBUF_HOLDER_SIZEOF
Definition: page_buffer.c:121
static int pgbuf_invalidate_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr)
Definition: page_buffer.c:7892
void fileio_flush_control_finalize(void)
Definition: file_io.c:702
STATIC_INLINE void pgbuf_lru_add_bcb_to_middle(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, PGBUF_LRU_LIST *lru_list)
Definition: page_buffer.c:8923
char init_at[256]
Definition: page_buffer.h:233
PGBUF_ORDERED_RANK
Definition: page_buffer.h:208
INT16 VOLID
unsigned int num_temp_pages
Definition: page_buffer.c:389
PGBUF_BCB * top
Definition: page_buffer.c:548
bool pgbuf_check_page_type_no_error(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PAGE_TYPE ptype)
STATIC_INLINE PGBUF_BCB * pgbuf_search_hash_chain(THREAD_ENTRY *thread_p, PGBUF_BUFFER_HASH *hash_anchor, const VPID *vpid) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:6898
PGBUF_VICTIM_CANDIDATE_LIST * victim_cand_list
Definition: page_buffer.c:733
PGBUF_ORDERED_GROUP group_id
Definition: page_buffer.h:225
#define MEM_SIZE_IS_VALID(size)
Definition: porting.h:85
#define MEM_REGION_GUARD_MARK
Definition: memory_alloc.h:102
#define ER_PRM_BAD_VALUE
Definition: error_code.h:1048
struct timeval TSCTIMEVAL
Definition: tsc_timer.h:40
static INLINE unsigned int pgbuf_hash_func_mirror(const VPID *vpid) __attribute__((ALWAYS_INLINE))
STATIC_INLINE void pgbuf_bcb_change_zone(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int new_lru_idx, PGBUF_ZONE new_zone)
#define CAST_PGPTR_TO_IOPGPTR(io_pgptr, pgptr)
Definition: page_buffer.c:145
void thread_suspend_wakeup_and_unlock_entry(cubthread::entry *thread_p, thread_resume_suspend_status suspended_reason)
LOG_TDES * LOG_FIND_TDES(int tran_index)
Definition: log_impl.h:1095
int pgbuf_flush_all_unfixed(THREAD_ENTRY *thread_p, VOLID volid)
Definition: page_buffer.c:3099
void tsc_elapsed_time_usec(TSCTIMEVAL *tv, TSC_TICKS end_tick, TSC_TICKS start_tick)
Definition: tsc_timer.c:101
#define PGBUF_MAX_PAGE_WATCHERS
Definition: page_buffer.c:298
int32_t pageid
Definition: dbtype_def.h:879
STATIC_INLINE void pgbuf_bcb_check_and_reset_fix_and_avoid_dealloc(PGBUF_BCB *bcb, const char *file, int line)
STATIC_INLINE bool pgbuf_should_move_private_to_shared(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int thread_private_lru_index) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:6365
#define LSA_AS_ARGS(lsa_ptr)
Definition: log_lsa.hpp:78
static int pgbuf_flush_page_and_neighbors_fb(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, int *flushed_pages)
PGBUF_STATUS_OLD show_status_old
Definition: page_buffer.c:784
#define PGBUF_IS_ORDERED_PAGETYPE(ptype)
Definition: page_buffer.h:156
void pgbuf_ordered_set_dirty_and_free(THREAD_ENTRY *thread_p, PGBUF_WATCHER *pg_watcher)
#define MAX_NTRANS
INT32 hpgid
int numeric_db_value_coerce_to_num(DB_VALUE *src, DB_VALUE *dest, DB_DATA_STATUS *data_status)
static int pgbuf_initialize_page_monitor(void)
#define diff
Definition: mprec.h:352
int pgbuf_flush_victim_candidates(THREAD_ENTRY *thread_p, float flush_ratio, PERF_UTIME_TRACKER *perf_tracker, bool *stop)
Definition: page_buffer.c:3237
PAGE_PTR pgbuf_flush_with_wal(THREAD_ENTRY *thread_p, PAGE_PTR pgptr)
Definition: page_buffer.c:2956
int er_errid(void)
STATIC_INLINE int perfmon_get_activation_flag(void) __attribute__((ALWAYS_INLINE))
PGBUF_HOLDER_ANCHOR * thrd_holder_info
Definition: page_buffer.c:746
#define VPID_INITIALIZER
Definition: dbtype_def.h:894
int pgbuf_get_fix_count(PAGE_PTR pgptr)
int pgbuf_flush_control_from_dirty_ratio(void)
#define PGBUF_IS_CLEAN_WATCHER(w)
Definition: page_buffer.h:153
int * private_lru_session_cnt
Definition: page_buffer.c:680
STATIC_INLINE void pgbuf_bcb_clear_dirty(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
PAGE_TYPE
int dwb_read_page(THREAD_ENTRY *thread_p, const VPID *vpid, void *io_page, bool *success)
#define PGBUF_BCB_DIRTY_FLAG
Definition: page_buffer.c:214
#define PTR_ALIGN(addr, boundary)
Definition: memory_alloc.h:77
PGBUF_BCB * bufptr
Definition: page_buffer.c:440
bool logtb_get_check_interrupt(THREAD_ENTRY *thread_p)
STATIC_INLINE int pgbuf_find_current_wait_msecs(THREAD_ENTRY *thread_p)
PERF_PAGE_MODE perf_page_found
Definition: page_buffer.c:857
SHOWSTMT_ARRAY_CONTEXT * showstmt_alloc_array_context(THREAD_ENTRY *thread_p, int num_total, int num_cols)
Definition: show_scan.c:336
#define PGBUF_MIN_VICTIM_REQ
#define ER_CSS_PTHREAD_COND_SIGNAL
Definition: error_code.h:1010
#define PGBUF_MAKE_ZONE(list_id, zone)
Definition: page_buffer.c:204
unsigned int free_pages
Definition: page_buffer.c:382
bool LSA_LT(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:174
#define PGBUF_BCB_FLAGS_MASK
Definition: page_buffer.c:234
#define er_log_debug(...)
PGBUF_BCB * pages_bufptr[2 *PGBUF_MAX_NEIGHBOR_PAGES-1]
Definition: page_buffer.c:432
PGBUF_HOLDER * thrd_link
Definition: page_buffer.c:441
int pgbuf_ordered_fix_debug(THREAD_ENTRY *thread_p, const VPID *req_vpid, PAGE_FETCH_MODE fetch_mode, const PGBUF_LATCH_MODE request_mode, PGBUF_WATCHER *req_watcher, const char *caller_file, int caller_line)
#define VPID_AS_ARGS(vpidp)
Definition: dbtype_def.h:896
volatile int flags
Definition: page_buffer.c:578
static int pgbuf_initialize_lock_table(void)
Definition: page_buffer.c:5065
PGBUF_AOUT_BUF * prev
Definition: page_buffer.c:604
#define PGBUF_TIMEOUT
Definition: page_buffer.c:99
PGBUF_LATCH_MODE latch_mode
Definition: page_buffer.c:479
PGBUF_SEQ_FLUSHER seq_chkpt_flusher
Definition: page_buffer.c:734
PGBUF_PAGE_QUOTA quota
Definition: page_buffer.c:737
unsigned int num_index_pages
Definition: page_buffer.c:386
STATIC_INLINE PGBUF_HOLDER * pgbuf_allocate_thrd_holder_entry(THREAD_ENTRY *thread_p) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:5371
STATIC_INLINE int pgbuf_bcb_flush_with_wal(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, bool is_page_flush_thread, bool *is_bcb_locked)
Definition: page_buffer.c:9897
INT64 timeval_diff_in_msec(const struct timeval *end_time, const struct timeval *start_time)
Definition: porting.c:2133
#define pgbuf_invalidate_all(thread_p, volid)
Definition: page_buffer.h:286
void _er_log_debug(const char *file_name, const int line_no, const char *fmt,...)
void * pgbuf_copy_to_area(THREAD_ENTRY *thread_p, const VPID *vpid, int start_offset, int length, void *area, bool do_fetch)
Definition: page_buffer.c:4067
#define FILEIO_PAGE_FLAG_ENCRYPTED_MASK
Definition: file_io.h:66
#define MAX_ALIGNMENT
Definition: memory_alloc.h:70
void log_wakeup_log_flush_daemon()
Definition: log_manager.c:9715
void pgbuf_log_redo_new_page(THREAD_ENTRY *thread_p, PAGE_PTR page_new, int data_size, PAGE_TYPE ptype_new)
STATIC_INLINE void pgbuf_bcb_set_dirty(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
#define PGBUF_IS_PRIVATE_LRU_ONE_TWO_OVER_QUOTA(lru_idx)
Definition: page_buffer.c:946
#define PGBUF_CLEAR_WATCHER(w)
Definition: page_buffer.h:115
static int pgbuf_initialize_lru_list(void)
Definition: page_buffer.c:5105
#define pthread_mutex_init(a, b)
Definition: page_buffer.c:91
#define PGBUF_SHARED_LRU_COUNT
Definition: page_buffer.c:930
volatile int count_fix_and_avoid_dealloc
Definition: page_buffer.c:491
void pgbuf_finalize(void)
Definition: page_buffer.c:1528
#define PGBUF_PRIVATE_LRU_MIN_COUNT
Definition: page_buffer.c:905
#define PGBUF_LRU_INDEX_FROM_PRIVATE(private_id)
Definition: page_buffer.c:935
#define PGBUF_ABORT_RELEASE()
Definition: page_buffer.c:967
void THREAD_ENTRY
#define NULL_PAGEID
#define pgbuf_unfix_and_init(thread_p, pgptr)
Definition: page_buffer.h:63
FILEIO_WRITE_MODE
Definition: file_io.h:164
PGBUF_BCB * bottom_2
Definition: page_buffer.c:551
#define PGBUF_IS_AUXILIARY_VOLUME(volid)
Definition: page_buffer.c:162
PGBUF_BUFFER_HASH * buf_hash_table
Definition: page_buffer.c:720
#define PGBUF_BCB_TRYLOCK(bcb)
Definition: page_buffer.c:843
#define ER_PB_UNFIXED_PAGEPTR
Definition: error_code.h:69
int thread_get_entry_index(cubthread::entry *thread_p)
volatile int lru_shared_pgs_cnt
Definition: page_buffer.c:659
#define FREE(PTR)
Definition: cas_common.h:56
void log_append_redo_data2(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VFID *vfid, PAGE_PTR pgptr, PGLENGTH offset, int length, const void *data)
Definition: log_manager.c:1995
static int pgbuf_flush_all_helper(THREAD_ENTRY *thread_p, VOLID volid, bool is_unfixed_only, bool is_set_lsa_as_null)
Definition: page_buffer.c:3030
STATIC_INLINE void pgbuf_lru_adjust_zone2(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, bool min_one)
Definition: page_buffer.c:9113
#define PGBUF_MIN_SHARED_LIST_ADJUST_SIZE
Definition: page_buffer.c:910
#define PGBUF_CHKPT_BURST_PAGES
Definition: page_buffer.c:307
PGBUF_BCB * bottom
Definition: page_buffer.c:549
void mht_destroy(MHT_TABLE *ht)
Definition: memory_hash.c:1140
#define VOLID_LSB_BITS
int boot_find_root_heap(HFID *root_hfid_p)
Definition: boot_sr.c:325
int pgbuf_rv_dealloc_undo(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
manager * get_manager(void)
PAGE_TYPE pgbuf_get_page_ptype(THREAD_ENTRY *thread_p, PAGE_PTR pgptr)
Definition: page_buffer.c:4675
const char * tde_get_algorithm_name(TDE_ALGORITHM tde_algo)
Definition: tde.c:1694
static bool pgbuf_is_valid_page_ptr(const PAGE_PTR pgptr)
#define PERF(id)
#define PGBUF_BCB_INVALID_VICTIM_CANDIDATE_MASK
Definition: page_buffer.c:248
#define PGBUF_IOPAGE_BUFFER_SIZE
Definition: page_buffer.c:111
void er_set(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
STATIC_INLINE PGBUF_ZONE pgbuf_bcb_get_zone(const PGBUF_BCB *bcb)
#define PGBUF_PRIVATE_LRU_MAX_HARD_QUOTA
Definition: page_buffer.c:906
#define PGBUF_BCB_CHECK_MUTEX_LEAKS()
Definition: page_buffer.c:846
int pgbuf_initialize(void)
Definition: page_buffer.c:1311
#define ER_PB_ALL_BUFFERS_DIRTY
Definition: error_code.h:1356
char * fileio_get_volume_label(VOLID vol_id, bool is_peek)
Definition: file_io.c:6182
static int pgbuf_initialize_aout_list(void)
Definition: page_buffer.c:5168
PGBUF_AOUT_BUF * bufarray
Definition: page_buffer.c:618
static int pgbuf_initialize_seq_flusher(PGBUF_SEQ_FLUSHER *seq_flusher, PGBUF_VICTIM_CANDIDATE_LIST *f_list, const int cnt)
PGBUF_INVALID_LIST buf_invalid_list
Definition: page_buffer.c:731
#define PGBUF_DEFAULT_FIX_COUNT
Definition: page_buffer.c:83
static int pgbuf_flush_chkpt_seq_list(THREAD_ENTRY *thread_p, PGBUF_SEQ_FLUSHER *seq_flusher, const LOG_LSA *prev_chkpt_redo_lsa, LOG_LSA *chkpt_smallest_lsa)
Definition: page_buffer.c:3693
static PGBUF_BATCH_FLUSH_HELPER pgbuf_Flush_helper
Definition: page_buffer.c:803
PAGE_FETCH_MODE
Definition: page_buffer.h:160
PAGE_PTR pgptr
Definition: recovery.h:199
bool pgbuf_has_prevent_dealloc(PAGE_PTR pgptr)
#define assert(x)
void log_skip_logging(THREAD_ENTRY *thread_p, LOG_DATA_ADDR *addr)
Definition: log_manager.c:3244
STATIC_INLINE void pgbuf_set_dirty_buffer_ptr(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr)
PGBUF_WATCHER * first_watcher
Definition: page_buffer.c:450
STATIC_INLINE bool pgbuf_bcb_is_invalid_direct_victim(const PGBUF_BCB *bcb)
bool log_is_logged_since_restart(const LOG_LSA *lsa_ptr)
Definition: log_manager.c:593
const VOLID LOG_DBFIRST_VOLID
Definition: log_volids.hpp:38
#define ER_LK_PAGE_TIMEOUT
Definition: error_code.h:134
TDE_ALGORITHM
Definition: tde.h:71
PGBUF_BCB * prev_BCB
Definition: page_buffer.c:485
bool pgbuf_check_page_ptype(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PAGE_TYPE ptype)
static const char * pgbuf_latch_mode_str(PGBUF_LATCH_MODE latch_mode)
volatile int flags
Definition: page_buffer.c:480
DISK_ISVALID disk_is_page_sector_reserved_with_debug_crash(THREAD_ENTRY *thread_p, VOLID volid, PAGEID pageid, bool debug_crash)
int prm_get_integer_value(PARAM_ID prm_id)
#define pgbuf_set_dirty_and_free(thread_p, pgptr)
Definition: page_buffer.h:351
#define ER_GENERIC_ERROR
Definition: error_code.h:49
#define STATIC_INLINE
static const char * pgbuf_zone_str(PGBUF_ZONE zone)
static int pgbuf_initialize_page_quota(void)
#define OID_IS_ROOTOID(oidp)
Definition: oid.h:82
LOG_LSA * pgbuf_get_lsa(PAGE_PTR pgptr)
Definition: page_buffer.c:4318
const log_lsa PGBUF_TEMP_LSA
Definition: page_buffer.h:244
PGBUF_LATCH_MODE
Definition: page_buffer.h:176
DISK_VOLPURPOSE xdisk_get_purpose(THREAD_ENTRY *thread_p, VOLID volid)
void pgbuf_flush_if_requested(THREAD_ENTRY *thread_p, PAGE_PTR page)
Definition: page_buffer.c:2996
#define VACUUM_IS_THREAD_VACUUM_WORKER
Definition: vacuum.h:216
#define ER_PAGE_LATCH_TIMEDOUT
Definition: error_code.h:1042
lockfree::circular_queue< int > * private_lrus_with_victims
Definition: page_buffer.c:778
const char * pgbuf_get_volume_label(PAGE_PTR pgptr)
Definition: page_buffer.c:4731
static int pgbuf_victimize_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr)
Definition: page_buffer.c:7845
PGBUF_HOLDER * next_holder
Definition: page_buffer.c:442
PERF_CONDITIONAL_FIX_TYPE
Definition: perf_monitor.h:180
#define HASH_SIZE_BITS
Definition: page_buffer.c:278
#define ER_OUT_OF_VIRTUAL_MEMORY
Definition: error_code.h:50
#define MEM_REGION_SCRAMBLE_MARK
Definition: memory_alloc.h:101
#define detailed_er_log(...)
#define REFPTR(T, name)
Definition: porting.h:1089
#define CAST_IOPGPTR_TO_PGPTR(pgptr, io_pgptr)
Definition: page_buffer.c:150
static int pgbuf_initialize_invalid_list(void)
Definition: page_buffer.c:5272
TDE_CIPHER tde_Cipher
Definition: tde.c:69
#define PGBUF_HASH_VALUE(vpid)
Definition: page_buffer.c:281
PGBUF_BCB * next_BCB
Definition: page_buffer.c:486
PGBUF_LATCH_MODE pgbuf_get_latch_mode(PAGE_PTR pgptr)
Definition: page_buffer.c:4633
TSC_TICKS last_adjust_time
Definition: page_buffer.c:690
PGBUF_LATCH_CONDITION
Definition: page_buffer.h:185
#define PGBUF_TRAN_MAX_ACTIVITY
Definition: page_buffer.c:267
bool LSA_LE(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:167
#define PGBUF_IS_BCB_OLD_ENOUGH(bcb, lru_list)
Definition: page_buffer.c:890
bool log_is_log_flush_daemon_available()
Definition: log_manager.c:9730
void pgbuf_rv_flush_page_dump(FILE *fp, int length, void *data)
#define pgbuf_replace_watcher(thread_p, old_watcher, new_watcher)
Definition: page_buffer.h:337
PGBUF_IOPAGE_BUFFER * iopage_buffer
Definition: page_buffer.c:500
STATIC_INLINE void pgbuf_add_watch_instance_internal(PGBUF_HOLDER *holder, PAGE_PTR pgptr, PGBUF_WATCHER *watcher, const PGBUF_LATCH_MODE latch_mode, const bool clear_unfix_flag, const char *caller_file, const int caller_line)
#define DB_INT32_MAX
Definition: dbtype_def.h:633
STATIC_INLINE bool pgbuf_lfcq_add_lru_with_victims(PGBUF_LRU_LIST *lru_list)
#define PGBUF_IS_PRIVATE_LRU_INDEX(lru_idx)
Definition: page_buffer.c:938
int pgbuf_rv_set_tde_algorithm(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
Definition: page_buffer.c:4528
int pgbuf_rv_new_page_redo(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
#define pthread_mutex_trylock(a)
Definition: lock_free.c:38
STATIC_INLINE void pgbuf_lru_fall_bcb_to_zone_3(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, PGBUF_LRU_LIST *lru_list)
Definition: page_buffer.c:9229
int pgbuf_fix_if_not_deallocated_with_caller(THREAD_ENTRY *thread_p, const VPID *vpid, PGBUF_LATCH_MODE latch_mode, PGBUF_LATCH_CONDITION latch_condition, PAGE_PTR *page, const char *caller_file, int caller_line)
VPID vpids[2 *PGBUF_MAX_NEIGHBOR_PAGES-1]
Definition: page_buffer.c:433
unsigned latch_mode
Definition: page_buffer.h:226
unsigned long long num_hit
Definition: page_buffer.c:371
void log_append_compensate_with_undo_nxlsa(THREAD_ENTRY *thread_p, LOG_RCVINDEX rcvindex, const VPID *vpid, PGLENGTH offset, PAGE_PTR pgptr, int length, const void *data, LOG_TDES *tdes, const LOG_LSA *undo_nxlsa)
Definition: log_manager.c:2990
PGLENGTH db_page_size(void)
#define pthread_mutex_lock(a)
Definition: page_buffer.c:93
bool pgbuf_is_log_check_for_interrupts(THREAD_ENTRY *thread_p)
Definition: page_buffer.c:4762
void pgbuf_watcher_init_debug(PGBUF_WATCHER *watcher, const char *caller_file, const int caller_line, bool add)
bool logtb_is_current_active(THREAD_ENTRY *thread_p)
PGBUF_BUFFER_LOCK * lock_next
Definition: page_buffer.c:523
#define VPID_EQ(vpid_ptr1, vpid_ptr2)
Definition: dbtype_def.h:915
unsigned long long num_pages_written
Definition: page_buffer.c:374
#define PGBUF_LIMIT_AOUT_BUFFERS
void pgbuf_get_page_flush_interval(bool &is_timed_wait, cubthread::delta_time &period)
STATIC_INLINE bool pgbuf_bcb_is_direct_victim(const PGBUF_BCB *bcb)
STATIC_INLINE void pgbuf_add_vpid_to_aout_list(THREAD_ENTRY *thread_p, const VPID *vpid, const int lru_idx)
Definition: page_buffer.c:9642
short volid
Definition: dbtype_def.h:880
static void pgbuf_move_bcb_to_bottom_lru(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
Definition: page_buffer.c:9593
void reset_looper(void)
TDE_ALGORITHM pgbuf_get_tde_algorithm(PAGE_PTR pgptr)
Definition: page_buffer.c:4548
static INLINE bool pgbuf_is_temporary_volume(VOLID volid) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:4908
static void pgbuf_flags_mask_sanity_check(void)
#define PGBUF_GET_LRU_INDEX(flags)
Definition: page_buffer.c:206
VFID vfid
char fixed_at[64 *1024]
Definition: page_buffer.c:445
std::int64_t pageid
Definition: log_lsa.hpp:36
int length
Definition: recovery.h:202
STATIC_INLINE bool pgbuf_is_hit_ratio_low(void)
PERF_HOLDER_LATCH perf_latch_mode
Definition: page_buffer.c:858
STATIC_INLINE bool pgbuf_bcb_should_avoid_deallocation(const PGBUF_BCB *bcb)
void * mht_get(MHT_TABLE *ht, const void *key)
Definition: memory_hash.c:1419
lockfree::circular_queue< int > * shared_lrus_with_victims
Definition: page_buffer.c:780
PGBUF_BCB * hash_next
Definition: page_buffer.c:484
#define NULL
Definition: freelistheap.h:34
const VPID vpid_Null_vpid
Definition: page_buffer.c:74
#define PGBUF_NEIGHBOR_FLUSH_NONDIRTY
Definition: page_buffer.c:288
#define ER_PAGE_LATCH_PROMOTE_FAIL
Definition: error_code.h:1512
MHT_TABLE ** aout_buf_ht
Definition: page_buffer.c:621
#define PGBUF_FIX_COUNT_THRESHOLD
Definition: page_buffer.c:100
static void pgbuf_add_fixed_at(PGBUF_HOLDER *holder, const char *caller_file, int caller_line, bool reset)
STATIC_INLINE int pgbuf_latch_bcb_upon_fix(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, PGBUF_LATCH_MODE request_mode, int buf_lock_acquired, PGBUF_LATCH_CONDITION condition, bool *is_latch_wait) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:5682
STATIC_INLINE void pgbuf_lru_remove_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
Definition: page_buffer.c:9477
#define PGBUF_BCB_UNLOCK(bcb)
Definition: page_buffer.c:844
STATIC_INLINE void pgbuf_bcb_mark_was_not_flushed(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, bool mark_dirty)
unsigned int num_data_pages
Definition: page_buffer.c:387
#define PGBUF_BCB_AVOID_DEALLOC_MASK
Definition: page_buffer.c:259
#define CAST_BFPTR_TO_PGPTR(pgptr, bufptr)
Definition: page_buffer.c:155
PERF_PAGE_TYPE btree_get_perf_btree_page_type(THREAD_ENTRY *thread_p, PAGE_PTR page_ptr)
Definition: btree.c:33161
void tsc_getticks(TSC_TICKS *tck)
Definition: tsc_timer.c:81
if(extra_options)
Definition: dynamic_load.c:958
PGBUF_HOLDER * thrd_hold_list
Definition: page_buffer.c:460
int pgbuf_rv_flush_page(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
STATIC_INLINE void fileio_set_page_lsa(FILEIO_PAGE *io_page, const LOG_LSA *lsa, PGLENGTH page_size)
Definition: file_io.h:227
int pgbuf_assign_private_lru(THREAD_ENTRY *thread_p, bool is_vacuum, const int id)
STATIC_INLINE bool perfmon_is_perf_tracking(void) __attribute__((ALWAYS_INLINE))
STATIC_INLINE bool perfmon_is_perf_tracking_and_active(int activation_flag) __attribute__((ALWAYS_INLINE))
PGBUF_HOLDER_SET * free_holder_set
Definition: page_buffer.c:761
PGBUF_PROMOTE_CONDITION
Definition: page_buffer.h:191
bool log_is_in_crash_recovery(void)
Definition: log_manager.c:476
const VFID * vfid
Definition: log_append.hpp:56
bool pgbuf_is_io_stressful(void)
STATIC_INLINE int pgbuf_insert_into_hash_chain(THREAD_ENTRY *thread_p, PGBUF_BUFFER_HASH *hash_anchor, PGBUF_BCB *bufptr) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:7053
void pgbuf_force_to_check_for_interrupts(void)
Definition: page_buffer.c:4750
static void pgbuf_wakeup_page_flush_daemon(THREAD_ENTRY *thread_p)
STATIC_INLINE int pgbuf_get_shared_lru_index_for_add(void)
Definition: page_buffer.c:1022
static int success()
STATIC_INLINE void pgbuf_lru_adjust_zone1(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, bool min_one)
Definition: page_buffer.c:9060
#define PGBUF_BCB_FLUSHING_TO_DISK_FLAG
Definition: page_buffer.c:217
bool LSA_ISNULL(const log_lsa *lsa_ptr)
Definition: log_lsa.hpp:153
#define ER_PB_ORDERED_REFIX_FAILED
Definition: error_code.h:1522
static int pgbuf_bcb_safe_flush_force_unlock(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, bool synchronous)
Definition: page_buffer.c:7962
PGBUF_BCB * hash_next
Definition: page_buffer.c:538
PAGE_PTR pgptr
Definition: log_append.hpp:57
static void pgbuf_remove_watcher(PGBUF_HOLDER *holder, PGBUF_WATCHER *watcher_object)
#define err(fd,...)
Definition: porting.h:431
#define PGBUF_FIND_BUFFER_GUARD(bufptr)
Definition: page_buffer.c:134
#define ER_PB_BAD_PAGEID
Definition: error_code.h:67
#define PGBUF_LRU_NBITS
Definition: page_buffer.c:170
#define MIN_PRIVATE_RATIO
PGBUF_BCB * BCB_table
Definition: page_buffer.c:719
unsigned int clean_pages
Definition: page_buffer.c:384
int pgbuf_get_hold_count(THREAD_ENTRY *thread_p)
void thread_lock_entry(cubthread::entry *thread_p)
#define pgbuf_fix(thread_p, vpid, fetch_mode, requestmode, condition)
Definition: page_buffer.h:255
PGBUF_ORDERED_GROUP group_id
Definition: page_buffer.c:405
MHT_TABLE * mht_create(const char *name, int est_size, unsigned int(*hash_func)(const void *key, unsigned int ht_size), int(*cmp_func)(const void *key1, const void *key2))
Definition: memory_hash.c:894
DISK_ISVALID disk_is_page_sector_reserved(THREAD_ENTRY *thread_p, VOLID volid, PAGEID pageid)
STATIC_INLINE bool pgbuf_bcb_should_be_moved_to_bottom_lru(const PGBUF_BCB *bcb)
DB_VALUE * showstmt_alloc_tuple_in_context(THREAD_ENTRY *thread_p, SHOWSTMT_ARRAY_CONTEXT *ctx)
Definition: show_scan.c:402
PGBUF_HOLDER * thrd_free_list
Definition: page_buffer.c:459
#define PGBUF_PAGE_QUOTA_IS_ENABLED
Definition: page_buffer.c:912
LOG_LSA flush_upto_lsa
Definition: page_buffer.c:635
#define PGBUF_BCB_VICTIM_DIRECT_FLAG
Definition: page_buffer.c:224
unsigned dirty_before_hold
Definition: page_buffer.c:419
int pgbuf_get_condition_for_ordered_fix(const VPID *vpid_new_page, const VPID *vpid_fixed_page, const HFID *hfid)
#define PGBUF_WATCHER_MAGIC_NUMBER
Definition: page_buffer.h:84
unsigned dirtied_by_holder
Definition: page_buffer.c:420
unsigned char pflag
Definition: file_io.h:178
STATIC_INLINE void pgbuf_bcb_register_hit_for_lru(PGBUF_BCB *bcb)
PGBUF_BCB * invalid_top
Definition: page_buffer.c:589
#define FILEIO_PAGE_FLAG_ENCRYPTED_AES
Definition: file_io.h:63
STATIC_INLINE void pgbuf_lru_add_victim_candidate(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, PGBUF_BCB *bcb)
TSCTIMEVAL tv_diff
Definition: page_buffer.c:861
int pgbuf_flush_all(THREAD_ENTRY *thread_p, VOLID volid)
Definition: page_buffer.c:3083
STATIC_INLINE void pgbuf_lru_add_new_bcb_to_middle(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int lru_idx)
Definition: page_buffer.c:9410
void er_set_with_oserror(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
int count(int &result, const cub_regex_object &reg, const std::string &src, const int position, const INTL_CODESET codeset)
static bool pgbuf_is_page_flush_daemon_available()
PGBUF_PAGE_MONITOR monitor
Definition: page_buffer.c:736
static int pgbuf_bcb_safe_flush_force_lock(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, bool synchronous)
Definition: page_buffer.c:7985
#define ER_CSS_PTHREAD_COND_TIMEDWAIT
Definition: error_code.h:1009
STATIC_INLINE bool pgbuf_bcb_is_to_vacuum(const PGBUF_BCB *bcb)
void pgbuf_get_vpid(PAGE_PTR pgptr, VPID *vpid)
Definition: page_buffer.c:4579
offset_type offset
Definition: log_append.hpp:58
const LOG_LSA * pgbuf_set_lsa(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, const LOG_LSA *lsa_ptr)
Definition: page_buffer.c:4364
int dwb_set_data_on_next_slot(THREAD_ENTRY *thread_p, FILEIO_PAGE *io_page_p, bool can_wait, DWB_SLOT **p_dwb_slot)
#define PGBUF_BCB_LOCK(bcb)
Definition: page_buffer.c:842
STATIC_INLINE void perfmon_diff_timeval(struct timeval *elapsed, struct timeval *start, struct timeval *end) __attribute__((ALWAYS_INLINE))
#define PGBUF_FIND_IOPAGE_PTR(i)
Definition: page_buffer.c:131
void er_stack_pop(void)
STATIC_INLINE bool pgbuf_bcb_is_hot(const PGBUF_BCB *bcb)
STATIC_INLINE int pgbuf_delete_from_hash_chain(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:7099
LOG_LSA reference_lsa
Definition: recovery.h:204
#define FILEIO_PAGE_FLAG_ENCRYPTED_ARIA
Definition: file_io.h:64
TSC_TICKS end_tick
Definition: page_buffer.c:855
static int pgbuf_get_victim_candidates_from_lru(THREAD_ENTRY *thread_p, int check_count, float lru_sum_flush_priority, bool *assigned_directly)
Definition: page_buffer.c:3156
#define MAX_DEPTH
int pgbuf_invalidate_all_debug(THREAD_ENTRY *thread_p, VOLID volid, const char *caller_file, int caller_line)
Definition: page_buffer.c:2852
STATIC_INLINE int pgbuf_unlatch_bcb_upon_unfix(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, int holder_status) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:6045
bool logtb_is_interrupted(THREAD_ENTRY *thread_p, bool clear, bool *continue_checking)
#define PGBUF_GET_ZONE(flags)
Definition: page_buffer.c:205
static void error(const char *msg)
Definition: gencat.c:331
#define pthread_mutex_unlock(a)
Definition: page_buffer.c:94
#define VPID_ISNULL(vpid_ptr)
Definition: dbtype_def.h:925
void thread_unlock_entry(cubthread::entry *thread_p)
const char * data
Definition: recovery.h:203
void * fileio_write(THREAD_ENTRY *thread_p, int vol_fd, void *io_page_p, PAGEID page_id, size_t page_size, FILEIO_WRITE_MODE write_mode)
Definition: file_io.c:4150
static int rc
Definition: serial.c:50
STATIC_INLINE void perfmon_inc_stat(THREAD_ENTRY *thread_p, PERF_STAT_ID psid) __attribute__((ALWAYS_INLINE))
STATIC_INLINE void pgbuf_bcb_register_fix(PGBUF_BCB *bcb)
#define ER_INTERRUPTED
Definition: error_code.h:51
std::chrono::system_clock::duration delta_time
LOG_TDES * LOG_FIND_CURRENT_TDES(THREAD_ENTRY *thread_p=NULL)
Definition: log_impl.h:1115
#define PBGUF_BIG_PRIVATE_MIN_SIZE
Definition: page_buffer.c:953
static int pgbuf_bcb_safe_flush_internal(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, bool synchronous, bool *locked)
Definition: page_buffer.c:8018
#define LOG_FIND_THREAD_TRAN_INDEX(thrd)
Definition: perf_monitor.h:158
PGBUF_VICTIM_CANDIDATE_LIST * flush_list
Definition: page_buffer.c:634
STATIC_INLINE bool pgbuf_bcb_mark_is_flushing(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
#define HFID_IS_NULL(hfid)
static int pgbuf_compare_hold_vpid_for_sort(const void *p1, const void *p2)
QUERY_ID qmgr_get_current_query_id(THREAD_ENTRY *thread_p)
static PGBUF_BCB * pgbuf_get_bcb_from_invalid_list(THREAD_ENTRY *thread_p)
Definition: page_buffer.c:8087
#define ARG_FILE_LINE
Definition: error_manager.h:44
#define TSC_ADD_TIMEVAL(total, diff)
Definition: tsc_timer.h:31
void pgbuf_daemons_get_stats(UINT64 *stats_out)
#define PGBUF_BUFFER_LOCK_SIZEOF
Definition: page_buffer.c:117
void destroy_daemon(daemon *&daemon_arg)
PERF_PAGE_TYPE pgbuf_get_page_type_for_stat(THREAD_ENTRY *thread_p, PAGE_PTR pgptr)
void logpb_flush_log_for_wal(THREAD_ENTRY *thread_p, const LOG_LSA *lsa_ptr)
static void pgbuf_lru_move_from_private_to_shared(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
Definition: page_buffer.c:9505
#define snprintf_dots_truncate(dest, max_len,...)
Definition: porting.h:323
VOLID pgbuf_get_volume_id(PAGE_PTR pgptr)
Definition: page_buffer.c:4707
PGBUF_AOUT_LIST buf_AOUT_list
Definition: page_buffer.c:730
STATIC_INLINE void pgbuf_lru_remove_victim_candidate(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, PGBUF_BCB *bcb)
STATIC_INLINE void perfmon_time_stat(THREAD_ENTRY *thread_p, PERF_STAT_ID psid, UINT64 timediff) __attribute__((ALWAYS_INLINE))
float prm_get_float_value(PARAM_ID prm_id)
PGBUF_AOUT_BUF * Aout_free
Definition: page_buffer.c:616
INT16 PGLENGTH
int wait_msecs
Definition: log_impl.h:471
void * pgbuf_copy_from_area(THREAD_ENTRY *thread_p, const VPID *vpid, int start_offset, int length, void *area, bool do_fetch, TDE_ALGORITHM tde_algo)
Definition: page_buffer.c:4192
int er_errid_if_has_error(void)
#define PGBUF_IS_BCB_IN_LRU_VICTIM_ZONE(bcb)
Definition: page_buffer.c:882
DB_DATA_STATUS
STATIC_INLINE int pgbuf_unlatch_thrd_holder(THREAD_ENTRY *thread_p, PGBUF_BCB *bufptr, PGBUF_HOLDER_STAT *holder_perf_stat_p) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:5484
PGBUF_BUFFER_LOCK * lock_next
Definition: page_buffer.c:539
#define free_and_init(ptr)
Definition: memory_alloc.h:147
#define LOG_ISRESTARTED()
Definition: log_impl.h:232
UINT64 fix_wait_time
Definition: page_buffer.c:864
void pgbuf_flush(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, bool free_page)
Definition: page_buffer.c:2933
int tde_decrypt_data_page(const FILEIO_PAGE *iopage_cipher, TDE_ALGORITHM tde_algo, bool is_temp, FILEIO_PAGE *iopage_plain)
Definition: tde.c:949
#define strlen(s1)
Definition: intl_support.c:43
unsigned long long num_page_request
Definition: page_buffer.c:395
void LSA_SET_NULL(log_lsa *lsa_ptr)
Definition: log_lsa.hpp:146
bool pgbuf_is_page_fixed_by_thread(THREAD_ENTRY *thread_p, const VPID *vpid_p)
#define const
Definition: cnvlex.c:77
#define ER_LOG_CHECKPOINT_SKIP_INVALID_PAGE
Definition: error_code.h:1360
#define DB_PAGESIZE
bool LSA_GT(const log_lsa *plsa1, const log_lsa *plsa2)
Definition: log_lsa.hpp:188
PAGE_PTR pgbuf_fix_with_retry(THREAD_ENTRY *thread_p, const VPID *vpid, PAGE_FETCH_MODE fetch_mode, PGBUF_LATCH_MODE request_mode, int retry)
Definition: page_buffer.c:1723
#define PGBUF_BCB_CHECK_OWN(bcb)
Definition: page_buffer.c:845
UINT64 lock_wait_time
Definition: page_buffer.c:862
void log_append_empty_record(THREAD_ENTRY *thread_p, LOG_RECTYPE logrec_type, LOG_DATA_ADDR *addr)
Definition: log_manager.c:3117
#define PGBUF_LRU_ZONE_MIN_RATIO
Definition: page_buffer.c:323
STATIC_INLINE bool pgbuf_bcb_is_flushing(const PGBUF_BCB *bcb)
#define PGBUF_LRU_ARE_ZONES_ONE_TWO_OVER_THRESHOLD(list)
Definition: page_buffer.c:899
#define ER_CSS_PTHREAD_MUTEX_TRYLOCK
Definition: error_code.h:1000
PGBUF_STATUS_SNAPSHOT show_status_snapshot
Definition: page_buffer.c:785
float private_pages_ratio
Definition: page_buffer.c:681
#define PGBUF_BCB_TO_VACUUM_FLAG
Definition: page_buffer.c:229
void pgbuf_set_page_ptype(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, PAGE_TYPE ptype)
Definition: page_buffer.c:4847
#define AOUT_HASH_DIVIDE_RATIO
Definition: page_buffer.c:816
STATIC_INLINE bool pgbuf_bcb_is_dirty(const PGBUF_BCB *bcb)
bool heap_is_page_header(THREAD_ENTRY *thread_p, PAGE_PTR page)
Definition: heap_file.c:24869
bool prm_get_bool_value(PARAM_ID prm_id)
bool is_loaded
Definition: tde.h:148
unsigned long long num_pages_written
Definition: page_buffer.c:397
int pgbuf_page_has_changed(PAGE_PTR pgptr, LOG_LSA *ref_lsa)
Definition: page_buffer.c:4342
UINT64 holder_wait_time
Definition: page_buffer.c:863
int logtb_find_client_name_host_pid(int tran_index, const char **client_prog_name, const char **client_user_name, const char **client_host_name, int *client_pid)
unsigned initial_rank
Definition: page_buffer.h:228
int thread_suspend_timeout_wakeup_and_unlock_entry(cubthread::entry *thread_p, struct timespec *time_p, thread_resume_suspend_status suspended_reason)
int pgbuf_invalidate_debug(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, const char *caller_file, int caller_line)
Definition: page_buffer.c:2748
unsigned page_was_unfixed
Definition: page_buffer.h:227
static PGBUF_BCB * pgbuf_lfcq_get_victim_from_shared_lru(THREAD_ENTRY *thread_p, bool multi_threaded)
#define PGBUF_DESIRED_HIT_VS_MISS_RATE
void er_clear(void)
PGBUF_HOLDER element[PGBUF_NUM_ALLOC_HOLDER]
Definition: page_buffer.c:466
unsigned long long num_hit
Definition: page_buffer.c:394
std::size_t thread_num_total_threads(void)
#define INIT_HOLDER_STAT(perf_stat)
Definition: page_buffer.c:309
PGBUF_WATCHER * next
Definition: page_buffer.h:223
unsigned curr_rank
Definition: page_buffer.h:229
HFID * pgbuf_ordered_null_hfid
Definition: page_buffer.c:805
unsigned int dummy
Definition: page_buffer.c:377
STATIC_INLINE void pgbuf_lru_adjust_zones(THREAD_ENTRY *thread_p, PGBUF_LRU_LIST *lru_list, bool min_one)
Definition: page_buffer.c:9163
PGBUF_WATCHER * last_watcher
Definition: page_buffer.c:451
STATIC_INLINE void pgbuf_bcb_mark_was_flushed(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb)
int i
Definition: dynamic_load.c:954
#define PGBUF_THREAD_HAS_PRIVATE_LRU(thread_p)
Definition: page_buffer.c:927
struct pgbuf_aout_buf PGBUF_AOUT_BUF
Definition: page_buffer.c:347
unsigned int pgbuf_hash_vpid(const void *key_vpid, unsigned int htsize)
Definition: page_buffer.c:1273
PGBUF_AOUT_BUF * next
Definition: page_buffer.c:603
lockfree::circular_queue< int > * big_private_lrus_with_victims
Definition: page_buffer.c:779
bool log_is_in_crash_recovery_and_not_yet_completes_redo(void)
Definition: log_manager.c:495
#define INLINE
unsigned int magic
Definition: page_buffer.h:231
#define ER_LOG_FLUSH_VICTIM_STARTED
Definition: error_code.h:1242
#define PGBUF_LRU_ZONE_MAX_RATIO
Definition: page_buffer.c:324
#define CAST_PGPTR_TO_BFPTR(bufptr, pgptr)
Definition: page_buffer.c:138
STATIC_INLINE void pgbuf_add_bufptr_to_batch(PGBUF_BCB *bufptr, int idx)
int db_make_double(DB_VALUE *value, const DB_C_DOUBLE num)
#define PGBUF_BCB_SIZEOF
Definition: page_buffer.c:110
PGBUF_WATCHER * watcher[PGBUF_MAX_PAGE_WATCHERS]
Definition: page_buffer.c:408
FILEIO_PAGE iopage
Definition: page_buffer.c:512
STATIC_INLINE void perfmon_add_stat(THREAD_ENTRY *thread_p, PERF_STAT_ID psid, UINT64 amount) __attribute__((ALWAYS_INLINE))
#define PGBUF_LRU_INDEX_MASK
Definition: page_buffer.c:172
#define NULL_VOLID
int pgbuf_rv_dealloc_undo_compensate(THREAD_ENTRY *thread_p, LOG_RCV *rcv)
#define PGBUF_BCB_INVALIDATE_DIRECT_VICTIM_FLAG
Definition: page_buffer.c:225
#define IO_MAX_PAGE_SIZE
PGBUF_BCB *volatile victim_hint
Definition: page_buffer.c:552
char page[1]
Definition: file_io.h:195
STATIC_INLINE bool pgbuf_bcb_is_async_flush_request(const PGBUF_BCB *bcb)
static int pgbuf_initialize_page_quota_parameters(void)
int db_make_int(DB_VALUE *value, const int num)
STATIC_INLINE bool pgbuf_check_page_ptype_internal(PAGE_PTR pgptr, PAGE_TYPE ptype, bool no_error)
STATIC_INLINE bool pgbuf_check_bcb_page_vpid(PGBUF_BCB *bufptr, bool maybe_deallocated)
bool pgbuf_has_any_waiters(PAGE_PTR pgptr)
short volid
Definition: dbtype_def.h:887
PGBUF_WATCHER * prev
Definition: page_buffer.h:224
PERF_STAT_ID
Definition: perf_monitor.h:268
#define PGBUF_PRIVATE_LIST_FROM_LRU_INDEX(i)
Definition: page_buffer.c:934
#define ER_PB_ORDERED_INCONSISTENCY
Definition: error_code.h:1523
#define PGBUF_FIND_BCB_PTR(i)
Definition: page_buffer.c:128
bool was_woken_up(void)
INT32 PAGEID
STATIC_INLINE void pgbuf_lru_add_bcb_to_bottom(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, PGBUF_LRU_LIST *lru_list)
Definition: page_buffer.c:9011
#define DONT_FREE
Definition: page_buffer.h:41
#define PGBUF_GET_LRU_LIST(lru_idx)
Definition: page_buffer.c:880
int tde_encrypt_data_page(const FILEIO_PAGE *iopage_plain, TDE_ALGORITHM tde_algo, bool is_temp, FILEIO_PAGE *iopage_cipher)
Definition: tde.c:896
STATIC_INLINE int pgbuf_bcb_get_lru_index(const PGBUF_BCB *bcb)
#define PGBUF_BCB_ASYNC_FLUSH_REQ
Definition: page_buffer.c:231
#define PGBUF_MAX_NEIGHBOR_PAGES
Definition: page_buffer.c:291
PGBUF_ZONE
Definition: page_buffer.c:175
STATIC_INLINE void pgbuf_bcb_register_avoid_deallocation(PGBUF_BCB *bcb)
void pgbuf_peek_stats(UINT64 *fixed_cnt, UINT64 *dirty_cnt, UINT64 *lru1_cnt, UINT64 *lru2_cnt, UINT64 *lru3_cnt, UINT64 *victim_candidates, UINT64 *avoid_dealloc_cnt, UINT64 *avoid_victim_cnt, UINT64 *private_quota, UINT64 *private_cnt, UINT64 *alloc_bcb_waiter_high, UINT64 *alloc_bcb_waiter_med, UINT64 *flushed_bcbs_waiting_direct_assign, UINT64 *lfcq_big_prv_num, UINT64 *lfcq_prv_num, UINT64 *lfcq_shr_num)
daemon * create_daemon(const looper &looper_arg, entry_task *exec_p, const char *daemon_name="", entry_manager *context_manager=NULL)
static void pgbuf_lru_sanity_check(const PGBUF_LRU_LIST *lru)
int heap_get_class_info(THREAD_ENTRY *thread_p, const OID *class_oid, HFID *hfid_out, FILE_TYPE *ftype_out, char **classname_out)
Definition: heap_file.c:16733
PAGE_PTR pgptr
Definition: page_buffer.h:222
STATIC_INLINE bool pgbuf_is_bcb_victimizable(PGBUF_BCB *bcb, bool has_mutex_lock)
Definition: page_buffer.c:8464
#define PGBUF_LRU_LIST_SIZEOF
Definition: page_buffer.c:119
#define pgbuf_ordered_unfix(thread_p, watcher_object)
Definition: page_buffer.h:280
STATIC_INLINE PGBUF_LRU_LIST * pgbuf_lru_list_from_bcb(const PGBUF_BCB *bcb)
int dwb_add_page(THREAD_ENTRY *thread_p, FILEIO_PAGE *io_page_p, VPID *vpid, DWB_SLOT **p_dwb_slot)
static void pgbuf_unlatch_void_zone_bcb(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int thread_private_lru_index)
Definition: page_buffer.c:6259
STATIC_INLINE void pgbuf_set_bcb_page_vpid(PGBUF_BCB *bufptr, bool force_set_vpid)
Definition: page_buffer.c:4810
FILEIO_PAGE_RESERVED prv
Definition: file_io.h:194
PGBUF_HOLDER_STAT perf_stat
Definition: page_buffer.c:443
PERF_PAGE_TYPE perf_page_type
Definition: page_buffer.c:860
#define PGBUF_LRU_VICTIM_LFCQ_FLAG
Definition: page_buffer.c:956
static void pgbuf_compute_lru_vict_target(float *lru_sum_flush_priority)
#define PGBUF_CHKPT_MIN_FLUSH_RATE
Definition: page_buffer.c:304
#define PGBUF_PAGES_COUNT_THRESHOLD
#define PEEK
Definition: file_io.h:74
PERF_PAGE_TYPE
Definition: perf_monitor.h:209
int timeval_add_msec(struct timeval *added_time, const struct timeval *start_time, int msec)
Definition: porting.c:2152
TSC_TICKS start_holder_tick
Definition: page_buffer.c:856
#define PGBUF_BCB_MOVE_TO_LRU_BOTTOM_FLAG
Definition: page_buffer.c:227
callable_task< entry > entry_callable_task
#define pgbuf_ordered_unfix_and_init(thread_p, page, pg_watcher)
Definition: page_buffer.h:69
int fileio_flush_control_initialize(void)
Definition: file_io.c:658
unsigned long long num_pages_read
Definition: page_buffer.c:375
#define PGBUF_LRU_LIST_IS_OVER_QUOTA_WITH_BUFFER(list)
Definition: page_buffer.c:950
PGBUF_LATCH_MODE latch_mode
Definition: page_buffer.c:409
#define VPID_SET_NULL(vpid_ptr)
Definition: dbtype_def.h:906
#define PGBUF_MINIMUM_BUFFERS
Definition: page_buffer.c:77
void pgbuf_set_lsa_as_temporary(THREAD_ENTRY *thread_p, PAGE_PTR pgptr)
Definition: page_buffer.c:4790
void pgbuf_set_tde_algorithm(THREAD_ENTRY *thread_p, PAGE_PTR pgptr, TDE_ALGORITHM tde_algo, bool skip_logging)
Definition: page_buffer.c:4473
bool is_perf_tracking
Definition: page_buffer.c:853
static int pgbuf_initialize_hash_table(void)
Definition: page_buffer.c:5036
int pgbuf_flush_checkpoint(THREAD_ENTRY *thread_p, const LOG_LSA *flush_upto_lsa, const LOG_LSA *prev_chkpt_redo_lsa, LOG_LSA *smallest_lsa, int *flushed_page_cnt)
Definition: page_buffer.c:3552
unsigned hold_has_read_latch
Definition: page_buffer.c:422
int pgbuf_release_private_lru(THREAD_ENTRY *thread_p, const int private_idx)
void pgbuf_adjust_quotas(THREAD_ENTRY *thread_p)
static void pgbuf_init_temp_page_lsa(FILEIO_PAGE *io_page, PGLENGTH page_size)
int logtb_find_wait_msecs(int tran_index)
static PGBUF_BCB * pgbuf_allocate_bcb(THREAD_ENTRY *thread_p, const VPID *src_vpid)
Definition: page_buffer.c:7400
#define PGBUF_INIT_WATCHER(w, rank, hfid)
Definition: page_buffer.h:123
int fileio_page_check_corruption(THREAD_ENTRY *thread_p, FILEIO_PAGE *io_page, bool *is_page_corrupted)
Definition: file_io.c:11831
static int pgbuf_initialize_thrd_holder(void)
Definition: page_buffer.c:5287
PGBUF_AOUT_BUF * Aout_bottom
Definition: page_buffer.c:614
#define MEM_REGION_INIT(region, size)
Definition: memory_alloc.h:111
#define PGBUF_HOLDER_ANCHOR_SIZEOF
Definition: page_buffer.c:125
std::int64_t offset
Definition: log_lsa.hpp:37
#define PGBUF_BCB_INIT_FLAGS
Definition: page_buffer.c:255
const char ** p
Definition: dynamic_load.c:945
STATIC_INLINE bool pgbuf_is_bcb_fixed_by_any(PGBUF_BCB *bcb, bool has_mutex_lock)
Definition: page_buffer.c:8438
static int rv
Definition: page_buffer.c:95
STATIC_INLINE void pgbuf_lru_add_new_bcb_to_bottom(THREAD_ENTRY *thread_p, PGBUF_BCB *bcb, int lru_idx)
Definition: page_buffer.c:9442
PGBUF_LRU_LIST * buf_LRU_list
Definition: page_buffer.c:726
VPID * pgbuf_get_vpid_ptr(PAGE_PTR pgptr)
Definition: page_buffer.c:4609
static int pgbuf_compare_victim_list(const void *p1, const void *p2)
Definition: page_buffer.c:3128
DISK_ISVALID
Definition: disk_manager.h:53
void pgbuf_reset_temp_lsa(PAGE_PTR pgptr)
Definition: page_buffer.c:4457
#define PGBUF_HASH_SIZE
Definition: page_buffer.c:279
static void pgbuf_scan_bcb_table()
static PGBUF_BCB * pgbuf_claim_bcb_for_fix(THREAD_ENTRY *thread_p, const VPID *vpid, PAGE_FETCH_MODE fetch_mode, PGBUF_BUFFER_HASH *hash_anchor, PGBUF_FIX_PERF *perf, bool *try_again)
Definition: page_buffer.c:7617
LOG_LSA oldest_unflush_lsa
Definition: page_buffer.c:499
int db_value_domain_init(DB_VALUE *value, const DB_TYPE type, const int precision, const int scale)
Definition: db_macro.c:153
#define WAIT_FLUSH_VICTIMS_MAX_MSEC
unsigned long long num_pages_read
Definition: page_buffer.c:398
STATIC_INLINE int pgbuf_remove_thrd_holder(THREAD_ENTRY *thread_p, PGBUF_HOLDER *holder) __attribute__((ALWAYS_INLINE))
Definition: page_buffer.c:5545
int fileio_get_volume_descriptor(VOLID vol_id)
Definition: file_io.c:6488
int fileio_flush_control_add_tokens(THREAD_ENTRY *thread_p, INT64 diff_usec, int *token_gen, int *token_consumed)
Definition: file_io.c:819
bool thread_get_sort_stats_active(cubthread::entry *thread_p)