CUBRID Engine  latest
log_page_buffer.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * log_page_buffer.c -
21  */
22 
23 #ident "$Id$"
24 
25 #include "config.h"
26 
27 #include <stdio.h>
28 #include <stddef.h>
29 #include <stdlib.h>
30 #include <stdarg.h>
31 #include <string.h>
32 #include <time.h>
33 #include <limits.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #if defined(WINDOWS)
37 #include <io.h>
38 #else /* !WINDOWS */
39 #include <unistd.h>
40 #endif /* !WINDOWS */
41 #include <stdarg.h>
42 
43 #if defined(SOLARIS)
44 #include <netdb.h>
45 #endif /* SOLARIS */
46 
47 #if !defined(WINDOWS)
48 #include <sys/param.h>
49 #include <fcntl.h>
50 #endif /* WINDOWS */
51 
52 #include <assert.h>
53 
54 #include "porting.h"
55 #include "porting_inline.hpp"
56 #include "connection_defs.h"
57 #include "language_support.h"
58 #include "log_append.hpp"
59 #include "log_impl.h"
60 #include "log_lsa.hpp"
61 #include "log_manager.h"
62 #include "log_comm.h"
63 #include "log_volids.hpp"
64 #include "log_writer.h"
65 #include "lock_manager.h"
66 #include "log_system_tran.hpp"
67 #include "boot_sr.h"
68 #if !defined(SERVER_MODE)
69 #include "boot_cl.h"
70 #else /* !SERVER_MODE */
71 #include "connection_defs.h"
72 #include "connection_sr.h"
73 #endif
74 #include "critical_section.h"
75 #include "page_buffer.h"
76 #include "double_write_buffer.h"
77 #include "file_io.h"
78 #include "disk_manager.h"
79 #include "error_manager.h"
80 #include "xserver_interface.h"
81 #include "perf_monitor.h"
82 #include "storage_common.h"
83 #include "system_parameter.h"
84 #include "memory_alloc.h"
85 #include "memory_hash.h"
86 #include "release_string.h"
87 #include "message_catalog.h"
88 #include "msgcat_set_log.hpp"
89 #include "environment_variable.h"
90 #include "util_func.h"
91 #include "errno.h"
92 #if defined(WINDOWS)
93 #include "wintcp.h"
94 #include "connection_error.h"
95 #else /* WINDOWS */
96 #include "tcp.h"
97 #endif /* WINDOWS */
98 #include "db.h" /* for db_Connect_status */
99 #include "log_compress.h"
100 #include "event_log.h"
101 #include "tsc_timer.h"
102 #include "vacuum.h"
103 #include "thread_entry.hpp"
104 #include "thread_manager.hpp"
105 #include "crypt_opfunc.h"
106 #include "object_representation.h"
107 
108 #if !defined(SERVER_MODE)
109 #define pthread_mutex_init(a, b)
110 #define pthread_mutex_destroy(a)
111 #define pthread_mutex_lock(a) 0
112 #define pthread_mutex_unlock(a)
113 static int rv;
114 #undef COND_INIT
115 #define COND_INIT(a)
116 #undef COND_BROADCAST
117 #define COND_BROADCAST(a)
118 #undef COND_DESTROY
119 #define COND_DESTROY(a)
120 #endif /* !SERVER_MODE */
121 
/*
 * logpb_log - emit a debug message prefixed with "LOGPB: " when logpb_Logging is set.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as exactly one statement:
 * a caller's "if (cond) logpb_log (...); else ..." keeps its else attached to the caller's if
 * instead of to the if hidden inside the macro.
 */
#define logpb_log(...) \
  do { \
    if (logpb_Logging) \
      { \
        _er_log_debug (ARG_FILE_LINE, "LOGPB: " __VA_ARGS__); \
      } \
  } while (0)

/*
 * log_archive_er_log - emit a debug message only when the PRM_ID_DEBUG_LOG_ARCHIVES
 * boolean parameter is enabled. Single-statement expansion, same reason as logpb_log.
 */
#define log_archive_er_log(...) \
  do { \
    if (prm_get_bool_value (PRM_ID_DEBUG_LOG_ARCHIVES)) \
      { \
        _er_log_debug (ARG_FILE_LINE, __VA_ARGS__); \
      } \
  } while (0)

/*
 * LOGPB_FIND_BUFPTR - address of the entry at index bufid in log_Pb.buffers.
 * The whole expansion is parenthesized so that postfix operators applied to the result
 * (->, [], ...) act on the pointer and not on the array element.
 */
#define LOGPB_FIND_BUFPTR(bufid) (&log_Pb.buffers[(bufid)])
127 
128 
129 /* PAGES OF ACTIVE LOG PORTION */
130 #define LOGPB_HEADER_PAGE_ID (-9) /* The first log page in the infinite log sequence. It is always kept
131  * on the active portion of the log. Log records are not stored on this
132  * page. This page is backed up in all archive logs */
133 #define LOGPB_NEXT_ARCHIVE_PAGE_ID (log_Gl.hdr.nxarv_pageid)
134 #define LOGPB_FIRST_ACTIVE_PAGE_ID (log_Gl.hdr.fpageid)
135 #define LOGPB_LAST_ACTIVE_PAGE_ID (log_Gl.hdr.nxarv_pageid + log_Gl.hdr.npages - 1)
136 #define LOGPB_ACTIVE_NPAGES (log_Gl.hdr.npages)
137 
138 /*
139  * TRANSLATING LOGICAL LOG PAGES (I.E., PAGES IN THE INFINITE LOG) TO PHYSICAL
140  * PAGES IN THE CURRENT LOG FILE
141  */
142 #define LOGPB_PHYSICAL_HEADER_PAGE_ID 0
143 
144 #define LOGPB_IS_FIRST_PHYSICAL_PAGE(pageid) (logpb_to_physical_pageid(pageid) == 1)
145 
146 /* ARCHIVE LOG PAGES */
147 #define LOGPB_IS_ARCHIVE_PAGE(pageid) \
148  ((pageid) != LOGPB_HEADER_PAGE_ID && (pageid) < LOGPB_NEXT_ARCHIVE_PAGE_ID)
149 #define LOGPB_AT_NEXT_ARCHIVE_PAGE_ID(pageid) \
150  (logpb_to_physical_pageid(pageid) == log_Gl.hdr.nxarv_phy_pageid)
151 
152 #define ARV_PAGE_INFO_TABLE_SIZE 256
153 
154 #define LOG_LAST_APPEND_PTR() ((char *) log_Gl.append.log_pgptr->area + LOGAREA_SIZE)
155 
156 #define LOG_APPEND_ALIGN(thread_p, current_setdirty) \
157  do { \
158  if ((current_setdirty) == LOG_SET_DIRTY) \
159  { \
160  logpb_set_dirty ((thread_p), log_Gl.append.log_pgptr); \
161  } \
162  log_Gl.hdr.append_lsa.offset = DB_ALIGN (log_Gl.hdr.append_lsa.offset, DOUBLE_ALIGNMENT); \
163  if (log_Gl.hdr.append_lsa.offset >= (int) LOGAREA_SIZE) \
164  { \
165  logpb_next_append_page((thread_p), LOG_DONT_SET_DIRTY); \
166  } \
167  } while (0)
168 
169 #define LOG_APPEND_ADVANCE_WHEN_DOESNOT_FIT(thread_p, length) \
170  do { \
171  if (log_Gl.hdr.append_lsa.offset + (int) (length) >= (int) LOGAREA_SIZE) \
172  { \
173  logpb_next_append_page ((thread_p), LOG_DONT_SET_DIRTY); \
174  } \
175  } while (0)
176 
177 #define LOG_APPEND_SETDIRTY_ADD_ALIGN(thread_p, add) \
178  do { \
179  log_Gl.hdr.append_lsa.offset += (add); \
180  LOG_APPEND_ALIGN ((thread_p), LOG_SET_DIRTY); \
181  } while (0)
182 
183 /* LOG BUFFER STRUCTURE */
184 
185 typedef struct log_buffer LOG_BUFFER;
187 {
188  volatile LOG_PAGEID pageid; /* Logical page of the log. (Page identifier of the infinite log) */
189  volatile LOG_PHY_PAGEID phy_pageid; /* Physical pageid for the active log portion */
190  bool dirty; /* Is page dirty */
191  LOG_PAGE *logpage; /* The actual buffered log page */
192 };
193 
194 /* Status for append record status during logpb_flush_all_append_pages.
195  * In normal conditions, only two statuses are used:
196  * - LOGPB_APPENDREC_IN_PROGRESS (set when append record is started)
197  * - LOGPB_APPENDREC_SUCCESS (set when append record is ended).
198  *
199  * If a log record append is not ended during flush, then we'll transition the states in the following order:
200  * - LOGPB_APPENDREC_IN_PROGRESS => LOGPB_APPENDREC_PARTIAL_FLUSHED_END_OF_LOG
201  * prev_lsa record is overwritten with end of log record and flushed to disk.
202  * - LOGPB_APPENDREC_PARTIAL_FLUSHED_END_OF_LOG => LOGPB_APPENDREC_PARTIAL_ENDED
203  * incomplete log record is now completely appended. logpb_flush_all_append_pages is called again.
204  * - LOGPB_APPENDREC_PARTIAL_ENDED => LOGPB_APPENDREC_PARTIAL_FLUSHED_ORIGINAL
205  * at the end of last flush, the prev_lsa record is restored and its page is flushed again to disk.
206  * - LOGPB_APPENDREC_PARTIAL_FLUSHED_ORIGINAL => LOGPB_APPENDREC_SUCCESS
207  * set the normal state of log record successful append.
208  */
209 typedef enum
210 {
211  LOGPB_APPENDREC_IN_PROGRESS, /* append record started */
212 
213  /* only for partial appended record flush: */
214  LOGPB_APPENDREC_PARTIAL_FLUSHED_END_OF_LOG, /* when flush is forced and record is not fully appended, it is
215  * replaced with end of log and its header page is flushed. */
216  LOGPB_APPENDREC_PARTIAL_ENDED, /* all record has been successfully appended */
217  LOGPB_APPENDREC_PARTIAL_FLUSHED_ORIGINAL, /* original header page is flushed */
218 
219  LOGPB_APPENDREC_SUCCESS /* finished appending record (in a stable way) */
221 
222 /* used to handle records partially written when logpb_flush_all_append_pages is forced */
225 {
227 
228  char buffer_log_page[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
232 };
233 
234 /* Global structure to trantable, log buffer pool, etc */
237 {
238  LOG_BUFFER *buffers; /* Log buffer pool */
242  int num_buffers; /* Number of log buffers */
243 
245 };
246 
247 typedef struct arv_page_info
248 {
249  int arv_num;
252 } ARV_PAGE_INFO;
253 
254 typedef struct
255 {
257  int rear;
260 
261 
262 #define LOG_MAX_LOGINFO_LINE (PATH_MAX * 4)
263 
264 /* skip prompting for archive log location */
265 #if defined(SERVER_MODE)
267 #else
269 #endif
270 
272 
275 
276 static bool logpb_Initialized = false;
277 static bool logpb_Logging = false;
278 
279 /*
280  * Functions
281  */
282 
284 static bool logpb_is_dirty (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr);
285 #if !defined(NDEBUG)
286 static bool logpb_is_any_dirty (THREAD_ENTRY * thread_p);
287 #endif /* !NDEBUG */
288 #if defined(CUBRID_DEBUG)
289 static bool logpb_is_any_fix (THREAD_ENTRY * thread_p);
290 #endif /* CUBRID_DEBUG */
291 static void logpb_dump_information (FILE * out_fp);
292 static void logpb_dump_to_flush_page (FILE * out_fp);
293 static void logpb_dump_pages (FILE * out_fp);
294 static void logpb_initialize_backup_info (LOG_HEADER * loghdr);
295 static LOG_PAGE **logpb_writev_append_pages (THREAD_ENTRY * thread_p, LOG_PAGE ** to_flush, DKNPAGES npages);
297 static void logpb_set_unavailable_archive (THREAD_ENTRY * thread_p, int arv_num);
298 static void logpb_dismount_log_archive (THREAD_ENTRY * thread_p);
299 static bool logpb_is_archive_available (THREAD_ENTRY * thread_p, int arv_num);
300 static void logpb_archive_active_log (THREAD_ENTRY * thread_p);
301 static int logpb_remove_archive_logs_internal (THREAD_ENTRY * thread_p, int first, int last, const char *info_reason);
302 static void logpb_append_archives_removed_to_log_info (int first, int last, const char *info_reason);
303 static int logpb_verify_length (const char *db_fullname, const char *log_path, const char *log_prefix);
304 static int logpb_backup_for_volume (THREAD_ENTRY * thread_p, VOLID volid, LOG_LSA * chkpt_lsa,
305  FILEIO_BACKUP_SESSION * session, bool only_updated);
306 static int logpb_update_backup_volume_info (const char *bkupinfo_file_name);
307 static int logpb_start_where_path (const char *to_db_fullname, const char *toext_path, const char **toext_name,
308  char **ext_path, char **alloc_extpath, const char *fileof_vols_and_wherepaths,
309  FILE ** where_paths_fp);
310 static int logpb_next_where_path (const char *to_db_fullname, const char *toext_path, const char *ext_name,
311  char *ext_path, const char *fileof_vols_and_wherepaths, FILE * where_paths_fp,
312  int num_perm_vols, VOLID volid, char *from_volname, char *to_volname);
313 static int logpb_copy_volume (THREAD_ENTRY * thread_p, VOLID from_volid, const char *tonew_volname, INT64 * db_creation,
314  LOG_LSA * vol_chkpt_lsa);
315 static bool logpb_check_if_exists (const char *fname, char *first_vol);
316 #if defined(SERVER_MODE)
317 static int logpb_backup_needed_archive_logs (THREAD_ENTRY * thread_p, FILEIO_BACKUP_SESSION * session,
318  int first_arv_num, int last_arv_num);
319 #endif /* SERVER_MODE */
320 static bool logpb_remote_ask_user_before_delete_volumes (THREAD_ENTRY * thread_p, const char *volpath);
321 static int logpb_initialize_flush_info (void);
322 static void logpb_finalize_flush_info (void);
323 static void logpb_finalize_writer_info (void);
324 static void logpb_dump_log_header (FILE * outfp);
325 static void logpb_dump_parameter (FILE * outfp);
326 static void logpb_dump_runtime (FILE * outfp);
327 static void logpb_initialize_log_buffer (LOG_BUFFER * log_buffer_p, LOG_PAGE * log_pg);
328 
329 static int logpb_check_stop_at_time (FILEIO_BACKUP_SESSION * session, time_t stop_at, time_t backup_time);
330 static void logpb_write_toflush_pages_to_archive (THREAD_ENTRY * thread_p);
331 static int logpb_add_archive_page_info (THREAD_ENTRY * thread_p, int arv_num, LOG_PAGEID start_page,
332  LOG_PAGEID end_page);
333 static int logpb_get_archive_num_from_info_table (THREAD_ENTRY * thread_p, LOG_PAGEID page_id);
334 
335 static int logpb_flush_all_append_pages (THREAD_ENTRY * thread_p);
336 static int logpb_append_next_record (THREAD_ENTRY * thread_p, LOG_PRIOR_NODE * ndoe);
337 
338 static void logpb_start_append (THREAD_ENTRY * thread_p, LOG_RECORD_HEADER * header);
339 static void logpb_end_append (THREAD_ENTRY * thread_p, LOG_RECORD_HEADER * header);
340 static void logpb_append_data (THREAD_ENTRY * thread_p, int length, const char *data);
341 static void logpb_append_crumbs (THREAD_ENTRY * thread_p, int num_crumbs, const LOG_CRUMB * crumbs);
342 static void logpb_next_append_page (THREAD_ENTRY * thread_p, LOG_SETDIRTY current_setdirty);
344 static int logpb_append_prior_lsa_list (THREAD_ENTRY * thread_p, LOG_PRIOR_NODE * list);
345 static int logpb_copy_page (THREAD_ENTRY * thread_p, LOG_PAGEID pageid, LOG_CS_ACCESS_MODE access_mode,
346  LOG_PAGE * log_pgptr);
347 
348 static void logpb_fatal_error_internal (THREAD_ENTRY * thread_p, bool log_exit, bool need_flush, const char *file_name,
349  const int lineno, const char *fmt, va_list ap);
350 
351 static int logpb_copy_log_header (THREAD_ENTRY * thread_p, LOG_HEADER * to_hdr, const LOG_HEADER * from_hdr);
354 static int logpb_fetch_header_from_active_log (THREAD_ENTRY * thread_p, const char *db_fullname,
355  const char *logpath, const char *prefix_logname, LOG_HEADER * hdr,
356  LOG_PAGE * log_pgptr);
357 static int logpb_compute_page_checksum (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, int *checksum_crc32);
358 static int logpb_page_has_valid_checksum (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, bool * has_valid_checksum);
359 
360 static bool logpb_is_log_active_from_backup_useful (THREAD_ENTRY * thread_p, const char *active_log_path,
361  const char *db_full_name);
362 static int logpb_peek_header_of_active_log_from_backup (THREAD_ENTRY * thread_p, const char *active_log_path,
363  LOG_HEADER * hdr);
364 
365 /*
366  * FUNCTIONS RELATED TO LOG BUFFERING
367  *
368  */
369 
370 /*
371  * logpb_get_log_buffer_index - get the current index in the log buffer
372  * return: index number
373  * log_pageid (in) : the pageid number
374  */
375 STATIC_INLINE int
377 {
378  return log_pageid % log_Pb.num_buffers;
379 }
380 
381 /*
382  * logpb_get_log_buffer - get the buffer from the log page
382  * return: the corresponding buffer
383  * log_pg (in) : the log page
384  * NOTE: the function finds the index of the log page and returns
385  * the corresponding buffer
387  */
390 {
391  int index;
392 
393  if (log_pg == log_Pb.header_page)
394  {
395  return &log_Pb.header_buffer;
396  }
397 
398  assert ((UINT64) ((char *) log_pg - (char *) log_Pb.pages_area) / LOG_PAGESIZE < INT_MAX);
399  index = (int) ((UINT64) ((char *) log_pg - (char *) log_Pb.pages_area) / LOG_PAGESIZE);
400 
401  /* Safe guard: index is valid. */
402  assert (index >= 0 && index < log_Pb.num_buffers);
403  /* Safe guard: log_pg is correctly aligned. */
404  assert ((char *) log_Pb.pages_area + (UINT64) LOG_PAGESIZE * index == (char *) log_pg);
405 
406  return &log_Pb.buffers[index];
407 }
408 
409 /*
410  * logpb_initialize_log_buffer -
411  *
412  * return: nothing
413  *
414  * log_buffer_p(in/oiut):
415  *
416  * NOTE:
417  *
418  */
419 static void
421 {
422  log_buffer_p->pageid = NULL_PAGEID;
423  log_buffer_p->phy_pageid = NULL_PAGEID;
424  log_buffer_p->dirty = false;
425  log_buffer_p->logpage = log_pg;
426  log_buffer_p->logpage->hdr.logical_pageid = NULL_PAGEID;
427  log_buffer_p->logpage->hdr.offset = NULL_OFFSET;
428  log_buffer_p->logpage->hdr.flags = 0;
429 }
430 
431 /*
432  * logpb_compute_page_checksum - Computes log page checksum.
433  * return: error code
434  * thread_p (in) : thread entry
435  * log_pgptr (in) : log page pointer
436  * checksum_crc32(out): computed checksum
437  * Note: Currently CRC32 is used as checksum.
438  * Note: any changes to this requires changes to logwr_check_page_checksum
439  */
440 static int
441 logpb_compute_page_checksum (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, int *checksum_crc32)
442 {
443  int error_code = NO_ERROR, saved_checksum_crc32;
444  const int block_size = 4096;
445  const int max_num_pages = IO_MAX_PAGE_SIZE / block_size;
446  const int sample_nbytes = 16;
447  int sampling_offset;
448  char buf[max_num_pages * sample_nbytes * 2];
449  const int num_pages = LOG_PAGESIZE / block_size;
450  const size_t sizeof_buf = num_pages * sample_nbytes * 2;
451 
452  assert (log_pgptr != NULL && checksum_crc32 != NULL);
453 
454  /* Save the old page checksum. */
455  saved_checksum_crc32 = log_pgptr->hdr.checksum;
456 
457  /* Resets checksum to not affect the new computation. */
458  log_pgptr->hdr.checksum = 0;
459 
460  char *p = buf;
461  for (int i = 0; i < num_pages; i++)
462  {
463  // first
464  sampling_offset = (i * block_size);
465  memcpy (p, ((char *) log_pgptr) + sampling_offset, sample_nbytes);
466  p += sample_nbytes;
467 
468  // last
469  sampling_offset = (i * block_size) + (block_size - sample_nbytes);
470  memcpy (p, ((char *) log_pgptr) + sampling_offset, sample_nbytes);
471  p += sample_nbytes;
472  }
473 
474  crypt_crc32 ((char *) buf, (int) sizeof_buf, checksum_crc32);
475 
476  /* Restores the saved checksum */
477  log_pgptr->hdr.checksum = saved_checksum_crc32;
478 
479  return error_code;
480 }
481 
482 /*
483  * logpb_set_page_checksum - Set log page checksum.
484  * return: error code
485  * thread_p (in) : thread entry
486  * log_pgptr (in) : log page pointer
487  * Note: Currently CRC32 is used as checksum.
488  */
489 int
491 {
492  int error_code = NO_ERROR, checksum_crc32;
493 
494  assert (log_pgptr != NULL);
495 
496  /* Computes the page checksum. */
497  error_code = logpb_compute_page_checksum (thread_p, log_pgptr, &checksum_crc32);
498  if (error_code != NO_ERROR)
499  {
500  return error_code;
501  }
502 
503  log_pgptr->hdr.checksum = checksum_crc32;
504  logpb_log ("logpb_set_page_checksum: log page %lld has checksum = %d\n",
505  (long long int) log_pgptr->hdr.logical_pageid, checksum_crc32);
506 
507  return NO_ERROR;
508 }
509 
510 /*
511  * logpb_page_has_valid_checksum - Check whether the log page checksum is valid.
512  * return: error code
513  * thread_p(in): thread entry
514  * log_pgptr(in): the log page
515  * has_valid_checksum(out): true, if has valid checksum.
516  */
517 static int
518 logpb_page_has_valid_checksum (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, bool * has_valid_checksum)
519 {
520  int checksum_crc32, error_code = NO_ERROR;
521 
522  assert (log_pgptr != NULL && has_valid_checksum != NULL);
523 
524  error_code = logpb_compute_page_checksum (thread_p, log_pgptr, &checksum_crc32);
525  if (error_code != NO_ERROR)
526  {
527  return error_code;
528  }
529 
530  *has_valid_checksum = (checksum_crc32 == log_pgptr->hdr.checksum);
531  if (*has_valid_checksum == false)
532  {
533  logpb_log ("logpb_page_has_valid_checksum: log page %lld has checksum = %d, computed checksum = %d\n",
534  (long long int) log_pgptr->hdr.logical_pageid, log_pgptr->hdr.checksum, checksum_crc32);
535  }
536 
537  return NO_ERROR;
538 }
539 
540 /*
541  * logpb_initialize_pool - Initialize the log buffer pool
542  *
543  * return: NO_ERROR if all OK, ER_ status otherwise
544  *
545  * NOTE:Initialize the log buffer pool. All resident pages are invalidated.
546  */
547 int
549 {
550  int error_code = NO_ERROR;
551  int i;
552  LOG_GROUP_COMMIT_INFO *group_commit_info = &log_Gl.group_commit_info;
553  LOGWR_INFO *writer_info = log_Gl.writer_info;
554  size_t size;
555 
556  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
557 
559 
560  if (logpb_Initialized == true)
561  {
562  logpb_finalize_pool (thread_p);
563  }
564 
565  assert (log_Pb.pages_area == NULL);
566  assert (logpb_Initialized == false);
567 
569 
570  /*
571  * Create an area to keep the number of desired buffers
572  */
574 
575  /* allocate a pointer array to point to each buffer */
576  size = ((size_t) log_Pb.num_buffers * sizeof (*log_Pb.buffers));
577  log_Pb.buffers = (LOG_BUFFER *) malloc (size);
578  if (log_Pb.buffers == NULL)
579  {
582  }
583 
584  size = ((size_t) log_Pb.num_buffers * (LOG_PAGESIZE));
585  log_Pb.pages_area = (LOG_PAGE *) malloc (size);
586  if (log_Pb.pages_area == NULL)
587  {
588  free_and_init (log_Pb.buffers);
591  }
592 
593  /* Initialize every new buffer */
594  memset (log_Pb.pages_area, LOG_PAGE_INIT_VALUE, size);
595  for (i = 0; i < log_Pb.num_buffers; i++)
596  {
598  (LOG_PAGE *) ((char *) log_Pb.pages_area + (UINT64) i * (LOG_PAGESIZE)));
599  }
600 
601  size = LOG_PAGESIZE;
602  log_Pb.header_page = (LOG_PAGE *) malloc (size);
603  if (log_Pb.header_page == NULL)
604  {
605  free_and_init (log_Pb.buffers);
606  free_and_init (log_Pb.pages_area);
609  }
610 
611  memset (log_Pb.header_page, LOG_PAGE_INIT_VALUE, size);
613 
614  error_code = logpb_initialize_flush_info ();
615  if (error_code != NO_ERROR)
616  {
617  goto error;
618  }
619 
620  /* Initialize partial append */
624 
625 #if !defined (NDEBUG)
626  // suppress valgrind complaint.
628 #endif // DEBUG
629 
630  logpb_Initialized = true;
631  pthread_mutex_init (&log_Gl.chkpt_lsa_lock, NULL);
632 
633  pthread_cond_init (&group_commit_info->gc_cond, NULL);
634  pthread_mutex_init (&group_commit_info->gc_mutex, NULL);
635 
636  pthread_mutex_init (&writer_info->wr_list_mutex, NULL);
637 
638  pthread_cond_init (&writer_info->flush_start_cond, NULL);
639  pthread_mutex_init (&writer_info->flush_start_mutex, NULL);
640 
641  pthread_cond_init (&writer_info->flush_wait_cond, NULL);
642  pthread_mutex_init (&writer_info->flush_wait_mutex, NULL);
643 
644  pthread_cond_init (&writer_info->flush_end_cond, NULL);
645  pthread_mutex_init (&writer_info->flush_end_mutex, NULL);
646 
647  writer_info->is_init = true;
648 
649  return error_code;
650 
651 error:
652 
653  logpb_finalize_pool (thread_p);
654  logpb_fatal_error (thread_p, false, ARG_FILE_LINE, "log_pbpool_init");
655 
656  return error_code;
657 }
658 
659 /*
660  * logpb_finalize_pool - TERMINATES THE LOG BUFFER POOL
661  *
662  * return: nothing
663  *
664  * NOTE:Terminate the log buffer pool. All log resident pages are invalidated.
665  */
666 void
668 {
670 
671  if (logpb_Initialized == false)
672  {
673  /* logpb already finalized */
674  return;
675  }
676 
677  if (log_Gl.append.log_pgptr != NULL)
678  {
680  }
683  /* copy log_Gl.append.prev_lsa to log_Gl.prior_info.prev_lsa */
685 
686 #if defined(CUBRID_DEBUG)
687  if (logpb_is_any_dirty (thread_p) == true || logpb_is_any_fix (thread_p) == true)
688  {
689  er_log_debug (ARG_FILE_LINE, "log_pbpool_final: Log Buffer pool contains dirty or fixed pages at the end.\n");
690  logpb_dump (thread_p, stdout);
691  }
692 #endif /* CUBRID_DEBUG */
693 
694  free_and_init (log_Pb.buffers);
695  free_and_init (log_Pb.pages_area);
696  free_and_init (log_Pb.header_page);
697  log_Pb.num_buffers = 0;
698  logpb_Initialized = false;
700 
701  pthread_mutex_destroy (&log_Gl.chkpt_lsa_lock);
702 
704  pthread_cond_destroy (&log_Gl.group_commit_info.gc_cond);
705 
707 
709 }
710 
711 /*
712  * logpb_is_pool_initialized - Find out if buffer pool has been initialized
713  *
714  * return:
715  *
716  * NOTE:Find out if the buffer pool has been initialized.
717  */
718 bool
720 {
722 
723  return logpb_Initialized;
724 }
725 
726 /*
727  * logpb_invalidate_pool - Invalidate all buffers in buffer pool
728  *
729  * return: Pointer to the page or NULL
730  *
731  * NOTE:Invalidate all unfixed buffers in the buffer pool.
732  * This is needed when we reset the log header information.
733  */
734 void
736 {
737  LOG_BUFFER *log_bufptr; /* A log buffer */
738  int i;
739 
740  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
741 
742  if (logpb_Initialized == false)
743  {
744  return;
745  }
746 
747  /*
748  * Flush any append dirty buffers at this moment.
749  * Then, invalidate any buffer that it is not fixed and dirty
750  */
751  logpb_flush_pages_direct (thread_p);
752 
753  for (i = 0; i < log_Pb.num_buffers; i++)
754  {
755  log_bufptr = LOGPB_FIND_BUFPTR (i);
756  if (log_bufptr->pageid != NULL_PAGEID && !log_bufptr->dirty == false)
757  {
758  logpb_initialize_log_buffer (log_bufptr, log_bufptr->logpage);
759  }
760  }
761 }
762 
763 
764 /*
765  * logpb_create_page - Create a log page on a log buffer
766  *
767  * return: Pointer to the page or NULL
768  *
769  * pageid(in): Page identifier
770  *
771  * NOTE:Creates the log page identified by pageid on a log buffer and return such buffer.
772  * Just initializes log buffer hdr,
773  * To read a page from disk is not needed.
774  */
775 LOG_PAGE *
777 {
778  return logpb_locate_page (thread_p, pageid, NEW_PAGE);
779 }
780 
781 /*
782  * logpb_locate_page - Fetch a log page
783  *
784  * return: Pointer to the page or NULL
785  *
786  * pageid(in): Page identifier
787  * fetch_mode(in): Is this a new log page ?. That is, can we avoid the I/O
788  *
789  * NOTE: first, the function checks if the pageid is the header page id.
790  * if it is not, it brings the corresponding page from the log page buffer.
791  * if the actual pageid differs from the buffer pageid, it means that it should
792  * be invalidated - it contains another page - and it is flushed to disk, if it
793  * is dirty. Now, if the pageid is null , we have a clear log page. If the fetch
794  * mode is NEW_PAGE, it set the fields in the buffer, else, if it is OLD_PAGE,
795  * it brings the page from the disk. The last case is if the page is not NULL, and
796  * it is equal with the pageid. This means it is an OLD_PAGE request, and it returns
797  * that page.
798  */
799 static LOG_PAGE *
801 {
802  LOG_BUFFER *log_bufptr = NULL; /* A log buffer */
803  LOG_PHY_PAGEID phy_pageid = NULL_PAGEID; /* The corresponding physical page */
804  bool is_perf_tracking;
805  TSC_TICKS start_tick, end_tick;
806  TSCTIMEVAL tv_diff;
807  UINT64 fix_wait_time;
809  int index;
810 
811  logpb_log ("called logpb_locate_page for pageid %lld, fetch_mode=%s", (long long int) pageid,
812  fetch_mode == NEW_PAGE ? "new_page" : "old_page\n");
813 
814  is_perf_tracking = perfmon_is_perf_tracking ();
815  if (is_perf_tracking)
816  {
817  tsc_getticks (&start_tick);
818  }
819 
820  assert (pageid != NULL_PAGEID);
821  assert ((fetch_mode == NEW_PAGE) || (fetch_mode == OLD_PAGE));
822  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
823 
824  if (pageid == LOGPB_HEADER_PAGE_ID)
825  {
826  log_bufptr = &log_Pb.header_buffer;
827  }
828  else
829  {
830  index = logpb_get_log_buffer_index ((int) pageid);
831  if (index >= 0 && index < log_Pb.num_buffers)
832  {
833  log_bufptr = &log_Pb.buffers[index];
834  }
835  else
836  {
838  return NULL;
839  }
840 
841  }
842  assert (log_bufptr != NULL);
843 
844  if (log_bufptr->pageid != NULL_PAGEID && log_bufptr->pageid != pageid)
845  {
846  if (log_bufptr->dirty == true)
847  {
848  /* should not happen */
849  assert_release (false);
850  logpb_log ("logpb_locate_page: fatal error, victimizing dirty log page %lld.\n",
851  (long long int) log_bufptr->pageid);
852 
853  if (logpb_write_page_to_disk (thread_p, log_bufptr->logpage, log_bufptr->pageid) != NO_ERROR)
854  {
855  assert_release (false);
856  return NULL;
857  }
858  log_bufptr->dirty = false;
860  }
861 
862  log_bufptr->pageid = NULL_PAGEID; /* invalidate buffer */
864  }
865 
866  if (log_bufptr->pageid == NULL_PAGEID)
867  {
868  if (fetch_mode == NEW_PAGE)
869  {
870  /* Fills log page with 0xff, for checksum consistency. */
871  memset (log_bufptr->logpage, LOG_PAGE_INIT_VALUE, LOG_PAGESIZE);
872  log_bufptr->logpage->hdr.logical_pageid = pageid;
873  log_bufptr->logpage->hdr.offset = NULL_OFFSET;
874  log_bufptr->logpage->hdr.flags = 0;
875  }
876  else
877  {
878  stat_page_found = PERF_PAGE_MODE_OLD_LOCK_WAIT;
879  if (logpb_read_page_from_file (thread_p, pageid, LOG_CS_FORCE_USE, log_bufptr->logpage) != NO_ERROR)
880  {
881  return NULL;
882  }
883  }
884  phy_pageid = logpb_to_physical_pageid (pageid);
885  log_bufptr->phy_pageid = phy_pageid;
886  log_bufptr->pageid = pageid;
887  }
888  else
889  {
890  assert (fetch_mode == OLD_PAGE);
891  assert (log_bufptr->pageid == pageid);
892  logpb_log ("logpb_locate_page using log buffer entry for pageid = %lld", pageid);
893  }
894 
896  if (is_perf_tracking)
897  {
898  tsc_getticks (&end_tick);
899  tsc_elapsed_time_usec (&tv_diff, end_tick, start_tick);
900  fix_wait_time = tv_diff.tv_sec * 1000000LL + tv_diff.tv_usec;
901  if (fix_wait_time > 0)
902  {
903  perfmon_pbx_fix_acquire_time (thread_p, PAGE_LOG, stat_page_found, PERF_HOLDER_LATCH_READ,
904  PERF_UNCONDITIONAL_FIX_WITH_WAIT, fix_wait_time);
905  }
906  }
907 
908  ASSERT_ALIGN (log_bufptr->logpage->area, MAX_ALIGNMENT);
909  return log_bufptr->logpage;
910 }
911 
912 /*
913  * logpb_set_dirty - Mark the current page dirty
914  *
915  * return: nothing
916  *
917  * log_pgptr(in): Log page pointer
918  *
919  * NOTE:Mark the current log page as dirty.
920  */
921 void
922 logpb_set_dirty (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr)
923 {
924  LOG_BUFFER *bufptr; /* Log buffer associated with given page */
925 
926  /* Get the address of the buffer from the page. */
927  bufptr = logpb_get_log_buffer (log_pgptr);
928  if (!bufptr->dirty)
929  {
930  logpb_log ("dirty flag set for pageid = %lld\n", (long long int) bufptr->pageid);
931  }
932 #if defined(CUBRID_DEBUG)
933  if (bufptr->pageid != LOGPB_HEADER_PAGE_ID
935  {
937  }
938 #endif /* CUBRID_DEBUG */
939 
940  bufptr->dirty = true;
941 }
942 
943 /*
944  * logpb_is_dirty - Find if current log page pointer is dirty
945  *
946  * return:
947  *
948  * log_pgptr(in): Log page pointer
949  *
950  * NOTE:Find if the current log page is dirty.
951  */
952 static bool
953 logpb_is_dirty (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr)
954 {
955  LOG_BUFFER *bufptr; /* Log buffer associated with given page */
956  bool is_dirty;
957 
958  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
959 
960  /* Get the address of the buffer from the page. */
961  bufptr = logpb_get_log_buffer (log_pgptr);
962  is_dirty = (bool) bufptr->dirty;
963 
964  return is_dirty;
965 }
966 
967 #if !defined(NDEBUG)
968 /*
969  * logpb_is_any_dirty - FIND IF ANY LOG BUFFER IS DIRTY
970  *
971  * return:
972  *
973  * NOTE:Find if any log buffer is dirty.
974  */
975 static bool
977 {
978  LOG_BUFFER *bufptr; /* A log buffer */
979  int i;
980  bool ret;
981 
982  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
983 
984  ret = false;
985  for (i = 0; i < log_Pb.num_buffers; i++)
986  {
987  bufptr = LOGPB_FIND_BUFPTR (i);
988  if (bufptr->dirty == true)
989  {
990  ret = true;
991  break;
992  }
993  }
994 
995  return ret;
996 }
997 #endif /* !NDEBUG || CUBRID_DEBUG */
998 
999 #if defined(CUBRID_DEBUG)
1000 /*
1001  * logpb_is_any_fix - Find if any log buffer is fixed
1002  *
1003  * return:
1004  *
1005  * NOTE:Find if any buffer is fixed
1006  */
1007 static bool
1008 logpb_is_any_fix (THREAD_ENTRY * thread_p)
1009 {
1010  LOG_BUFFER *bufptr; /* A log buffer */
1011  int i, rv;
1012  bool ret;
1013 
1014  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1015 
1016  ret = false;
1017  for (i = 0; i < log_Pb.num_buffers; i++)
1018  {
1019  bufptr = LOGPB_FIND_BUFPTR (i);
1020  if (bufptr->pageid != NULL_PAGEID)
1021  {
1022  ret = true;
1023  break;
1024  }
1025  }
1026 
1027  return ret;
1028 }
1029 #endif /* CUBRID_DEBUG */
1030 
1031 /*
1032  * logpb_flush_page - Flush a page of the active portion of the log to disk
1033  *
1034  * return: NO_ERROR on success, error code otherwise
1035  *
1036  * log_pgptr(in): Log page pointer
1037  *
1038  * NOTE:The log page (of the active portion of the log) associated
1039  * with pageptr is written out to disk and is optionally freed.
1040  */
1041 int
1042 logpb_flush_page (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr)
1043 {
1044  LOG_BUFFER *bufptr; /* Log buffer associated with given page */
1045 
1046  /* Get the address of the buffer from the page. */
1047  bufptr = logpb_get_log_buffer (log_pgptr);
1048 
1049  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1050 
1051  logpb_log ("called logpb_flush_page for pageid = %lld\n", (long long int) bufptr->pageid);
1052 
1053 #if defined(CUBRID_DEBUG)
1054  if (bufptr->pageid != LOGPB_HEADER_PAGE_ID
1056  {
1059  }
1060  if (bufptr->phy_pageid == NULL_PAGEID || bufptr->phy_pageid != logpb_to_physical_pageid (bufptr->pageid))
1061  {
1062  /* Bad physical log page for such logical page */
1064  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_flush_page");
1065  return ER_LOG_PAGE_CORRUPTED;
1066  }
1067 #endif /* CUBRID_DEBUG */
1068 
1069  if (bufptr->dirty == true)
1070  {
1071  /*
1072  * The buffer is dirty, flush it
1073  */
1074 
1075  /*
1076  * Even when the log has been opened with the O_SYNC option, force a sync,
1077  * since on some operating systems (HP) O_SYNC alone does not seem to have the effect
1078  * of forcing the page to disk without doing an fsync
1079  */
1080 
1081  if (logpb_write_page_to_disk (thread_p, log_pgptr, bufptr->pageid) != NO_ERROR)
1082  {
1083  goto error;
1084  }
1085  else
1086  {
1087  bufptr->dirty = false;
1088  }
1089  }
1090 
1091  return NO_ERROR;
1092 
1093 error:
1094 
1095  return ER_FAILED;
1096 }
1097 
1098 /*
1099  * logpb_get_page_id - Logical pageid of log buffer/page
1100  *
1101  * return: pageid recorded in the LOG_BUFFER that logpb_get_log_buffer returns for log_pgptr
1102  *
1103  * log_pgptr(in): Log page pointer
1104  *
1105  * NOTE:The page identifier of the given log page/buffer.
1106  * The page is always fixed when this function is called.
1107  * The page cannot be replaced while fix > 0,
1108  * so locking a mutex is not needed.
1109  */
1110 LOG_PAGEID
1112 {
1113  LOG_BUFFER *bufptr; /* Log buffer associated with given page */
1114 
1115  bufptr = logpb_get_log_buffer (log_pgptr);
1116 
1117  return bufptr->pageid;
1118 }
1119 
1120 /*
1121  * logpb_dump - DUMP THE LOG PAGE BUFFER POOL
1122  *
1123  * return: nothing
1124  *
1125  * NOTE:Dump the log page buffer pool. This function is used for debugging purposes.
1126  */
1127 void
1128 logpb_dump (THREAD_ENTRY * thread_p, FILE * out_fp)
1129 {
1130  if (logpb_Initialized == false)
1131  {
1132  return;
1133  }
1134 
1135  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1136 
1137  logpb_dump_information (out_fp);
1138 
1139  if (log_Gl.flush_info.num_toflush > 0)
1140  {
1141  logpb_dump_to_flush_page (out_fp);
1142  }
1143 
1144  (void) fprintf (out_fp, "\n\n");
1145  (void) fprintf (out_fp, "Buf Log_Pageid Phy_pageid Drt Rct Bufaddr Pagearea HDR:Pageid offset\n");
1146 
1147  logpb_dump_pages (out_fp);
1148 }
1149 
1150 /*
1151  * logpb_dump_information -
1152  *
1153  * return: nothing
1154  *
1155  * NOTE:
1156  */
1157 static void
1158 logpb_dump_information (FILE * out_fp)
1159 {
1160  long long int append;
1161  int i;
1162 
1163  fprintf (out_fp, "\n\n ** DUMP OF LOG BUFFER POOL INFORMATION **\n\n");
1164 
1165  fprintf (out_fp, "\nHash table dump\n");
1166  for (i = 0; i < log_Pb.num_buffers; i++)
1167  {
1168  fprintf (out_fp, "Pageid = %5lld, Address = %p\n", (long long int) i, (void *) &log_Pb.buffers[i]);
1169  }
1170  fprintf (out_fp, "\n\n");
1171 
1172  fprintf (out_fp, " Next IO_LSA = %lld|%d, Current append LSA = %lld|%d, Prev append LSA = %lld|%d\n"
1173  " Prior LSA = %lld|%d, Prev prior LSA = %lld|%d\n\n",
1174  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset,
1175  (long long int) log_Gl.hdr.append_lsa.pageid, (int) log_Gl.hdr.append_lsa.offset,
1176  (long long int) log_Gl.append.prev_lsa.pageid, (int) log_Gl.append.prev_lsa.offset,
1178  (long long int) log_Gl.prior_info.prev_lsa.pageid, (int) log_Gl.prior_info.prev_lsa.offset);
1179 
1180  if (log_Gl.append.log_pgptr == NULL)
1181  {
1182  append = NULL_PAGEID;
1183  }
1184  else
1185  {
1187  }
1188 
1189  fprintf (out_fp, " Append to_flush array: max = %d, num_active = %d\n"
1190  " Current append page = %lld\n", log_Gl.flush_info.max_toflush, log_Gl.flush_info.num_toflush, append);
1191 }
1192 
1193 /*
1194  * logpb_dump_to_flush_page -
1195  *
1196  * return: nothing
1197  *
1198  * NOTE:
1199  */
1200 static void
1202 {
1203  int i;
1204  LOG_BUFFER *log_bufptr;
1205  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
1206 
1207  (void) fprintf (out_fp, " Candidate append pages to flush are:\n");
1208 
1209  for (i = 0; i < flush_info->num_toflush; i++)
1210  {
1211  log_bufptr = logpb_get_log_buffer (flush_info->toflush[i]);
1212  if (i != 0)
1213  {
1214  if ((i % 10) == 0)
1215  {
1216  fprintf (out_fp, ",\n");
1217  }
1218  else
1219  {
1220  fprintf (out_fp, ",");
1221  }
1222  }
1223  fprintf (out_fp, " %4lld", (long long int) log_bufptr->pageid);
1224  }
1225 
1226  fprintf (out_fp, "\n");
1227 }
1228 
1229 /*
1230  * logpb_dump_pages -
1231  *
1232  * return: nothing
1233  *
1234  * NOTE:
1235  */
1236 static void
1237 logpb_dump_pages (FILE * out_fp)
1238 {
1239  int i;
1240  LOG_BUFFER *log_bufptr;
1241 
1242  for (i = 0; i < log_Pb.num_buffers; i++)
1243  {
1244  log_bufptr = LOGPB_FIND_BUFPTR (i);
1245  if (log_bufptr->pageid == NULL_PAGEID)
1246  {
1247  /* *** ** (void)fprintf(stdout, "%3d ..\n", i); */
1248  continue;
1249  }
1250  else
1251  {
1252  fprintf (out_fp, "%3d %10lld %10d %3d %p %p-%p %4s %5lld %5d\n",
1253  i, (long long) log_bufptr->pageid, log_bufptr->phy_pageid, log_bufptr->dirty,
1254  (void *) log_bufptr, (void *) (log_bufptr->logpage),
1255  (void *) (&log_bufptr->logpage->area[LOGAREA_SIZE - 1]), "",
1256  (long long) log_bufptr->logpage->hdr.logical_pageid, log_bufptr->logpage->hdr.offset);
1257  }
1258  }
1259  fprintf (out_fp, "\n");
1260 }
1261 
1262 /*
1263  * logpb_initialize_backup_info - Initialize backup information
1264  *
1265  * return: nothing
1266  *
1267  * NOTE:Zeroes ndirty_pages_post_bkup, io_baseln_time, io_numpages and io_bkuptime of log_hdr->bkinfo[i] for every i below FILEIO_BACKUP_UNDEFINED_LEVEL.
1268  */
1269 static void
1271 {
1272  int i;
1273 
1274  for (i = 0; i < FILEIO_BACKUP_UNDEFINED_LEVEL; i++)
1275  {
1276  log_hdr->bkinfo[i].ndirty_pages_post_bkup = 0;
1277  log_hdr->bkinfo[i].io_baseln_time = 0;
1278  log_hdr->bkinfo[i].io_numpages = 0;
1279  log_hdr->bkinfo[i].io_bkuptime = 0;
1280  }
1281 }
1282 
1283 /*
1284  * logpb_initialize_header - Initialize log header structure
1285  *
1286  * return: NO_ERROR on success
1287  *
1288  * loghdr(in/out): Log header structure
1289  * prefix_logname(in): Name of the log volumes. It is usually set the same as
1290  * database name. For example, if the value is equal to
1291  * "db", the names of the log volumes created are as
1292  * follow:
1293  * Active_log = db_logactive
1294  * Archive_logs = db_logarchive.0
1295  * db_logarchive.1
1296  * .
1297  * .
1298  * .
1299  * db_logarchive.n
1300  * Log_information = db_loginfo
1301  * Database Backup = db_backup
1302  * npages(in): Size of active log in pages
1303  * db_creation(in): Database creation time.
1304  *
1305  * NOTE:Initialize a log header structure.
1306  */
1307 int
1308 logpb_initialize_header (THREAD_ENTRY * thread_p, LOG_HEADER * loghdr, const char *prefix_logname,
1309  DKNPAGES npages, INT64 * db_creation)
1310 {
1311  int i;
1312 
1313  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1314  assert (loghdr != NULL);
1315 
1316  /* to also initialize padding bytes */
1317  memset (loghdr, 0, sizeof (LOG_HEADER));
1318 
1320 
1321  if (db_creation != NULL)
1322  {
1323  loghdr->db_creation = *db_creation;
1324  }
1325  else
1326  {
1327  loghdr->db_creation = -1;
1328  }
1329 
1331  {
1334  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "log_init_logheader");
1336  }
1337 
1340  loghdr->db_iopagesize = IO_PAGESIZE;
1341  loghdr->db_logpagesize = LOG_PAGESIZE;
1342  loghdr->is_shutdown = true;
1343  loghdr->next_trid = LOG_SYSTEM_TRANID + 1;
1344  loghdr->mvcc_next_id = MVCCID_FIRST;
1347  loghdr->npages = npages - 1; /* Hdr pg is stolen */
1348  loghdr->db_charset = lang_charset ();
1349  loghdr->fpageid = 0;
1350  loghdr->append_lsa.pageid = loghdr->fpageid;
1351  loghdr->append_lsa.offset = 0;
1352  LSA_COPY (&loghdr->chkpt_lsa, &loghdr->append_lsa);
1353  loghdr->nxarv_pageid = loghdr->fpageid;
1354  loghdr->nxarv_phy_pageid = 1;
1355  loghdr->nxarv_num = 0;
1356  loghdr->last_arv_num_for_syscrashes = -1;
1357  loghdr->last_deleted_arv_num = -1;
1358  loghdr->has_logging_been_skipped = false;
1359  LSA_SET_NULL (&loghdr->bkup_level0_lsa);
1360  LSA_SET_NULL (&loghdr->bkup_level1_lsa);
1361  LSA_SET_NULL (&loghdr->bkup_level2_lsa);
1362  if (prefix_logname != NULL)
1363  {
1364  strcpy (loghdr->prefix_name, prefix_logname);
1365  }
1366  else
1367  {
1368  loghdr->prefix_name[0] = '\0';
1369  }
1370  loghdr->vacuum_last_blockid = 0;
1371  loghdr->perm_status_obsolete = 0;
1372 
1373  for (i = 0; i < FILEIO_BACKUP_UNDEFINED_LEVEL; i++)
1374  {
1375  loghdr->bkinfo[i].bkup_attime = 0;
1376  }
1378 
1380  loghdr->ha_file_status = -1;
1381  LSA_SET_NULL (&loghdr->eof_lsa);
1383 
1384  logpb_vacuum_reset_log_header_cache (thread_p, loghdr);
1385 
1386  return NO_ERROR;
1387 }
1388 
1389 /*
1390  * logpb_create_header_page - Create log header page
1391  *
1392  * return: Pointer to the page or NULL (the result of logpb_create_page for LOGPB_HEADER_PAGE_ID)
1393  *
1394  * NOTE:Create the log header page by delegating to logpb_create_page; asserts LOG_CS_OWN_WRITE_MODE (thread_p).
1395  */
1396 LOG_PAGE *
1398 {
1399  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1400 
1401  return logpb_create_page (thread_p, LOGPB_HEADER_PAGE_ID);
1402 }
1403 
1404 /*
1405  * logpb_copy_log_header - Copy a log header
1406  *
1407  * return: NO_ERROR if all OK
1408  *
1409  * to_hdr(in): New log header
1410  * from_hdr(in): Source log header
1411  *
1412  * NOTE: Copy a log header.
1413  */
1414 static int
1415 logpb_copy_log_header (THREAD_ENTRY * thread_p, LOG_HEADER * to_hdr, const LOG_HEADER * from_hdr)
1416 {
1417  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1418  assert (to_hdr != NULL);
1419  assert (from_hdr != NULL);
1420 
1421  to_hdr->was_copied = true; // should be reset on first restart
1422 
1423  to_hdr->mvcc_next_id = from_hdr->mvcc_next_id;
1424 
1425  /* Add other attributes that need to be copied */
1426 
1427  return NO_ERROR;
1428 }
1429 
1430 /*
1431  * logpb_fetch_header - Fetch log header
1432  *
1433  * return: nothing
1434  *
1435  * hdr(in/out): Pointer where log header is stored
1436  *
1437  * NOTE:Read the log header into the area pointed by hdr.
1438  */
1439 void
1441 {
1442  assert (hdr != NULL);
1443  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1445 
1447 
1448  /* sync append_lsa to prior_lsa */
1450 }
1451 
1452 /*
1453  * logpb_fetch_header_with_buffer - Fetch log header using given buffer
1454  *
1455  * return: nothing
1456  *
1457  * hdr(in/out): Pointer where log header is stored
1458  * log_pgptr(in/out): log page buffer ptr
1459  *
1460  * NOTE:Read the log header into the area pointed by hdr
1461  */
1462 void
1464 {
1465  LOG_HEADER *log_hdr; /* The log header */
1466  LOG_LSA header_lsa;
1467 
1468  assert (hdr != NULL);
1469  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1470  assert (log_pgptr != NULL);
1471 
1472  header_lsa.pageid = LOGPB_HEADER_PAGE_ID;
1473  header_lsa.offset = LOG_PAGESIZE;
1474 
1475  if ((logpb_fetch_page (thread_p, &header_lsa, LOG_CS_SAFE_READER, log_pgptr)) != NO_ERROR)
1476  {
1477  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "log_fetch_hdr_with_buf");
1478  /* This statement should not be reached */
1479  (void) logpb_initialize_header (thread_p, hdr, NULL, 0, NULL);
1480  return;
1481  }
1482 
1483  log_hdr = (LOG_HEADER *) (log_pgptr->area);
1484  *hdr = *log_hdr;
1485 
1487  assert (log_pgptr->hdr.offset == NULL_OFFSET);
1488 }
1489 
1490 /*
1491  * logpb_fetch_header_from_active_log - Fetch log header directly from active log file
1492  *
1493  * return: error code
1494  *
1495  * hdr(in/out): Pointer where log header is stored
1496  * log_pgptr(in/out): log page buffer ptr
1497  *
1498  * NOTE: Should be used only during boot sequence.
1499  */
1500 static int
1501 logpb_fetch_header_from_active_log (THREAD_ENTRY * thread_p, const char *db_fullname, const char *logpath,
1502  const char *prefix_logname, LOG_HEADER * hdr, LOG_PAGE * log_pgptr)
1503 {
1504  LOG_HEADER *log_hdr; /* The log header */
1506  int error_code = NO_ERROR;
1507 
1508  assert (db_fullname != NULL);
1509  assert (logpath != NULL);
1510  assert (prefix_logname != NULL);
1511  assert (hdr != NULL);
1512  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1513  assert (log_pgptr != NULL);
1514 
1515  error_code = logpb_initialize_log_names (thread_p, db_fullname, logpath, prefix_logname);
1516  if (error_code != NO_ERROR)
1517  {
1518  goto error;
1519  }
1520 
1521  if (fileio_is_volume_exist (log_Name_active) == false)
1522  {
1523  error_code = ER_FAILED;
1524  goto error;
1525  }
1526 
1527  log_Gl.append.vdes = fileio_mount (thread_p, db_fullname, log_Name_active, LOG_DBLOG_ACTIVE_VOLID, true, false);
1528  if (log_Gl.append.vdes == NULL_VOLDES)
1529  {
1530  error_code = ER_FAILED;
1531  goto error;
1532  }
1533 
1535  logpb_log ("reading from active log:%s, physical page is : %lld\n", log_Name_active, (long long int) phy_pageid);
1536 
1537  if (fileio_read (thread_p, log_Gl.append.vdes, log_pgptr, phy_pageid, LOG_PAGESIZE) == NULL)
1538  {
1540  log_Name_active);
1541  error_code = ER_LOG_READ;
1542  goto error;
1543  }
1544 
1545  log_hdr = (LOG_HEADER *) (log_pgptr->area);
1546  *hdr = *log_hdr;
1547 
1548  /* keep active log mounted : this prevents other process to access/change DB parameters */
1549 
1550  if (log_pgptr->hdr.logical_pageid != LOGPB_HEADER_PAGE_ID || log_pgptr->hdr.offset != NULL_OFFSET)
1551  {
1553  error_code = ER_LOG_PAGE_CORRUPTED;
1554  goto error;
1555  }
1556 
1557 #if !defined(NDEBUG)
1559  {
1560  logpb_debug_check_log_page (thread_p, log_pgptr);
1561  }
1562 #endif
1563 
1564 error:
1565  return error_code;
1566 }
1567 
1568 // it peeks at the header page of the backed-up active log file
1569 static int
1570 logpb_peek_header_of_active_log_from_backup (THREAD_ENTRY * thread_p, const char *active_log_path, LOG_HEADER * hdr)
1571 {
1572  char log_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT], *aligned_log_pgbuf;
1573  LOG_HEADER *log_hdr;
1575  int error_code = NO_ERROR;
1576  LOG_PAGE *log_pgptr;
1577 
1578  assert (active_log_path != NULL);
1579  assert (hdr != NULL);
1580 
1581  aligned_log_pgbuf = PTR_ALIGN (log_pgbuf, MAX_ALIGNMENT);
1582 
1583  if (fileio_is_volume_exist (active_log_path) == false)
1584  {
1585  return ER_FAILED;
1586  }
1587 
1588  int log_vdes = fileio_open (active_log_path, O_RDONLY, 0);
1589  if (log_vdes == NULL_VOLDES)
1590  {
1591  return ER_FAILED;
1592  }
1593 
1594  phy_pageid = LOGPB_PHYSICAL_HEADER_PAGE_ID;
1595  logpb_log ("reading from active log:%s, physical page is : %lld\n", active_log_path, (long long int) phy_pageid);
1596 
1597  log_pgptr = (LOG_PAGE *) aligned_log_pgbuf;
1598 
1599  if (fileio_read (thread_p, log_vdes, log_pgptr, phy_pageid, LOG_PAGESIZE) == NULL)
1600  {
1601  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_READ, 3, LOGPB_HEADER_PAGE_ID, phy_pageid, active_log_path);
1602  error_code = ER_LOG_READ;
1603  goto end;
1604  }
1605 
1606  log_hdr = (LOG_HEADER *) (log_pgptr->area);
1607  *hdr = *log_hdr;
1608 
1609  if (log_pgptr->hdr.logical_pageid != LOGPB_HEADER_PAGE_ID || log_pgptr->hdr.offset != NULL_OFFSET)
1610  {
1612  error_code = ER_LOG_PAGE_CORRUPTED;
1613  goto end;
1614  }
1615 
1616 end:
1617  fileio_close (log_vdes);
1618 
1619  return error_code;
1620 }
1621 
1622 // it probes whether the next archive file exists
1623 // if the case, the backuped log active file is regarded as unuseful and restore will reset log.
1624 static bool
1625 logpb_is_log_active_from_backup_useful (THREAD_ENTRY * thread_p, const char *active_log_path, const char *db_full_name)
1626 {
1627  LOG_HEADER hdr;
1628 
1629  if (logpb_peek_header_of_active_log_from_backup (thread_p, active_log_path, &hdr) != NO_ERROR)
1630  {
1631  // something bad happened
1632  return false;
1633  }
1634 
1635  // make next archive name
1636  char next_archive_file_path[PATH_MAX], log_path[PATH_MAX];
1637 
1638  fileio_get_directory_path (log_path, active_log_path);
1639  fileio_make_log_archive_name (next_archive_file_path, log_path, fileio_get_base_file_name (db_full_name),
1640  hdr.nxarv_num);
1641 
1642  if (fileio_is_volume_exist (next_archive_file_path))
1643  {
1644  // if the next archive exists, regard the backuped log active is older and useless.
1645  er_log_debug (ARG_FILE_LINE, "log active from backup is older than available archive (%s).\n",
1646  next_archive_file_path);
1647  return false;
1648  }
1649 
1650  return true;
1651 }
1652 
1653 /*
1654  * logpb_flush_header - Flush log header
1655  *
1656  * return: nothing
1657  *
1658  * NOTE:Flush out the log header from the global variable log_Gl.hdr to disk. Note append pages are not flushed.
1659  */
1660 void
1662 {
1663  LOG_HEADER *log_hdr;
1664 #if defined(CUBRID_DEBUG)
1665  struct timeval start_time = { 0, 0 };
1666  struct timeval end_time = { 0, 0 };
1667 
1668  css_gettimeofday (&start_time, NULL);
1669 #endif /* CUBRID_DEBUG */
1670 
1671  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
1672 
1673  if (log_Gl.loghdr_pgptr == NULL)
1674  {
1676 
1677  /* This is just a safe guard. log_initialize frees log_Gl.loghdr_pgptr when it fails. It can only happen when
1678  * deletedb or emergency utilities fail to initialize log. */
1679  log_Gl.loghdr_pgptr = (LOG_PAGE *) malloc (LOG_PAGESIZE);
1680  if (log_Gl.loghdr_pgptr == NULL)
1681  {
1683  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_flush_header");
1684  return;
1685  }
1687  }
1688 
1689  log_hdr = (LOG_HEADER *) (log_Gl.loghdr_pgptr->area);
1690  *log_hdr = log_Gl.hdr;
1691 
1694  log_Gl.loghdr_pgptr->hdr.flags = 0; /* Now flags in header page has always 0 value */
1695 
1697 
1698  log_Stat.flush_hdr_call_count++;
1699 #if defined(CUBRID_DEBUG)
1700  gettimeofday (&end_time, NULL);
1701  log_Stat.last_flush_hdr_sec_by_LFT = LOG_GET_ELAPSED_TIME (end_time, start_time);
1703 #endif /* CUBRID_DEBUG */
1704 
1705 #if defined(CUBRID_DEBUG)
1706  er_log_debug (ARG_FILE_LINE, "log_flush_hdr: call count(%ld) avg flush (%f) \n", log_Stat.flush_hdr_call_count,
1707  (double) log_Stat.total_flush_hdr_sec_by_LFT / log_Stat.flush_hdr_call_count);
1708 #endif /* CUBRID_DEBUG */
1709 }
1710 
1711 /*
1712  * logpb_fetch_page - Fetch an existing log page using local buffer
1713  *
1714  * return: NO_ERROR if everything is ok, else ER_FAILED
1715  *
1716  * req_lsa(in): LSA whose pageid identifies the page to fetch (access_mode(in) is forwarded to logpb_copy_page)
1717  * log_pgptr(in/out): Page buffer to copy
1718  *
1719  * NOTE:Fetch the log page identified by pageid into a log buffer and return such buffer.
1720  * If there is the page in the log page buffer, copy it to buffer and return it.
1721  * If not, read log page from log.
1722  */
1723 int
1724 logpb_fetch_page (THREAD_ENTRY * thread_p, const LOG_LSA * req_lsa, LOG_CS_ACCESS_MODE access_mode,
1725  LOG_PAGE * log_pgptr)
1726 {
1727  LOG_LSA append_lsa, append_prev_lsa;
1728  int rv;
1729 
1730  assert (log_pgptr != NULL);
1731  assert (req_lsa != NULL);
1732  assert (req_lsa->pageid != NULL_PAGEID);
1733 
1734  logpb_log ("called logpb_fetch_page with pageid = %lld\n", (long long int) req_lsa->pageid);
1735 
1736  LSA_COPY (&append_lsa, &log_Gl.hdr.append_lsa);
1737  LSA_COPY (&append_prev_lsa, &log_Gl.append.prev_lsa);
1738 
1739  /*
1740  * This if block ensures the following:
1741  * case 1. log page (of pageid) is in log page buffer (not prior_lsa list)
1742  * case 2. EOL record which is written temporarily by
1743  * logpb_flush_all_append_pages is cleared so there is no EOL
1744  * in log page
1745  */
1746 
1747  if (LSA_LE (&append_lsa, req_lsa) /* for case 1 */
1748  || LSA_LE (&append_prev_lsa, req_lsa)) /* for case 2 */
1749  {
1750  LOG_CS_ENTER (thread_p);
1751 
1753 
1754  /*
1755  * copy prior lsa list to log page buffer to ensure that required
1756  * pageid is in log page buffer
1757  */
1758  if (LSA_LE (&log_Gl.hdr.append_lsa, req_lsa)) /* retry with mutex */
1759  {
1761  }
1762 
1763  LOG_CS_EXIT (thread_p);
1764  }
1765 
1766  /*
1767  * most of the cases, we don't need calling logpb_copy_page with LOG_CS exclusive access,
1768  * if needed, we acquire READ mode in logpb_copy_page
1769  */
1770  rv = logpb_copy_page (thread_p, req_lsa->pageid, access_mode, log_pgptr);
1771  if (rv != NO_ERROR)
1772  {
1773  ASSERT_ERROR ();
1774  return ER_FAILED;
1775  }
1776 
1777  return NO_ERROR;
1778 }
1779 
1780 /*
1781  * logpb_copy_page_from_log_buffer - Copy a log page by calling logpb_copy_page with LOG_CS_FORCE_USE
1782  *
1783  * return: NO_ERROR if everything is ok, ER_FAILED if logpb_copy_page reports an error
1784  * pageid(in): Page identifier; asserted to be neither NULL_PAGEID nor beyond log_Gl.hdr.append_lsa.pageid
1785  * log_pgptr(in/out): Page buffer
1786  */
1787 int
1789 {
1790  int rv;
1791 
1792  assert (log_pgptr != NULL);
1793  assert (pageid != NULL_PAGEID);
1794  assert (pageid <= log_Gl.hdr.append_lsa.pageid);
1795 
1796  logpb_log ("called logpb_copy_page_from_log_buffer with pageid = %lld\n", (long long int) pageid);
1797 
1798  rv = logpb_copy_page (thread_p, pageid, LOG_CS_FORCE_USE, log_pgptr);
1799  if (rv != NO_ERROR)
1800  {
1801  ASSERT_ERROR ();
1802  return ER_FAILED; /* the specific code in rv is collapsed to ER_FAILED */
1803  }
1804 
1805  return NO_ERROR;
1806 }
1807 
1808 /*
1809  * logpb_copy_page_from_file - Read a log page with logpb_read_page_from_file while LOG_CS is entered in read mode
1810  * pageid(in): Page identifier; asserted to be neither NULL_PAGEID nor beyond log_Gl.hdr.append_lsa.pageid
1811  * log_pgptr(in/out): Page buffer
1812  * return: NO_ERROR if everything is ok, ER_FAILED if logpb_read_page_from_file reports an error
1813  *
1814  */
1815 int
1817 {
1818  int rv;
1819 
1820  assert (log_pgptr != NULL);
1821  assert (pageid != NULL_PAGEID);
1822  assert (pageid <= log_Gl.hdr.append_lsa.pageid);
1823 
1824  logpb_log ("called logpb_copy_page_from_file with pageid = %lld\n", (long long int) pageid);
1825 
1826  LOG_CS_ENTER_READ_MODE (thread_p);
1827  if (log_pgptr != NULL) /* already asserted above; only matters when asserts are compiled out */
1828  {
1829  rv = logpb_read_page_from_file (thread_p, pageid, LOG_CS_FORCE_USE, log_pgptr);
1830  if (rv != NO_ERROR)
1831  {
1832  LOG_CS_EXIT (thread_p); /* leave the critical section on the error path too */
1833  return ER_FAILED;
1834  }
1835  }
1836  LOG_CS_EXIT (thread_p);
1837 
1838  return NO_ERROR;
1839 }
1840 
1841 /*
1842  * logpb_copy_page - copy an existing log page using local buffer
1843  *
1844  * return: NO_ERROR if everything is ok, ER_FAILED if the page cannot be read from file, ER_LOG_PAGE_CORRUPTED if the buffer index is out of range
1845  *
1846  * pageid(in): Page identifier
1847  * access_mode(in): access mode (reader, safe reader, writer); LOG_CS is entered in read mode here unless it is LOG_CS_SAFE_READER
1848  * log_pgptr(in/out): Page buffer to copy
1849  *
1850  * NOTE:Copy the log page identified by pageid into log_pgptr.
1851  * If the page is in the log page buffer (or in the header buffer for LOGPB_HEADER_PAGE_ID), it is copied from there.
1852  * If not, the log page is read with logpb_read_page_from_file.
1853  */
1854 
1855 static int
1857 {
1858  LOG_BUFFER *log_bufptr = NULL;
1859  bool is_perf_tracking;
1860  TSC_TICKS start_tick, end_tick;
1861  TSCTIMEVAL tv_diff;
1862  UINT64 fix_wait_time;
1863  PERF_PAGE_MODE stat_page_found = PERF_PAGE_MODE_OLD_IN_BUFFER;
1864  bool log_csect_entered = false;
1865  int rv = NO_ERROR, index;
1866 
1867  assert (log_pgptr != NULL);
1868  assert (pageid != NULL_PAGEID);
1869 
1870  logpb_log ("called logpb_copy_page with pageid = %lld\n", (long long int) pageid);
1871 
1872  is_perf_tracking = perfmon_is_perf_tracking ();
1873  if (is_perf_tracking)
1874  {
1875  tsc_getticks (&start_tick);
1876  }
1877 
1878  if (access_mode != LOG_CS_SAFE_READER)
1879  {
1880  LOG_CS_ENTER_READ_MODE (thread_p); /* released at the exit label */
1881  log_csect_entered = true;
1882  }
1883 
1884  if (pageid == LOGPB_HEADER_PAGE_ID)
1885  {
1886  /* copy header page into log_pgptr */
1887  log_bufptr = &log_Pb.header_buffer;
1888 
1889  if (log_bufptr->pageid == NULL_PAGEID)
1890  {
1891  rv = logpb_read_page_from_file (thread_p, pageid, access_mode, log_pgptr);
1892  if (rv != NO_ERROR)
1893  {
1894  rv = ER_FAILED;
1895  goto exit;
1896  }
1897  stat_page_found = PERF_PAGE_MODE_OLD_LOCK_WAIT; /* page was not found in the buffer */
1898  }
1899  else
1900  {
1901  memcpy (log_pgptr, log_bufptr->logpage, LOG_PAGESIZE);
1902  }
1903 
1904  goto exit;
1905  }
1906 
1907  index = logpb_get_log_buffer_index ((int) pageid);
1908  if (index >= 0 && index < log_Pb.num_buffers)
1909  {
1910  log_bufptr = &log_Pb.buffers[index];
1911  }
1912  else
1913  {
1915  return ER_LOG_PAGE_CORRUPTED; /* NOTE(review): bypasses the exit label, so a LOG_CS entered above is not released and perf tracking is skipped on this path -- confirm and consider rv = ...; goto exit */
1916  }
1917 
1918  if (log_bufptr->pageid == pageid)
1919  {
1920  /* Copy page from log_bufptr into log_pgptr */
1921  memcpy (log_pgptr, log_bufptr->logpage, LOG_PAGESIZE);
1922  if (log_bufptr->pageid == pageid) /* re-checked after the copy; presumably guards against the buffer being reused meanwhile -- confirm */
1923  {
1924  goto exit;
1925  }
1926  }
1927 
1928  /* Could not get from log page buffer cache */
1929  rv = logpb_read_page_from_file (thread_p, pageid, access_mode, log_pgptr);
1930  if (rv != NO_ERROR)
1931  {
1932  rv = ER_FAILED;
1933  goto exit;
1934  }
1935  stat_page_found = PERF_PAGE_MODE_OLD_LOCK_WAIT; /* page was not found in the buffer */
1936 
1937  /* Always exit through here */
1938 exit:
1939  if (log_csect_entered)
1940  {
1941  LOG_CS_EXIT (thread_p);
1942  }
1944 
1945  if (is_perf_tracking)
1946  {
1947  tsc_getticks (&end_tick);
1948  tsc_elapsed_time_usec (&tv_diff, end_tick, start_tick);
1949  fix_wait_time = tv_diff.tv_sec * 1000000LL + tv_diff.tv_usec;
1950  /* log page fix time : use dummy values for latch type and conditional type; use PERF_PAGE_MODE_OLD_LOCK_WAIT and
1951  * PERF_PAGE_MODE_OLD_IN_BUFFER for page type : page is not found in log page buffer and must be read from
1952  * archive vs page is found in log page buffer */
1953  if (fix_wait_time > 0)
1954  {
1955  perfmon_pbx_fix_acquire_time (thread_p, PAGE_LOG, stat_page_found, PERF_HOLDER_LATCH_READ,
1956  PERF_UNCONDITIONAL_FIX_WITH_WAIT, fix_wait_time);
1957  }
1958  }
1959 
1960  return rv;
1961 }
1962 
1963 /*
1964  * logpb_read_page_from_file - Fetch an existing log page from log files
1965  *
1966  * return: NO_ERROR if everything is ok, else ER_FAILED
1967  *
1968  * pageid(in): Page identifier
1969  * log_pgptr(in/out): Page buffer to read
1970  *
1971  * NOTE:read the log page identified by pageid into a buffer from archive or active log.
1972  */
1973 int
1975  LOG_PAGE * log_pgptr)
1976 {
1977  bool log_csect_entered = false;
1978 
1979  assert (log_pgptr != NULL);
1980  assert (pageid != NULL_PAGEID);
1981 
1982  logpb_log ("called logpb_read_page_from_file with pageid = %lld, hdr.logical_pageid = %lld, "
1983  "LOGPB_ACTIVE_NPAGES = %d\n", (long long int) pageid, (long long int) log_pgptr->hdr.logical_pageid,
1985 
1986  if (access_mode == LOG_CS_SAFE_READER)
1987  {
1988  /* This is added here to block others from creating new archive or mounting/dismounting archives while the vacuum
1989  * workers is trying to fetch its page from file. */
1990  /* This was first done only for active pages. This allowed other transaction create a new archive from active log
1991  * after calling logpb_is_page_in_archive. Therefore the logpb_to_physical_pageid became flawed. */
1992  LOG_CS_ENTER_READ_MODE (thread_p);
1993  log_csect_entered = true;
1994  }
1995  else
1996  {
1997  assert (LOG_CS_OWN (thread_p));
1998  }
1999 
2000  // some archived pages may be still in active log; check if they can be fetched from active.
2001  bool fetch_from_archive = logpb_is_page_in_archive (pageid);
2002  if (fetch_from_archive)
2003  {
2004  bool is_archive_page_in_active_log = (pageid + LOGPB_ACTIVE_NPAGES) > log_Gl.hdr.append_lsa.pageid;
2005  bool dont_fetch_archive_from_active = !LOG_ISRESTARTED () || log_Gl.hdr.was_active_log_reset;
2006 
2007  if (is_archive_page_in_active_log && !dont_fetch_archive_from_active)
2008  {
2009  // can fetch from active
2010  fetch_from_archive = false;
2011  }
2012  }
2013  if (fetch_from_archive)
2014  {
2015  // fetch from archive
2016  if (logpb_fetch_from_archive (thread_p, pageid, log_pgptr, NULL, NULL, true) == NULL)
2017  {
2018 #if defined (SERVER_MODE)
2019  if (thread_p != NULL && thread_p->type == TT_VACUUM_MASTER)
2020  {
2022  "Failed to fetch page %lld from archives.", pageid);
2023  }
2024 #endif /* SERVER_MODE */
2025  goto error;
2026  }
2027  }
2028  else
2029  {
2030  // fetch from active
2032 
2033  /*
2034  * Page is contained in the active log.
2035  * Find the corresponding physical page and read the page from disk.
2036  */
2037  phy_pageid = logpb_to_physical_pageid (pageid);
2038  logpb_log ("phy_pageid in logpb_read_page_from_file is %lld\n", (long long int) phy_pageid);
2039 
2041 
2042  if (fileio_read (thread_p, log_Gl.append.vdes, log_pgptr, phy_pageid, LOG_PAGESIZE) == NULL)
2043  {
2045  goto error;
2046  }
2047  else
2048  {
2049  if (log_pgptr->hdr.logical_pageid != pageid)
2050  {
2051  if (log_pgptr->hdr.logical_pageid == pageid + LOGPB_ACTIVE_NPAGES)
2052  {
2053  /* The active part where this archive page belonged was already overwritten. Fetch the page from
2054  * archive. */
2055  if (logpb_fetch_from_archive (thread_p, pageid, log_pgptr, NULL, NULL, true) == NULL)
2056  {
2057 #if defined (SERVER_MODE)
2058  if (thread_p != NULL && thread_p->type == TT_VACUUM_MASTER)
2059  {
2061  "Failed to fetch page %lld from archives.", pageid);
2062  }
2063 #endif /* SERVER_MODE */
2064  goto error;
2065  }
2066  }
2067  else
2068  {
2069  /* Clean the buffer... since it may be corrupted */
2071  goto error;
2072  }
2073  }
2074  else
2075  {
2076  /*
2077  * fetched from active.
2078  * A page fetched from the archive log does not need to be decrypted here
2079  * because it was already decrypted in logpb_fetch_from_archive()
2080  */
2081  TDE_ALGORITHM tde_algo = logpb_get_tde_algorithm ((LOG_PAGE *) log_pgptr);
2082  if (tde_algo != TDE_ALGORITHM_NONE)
2083  {
2084  if (tde_decrypt_log_page ((LOG_PAGE *) log_pgptr, tde_algo, (LOG_PAGE *) log_pgptr) != NO_ERROR)
2085  {
2086  ASSERT_ERROR ();
2087  goto error;
2088  }
2089  }
2090  }
2091  }
2092  }
2093 
2094  if (log_csect_entered)
2095  {
2096  LOG_CS_EXIT (thread_p);
2097  }
2098 
2099  logpb_log ("logpb_read_page_from_file: log page %lld has checksum = %d\n",
2100  (long long int) log_pgptr->hdr.logical_pageid, log_pgptr->hdr.checksum);
2101 
2102 #if !defined(NDEBUG)
2104  {
2105  logpb_debug_check_log_page (thread_p, log_pgptr);
2106  }
2107 #endif
2108 
2109  /* keep old function's usage */
2110  return NO_ERROR;
2111 
2112 error:
2113  if (log_csect_entered)
2114  {
2115  LOG_CS_EXIT (thread_p);
2116  }
2117  return ER_FAILED;
2118 }
2119 
2120 /*
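2121  * logpb_read_page_from_active_log - Read consecutive log pages from the active log
2122  *
2123  * return: number of pages read (num_pages capped at LOGPB_ACTIVE_NPAGES - physical start page + 1), or -1 on error
2124  *
2125  * pageid(in): Logical id of the first page to read
2126  * num_pages(in): Number of pages requested; must be positive
2127  * log_pgptr(in/out): Buffer that receives the pages, LOG_PAGESIZE bytes each (when decrypt_needed(in) is true, TDE-encrypted pages are decrypted in place)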
2128  */
2129 int
2130 logpb_read_page_from_active_log (THREAD_ENTRY * thread_p, LOG_PAGEID pageid, int num_pages, bool decrypt_needed,
2131  LOG_PAGE * log_pgptr)
2132 {
      /*
       * Reads up to num_pages consecutive log pages, starting at logical page pageid, from the active log volume
       * (log_Gl.append.vdes) into the buffer at log_pgptr. Returns the number of pages read (num_pages after the
       * clamping below) or -1 on failure.
       *
       * NOTE(review): listing lines 2149, 2161 and 2186 are absent from this copy; the statements they held are
       * not visible here.
       */
2133  LOG_PHY_PAGEID phy_start_pageid;
2134
2135  assert (log_pgptr != NULL);
2136  assert (pageid != NULL_PAGEID);
2137  assert (num_pages > 0);
2138
2139  logpb_log ("called logpb_read_page_from_active_log with pageid = %lld and num_pages = %d\n", (long long int) pageid,
2140  num_pages);
2141
2142  /*
2143  * Page is contained in the active log.
2144  * Find the corresponding physical page and read the page from disk.
2145  */
2146  phy_start_pageid = logpb_to_physical_pageid (pageid);
      /* never request more pages than lie between phy_start_pageid and LOGPB_ACTIVE_NPAGES (inclusive) */
2147  num_pages = MIN (num_pages, LOGPB_ACTIVE_NPAGES - phy_start_pageid + 1);
2148
2150
2151  if (fileio_read_pages (thread_p, log_Gl.append.vdes, (char *) log_pgptr, phy_start_pageid, num_pages, LOG_PAGESIZE) ==
2152  NULL)
2153  {
2154  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_READ, 3, pageid, phy_start_pageid, log_Name_active);
2155  return -1;
2156  }
2157  else
2158  {
      /* only the header of the first page read is compared against the requested pageid */
2159  if (log_pgptr->hdr.logical_pageid != pageid)
2160  {
2162  return -1;
2163  }
2164  }
2165
2166  if (decrypt_needed)
2167  {
      /* decrypt in place (source and destination are the same page) every page that carries a TDE algorithm */
2168  char *ptr = (char *) log_pgptr;
2169  int i;
2170  for (i = 0; i < num_pages; i++)
2171  {
2172  TDE_ALGORITHM tde_algo = logpb_get_tde_algorithm ((LOG_PAGE *) ptr);
2173  if (tde_algo != TDE_ALGORITHM_NONE)
2174  {
2175  if (tde_decrypt_log_page ((LOG_PAGE *) ptr, tde_algo, (LOG_PAGE *) ptr) != NO_ERROR)
2176  {
2177  ASSERT_ERROR ();
2178  return -1;
2179  }
2180  }
2181  ptr += LOG_PAGESIZE;
2182  }
2183  }
2184
2185 #if !defined(NDEBUG)
2187  {
      /* debug builds only: run logpb_debug_check_log_page on every page that was read */
2188  char *ptr = NULL;
2189  int i;
2190  char log_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
2191  char *aligned_log_pgbuf = PTR_ALIGN (log_pgbuf, MAX_ALIGNMENT);
2192
2193  ptr = (char *) log_pgptr;
2194  for (i = 0; i < num_pages; i++)
2195  {
2196  TDE_ALGORITHM tde_algo = logpb_get_tde_algorithm ((LOG_PAGE *) ptr);
2197  /* checksum is calculated before tde-encryption */
2198  if (!decrypt_needed && tde_algo != TDE_ALGORITHM_NONE)
2199  {
2200  /* This page is a tde-encrypted page and has not been decrypted yet.
2201  * To check consistency, we need to decrypt it. The decryption goes into the local scratch buffer, so the
       * page handed back to the caller stays encrypted. */
2202  if (!tde_Cipher.is_loaded)
2203  {
2204  ptr += LOG_PAGESIZE;
2205  continue; /* no way to check an encrypted page without tde module */
2206  }
2207
2208  if (tde_decrypt_log_page ((LOG_PAGE *) ptr, tde_algo, (LOG_PAGE *) aligned_log_pgbuf) != NO_ERROR)
2209  {
2210  ASSERT_ERROR ();
2211  assert (false);
2212  }
2213  logpb_debug_check_log_page (thread_p, (LOG_PAGE *) aligned_log_pgbuf);
2214  ptr += LOG_PAGESIZE;
2215  }
2216  else
2217  {
2218  logpb_debug_check_log_page (thread_p, (LOG_PAGE *) ptr);
2219  ptr += LOG_PAGESIZE;
2220  }
2221  }
2222  }
2223 #endif
2224
2225  return num_pages;
2226 }
2227 
2228 /*
2229  * logpb_write_page_to_disk - writes and syncs a log page to disk
2230  *
2231  * return: NO_ERROR, the error returned by logpb_set_page_checksum, or ER_FAILED when the write fails
2232  *
      * thread_p(in): thread entry
2233  * log_pgptr(in/out): Log page pointer; its checksum is set here and its tde flag is cleared if encryption fails
2234  * logical_pageid(in): logical page id; must equal log_pgptr->hdr.logical_pageid
2235  *
2236  * NOTE:writes and syncs a log page to disk. A tde-flagged page is encrypted into a local buffer and that copy is
      * written, so the caller's page keeps its plain contents.
      * NOTE(review): listing lines 2242, 2269, 2284, 2294 and 2310 are absent from this copy; the declaration of
      * phy_pageid, the assignment of write_mode and the sync step are presumably among them.
2237  */
2238 int
2239 logpb_write_page_to_disk (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, LOG_PAGEID logical_pageid)
2240 {
2241  int nbytes, error_code;
2243  FILEIO_WRITE_MODE write_mode;
2244  char enc_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
2245  LOG_PAGE *enc_pgptr = NULL;
2246
2247  enc_pgptr = (LOG_PAGE *) PTR_ALIGN (enc_pgbuf, MAX_ALIGNMENT);
2248
2249  assert (log_pgptr != NULL);
2250  assert (log_pgptr->hdr.logical_pageid == logical_pageid);
2251  /* we allow writing page as long as they do not belong to archive area */
2252  assert (logical_pageid == LOGPB_HEADER_PAGE_ID
2253  || (!LOGPB_IS_ARCHIVE_PAGE (logical_pageid) && logical_pageid <= LOGPB_LAST_ACTIVE_PAGE_ID));
2254
2255  logpb_log ("called logpb_write_page_to_disk for logical_pageid = %lld\n", (long long int) logical_pageid);
2256
2257  /* Set page CRC before writing to disk. */
2258  error_code = logpb_set_page_checksum (thread_p, log_pgptr);
2259  if (error_code != NO_ERROR)
2260  {
2261  return error_code;
2262  }
2263
2264  phy_pageid = logpb_to_physical_pageid (logical_pageid);
2265  logpb_log ("phy_pageid in logpb_write_page_to_disk is %lld\n", (long long int) phy_pageid);
2266
2267  /* log_Gl.append.vdes is only changed while starting or finishing or recovering server. So, log cs is not needed. */
2268
2270
2271  logpb_log ("logpb_write_page_to_disk: The page (%lld) is being tde-encrypted: %d\n", (long long int) logical_pageid,
2272  LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr));
2273
2274  if (LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr))
2275  {
2276  error_code = tde_encrypt_log_page (log_pgptr, logpb_get_tde_algorithm (log_pgptr), enc_pgptr);
2277  if (error_code != NO_ERROR)
2278  {
2279  /*
2280  * If encryption fails, we skip it and turn the tde flag off. The page will never be encrypted in this case.
2281  * It means that once it fails, the page always spills user data unencrypted from then on.
2282  */
2283  logpb_set_tde_algorithm (thread_p, log_pgptr, TDE_ALGORITHM_NONE);
2285  }
2286  else
2287  {
      /* from here on write the encrypted copy instead of the caller's page */
2288  log_pgptr = enc_pgptr;
2289  }
2290  }
2291
2292  if (fileio_write (thread_p, log_Gl.append.vdes, log_pgptr, phy_pageid, LOG_PAGESIZE, write_mode) == NULL)
2293  {
2295  {
2296  nbytes = log_Gl.hdr.db_logpagesize;
2297  er_set (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_WRITE_OUT_OF_SPACE, 4, logical_pageid, phy_pageid,
2298  log_Name_active, nbytes);
2299  }
2300  else
2301  {
2302  er_set_with_oserror (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_WRITE, 3, logical_pageid, phy_pageid,
2303  log_Name_active);
2304  }
2305
      /* a failed write of a log page is reported through logpb_fatal_error */
2306  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_write_page_to_disk");
2307  return ER_FAILED;
2308  }
2309
2311  return NO_ERROR;
2312 }
2313 
2314 /*
2315  * logpb_find_header_parameters - Find some database creation parameters
2316  *
2317  * return: iopagesize or -1
2318  *
      * thread_p(in): thread entry
2320  * force_read_log_header(in): force to read log header again (resets the cached header and its validation state)
2319  * db_fullname(in): Full name of the database
2321  * logpath(in): Directory where the log volumes reside
2322  * prefix_logname(in): Name of the log volumes. It is usually set as database
2323  * name. For example, if the value is equal to "db", the
2324  * names of the log volumes created are as follow:
2325  * Active_log = db_logactive
2326  * Archive_logs = db_logarchive.0
2327  * db_logarchive.1
2328  * .
2329  * .
2330  * .
2331  * db_logarchive.n
2332  * Log_information = db_loginfo
2333  * Database Backup = db_backup
2334  * io_page_size(out): Set as a side effect to iopagesize (-1 on error)
      * log_page_size(out): Set as a side effect to the log page size (-1 on error)
2335  * creation_time(out): Set as a side effect to time of database creation (0 on error)
2336  * db_compatibility(out): Set as a side effect to database disk compatibility (-1.0 on error)
2337  * db_charset(out): Set as a side effect to database charset; only written on the path that uses the header read
      * from the active log, not on the log_Gl.hdr path and not on error
2338  *
2339  * NOTE:Find some database creation parameters such as pagesize, creation time, and disk compatibility.
      * The header read from the active log is kept in function-static storage, so later calls reuse it unless
      * force_read_log_header is set.
      * NOTE(review): listing lines 2364, 2379, 2431 and 2443 are absent from this copy.
2340  */
2341 PGLENGTH
2342 logpb_find_header_parameters (THREAD_ENTRY * thread_p, const bool force_read_log_header, const char *db_fullname,
2343  const char *logpath, const char *prefix_logname, PGLENGTH * io_page_size,
2344  PGLENGTH * log_page_size, INT64 * creation_time, float *db_compatibility, int *db_charset)
2345 {
2346  static LOG_HEADER hdr; /* Log header */
2347  static bool is_header_read_from_file = false;
2348  static bool is_log_header_validated = false;
2349  char log_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT], *aligned_log_pgbuf;
2350  LOG_PAGE *log_pgptr = NULL;
2351  int error_code = NO_ERROR;
2352
2353  if (force_read_log_header)
2354  {
      /* drop the cached state so the header is fetched and validated again below */
2355  is_header_read_from_file = false;
2356  is_log_header_validated = false;
2357  }
2358
2359  aligned_log_pgbuf = PTR_ALIGN (log_pgbuf, MAX_ALIGNMENT);
2360
2361  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
2362
2363  /* Is the system restarted ? */
      /* NOTE(review): the condition guarding this block (listing line 2364) is not visible in this copy */
2365  {
      /* answer from the in-memory header log_Gl.hdr */
2366  *io_page_size = log_Gl.hdr.db_iopagesize;
2367  *log_page_size = log_Gl.hdr.db_logpagesize;
2368  *creation_time = log_Gl.hdr.db_creation;
2369  *db_compatibility = log_Gl.hdr.db_compatibility;
2370
2371  if (IO_PAGESIZE != *io_page_size || LOG_PAGESIZE != *log_page_size)
2372  {
      /* page sizes in use differ from the header: switch to the header's sizes and rebuild the log page pool */
2373  if (db_set_page_size (*io_page_size, *log_page_size) != NO_ERROR)
2374  {
2375  goto error;
2376  }
2377
2378  logpb_finalize_pool (thread_p);
2380  if (error_code != NO_ERROR)
2381  {
2382  goto error;
2383  }
2384  error_code = logpb_initialize_pool (thread_p);
2385  if (error_code != NO_ERROR)
2386  {
2387  goto error;
2388  }
2389  if (logpb_fetch_start_append_page (thread_p) != NO_ERROR)
2390  {
2391  goto error;
2392  }
2393  }
2394  return *io_page_size;
2395  }
2396
2397  if (!is_header_read_from_file)
2398  {
2399  log_pgptr = (LOG_PAGE *) aligned_log_pgbuf;
2400
2401  error_code = logpb_fetch_header_from_active_log (thread_p, db_fullname, logpath, prefix_logname, &hdr, log_pgptr);
2402  if (error_code != NO_ERROR)
2403  {
2404  goto error;
2405  }
2406  is_header_read_from_file = true;
2407  }
2408
2409  *io_page_size = hdr.db_iopagesize;
2410  *log_page_size = hdr.db_logpagesize;
2411  *creation_time = hdr.db_creation;
2412  *db_compatibility = hdr.db_compatibility;
2413  *db_charset = (int) hdr.db_charset;
2414
2415  if (is_log_header_validated)
2416  {
      /* the cached header already passed the checks below */
2417  return *io_page_size;
2418  }
2419
2420  /*
2421  * Make sure that the log is a log file and that it is compatible with the
2422  * running database and system
2423  */
2424
2425  if (strcmp (hdr.prefix_name, prefix_logname) != 0)
2426  {
2427  /*
2428  * This does not look like the log or the log was renamed. Incompatible
2429  * prefix name with the prefix stored on disk
2430  */
2432  hdr.prefix_name);
2433  /* Continue anyhow.. */
2434  }
2435
2436  /* only check for incompatibility here, this will be done again in log_xinit which will run the compatibility
2437  * functions if there are any. */
2438  /* We added disk compatibility rules to R2.2. Before that release, rel_get_disk_compatible function returned only
2439  * REL_FULLY_COMPATIBLE or REL_NOT_COMPATIBLE. However, it might return REL_BACKWARD_COMPATIBLE now. */
2440  if (rel_get_disk_compatible (*db_compatibility, NULL) != REL_FULLY_COMPATIBLE)
2441  {
2442  log_Gl.hdr.db_compatibility = *db_compatibility;
2444  error_code = ER_LOG_INCOMPATIBLE_DATABASE;
2445  goto error;
2446  }
2447
2448  is_log_header_validated = true;
2449
2450  return *io_page_size;
2451
2452 error:
      /* error_code is not returned; failure is signalled through the -1 / 0 / -1.0 output values */
2453  *io_page_size = -1;
2454  *log_page_size = -1;
2455  *creation_time = 0;
2456  *db_compatibility = -1.0;
2457
2458  return *io_page_size;
2459 }
2460 
2461 /*
2462  *
2463  * FUNCTIONS RELATED TO APPEND PAGES
2464  *
2465  */
2466 
2467 /*
2468  * logpb_fetch_start_append_page - FETCH THE START APPEND PAGE
2469  *
2470  * return: NO_ERROR, or ER_FAILED when log_Gl.append.log_pgptr is still NULL after the fetch
2471  *
2472  * NOTE:Fetch the start append page and register it in log_Gl.flush_info as a page to flush.
      * NOTE(review): listing lines 2475 (the line carrying the function name and parameters), 2478, 2507 and 2513
      * are absent from this copy; the declaration of flag and the fetch call itself are presumably among them.
2473  */
2474 int
2476 {
2477  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
2479  bool need_flush;
2480 #if defined(SERVER_MODE)
2481  int rv;
2482 #endif /* SERVER_MODE */
2483
2484  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
2485
2486  logpb_log ("started logpb_fetch_start_append_page\n");
2487
2488  /* detect empty log (page and offset of zero) */
2489  if ((log_Gl.hdr.append_lsa.pageid == 0) && (log_Gl.hdr.append_lsa.offset == 0))
2490  {
2491  flag = NEW_PAGE;
2492  }
2493
2494  if (log_Gl.append.log_pgptr != NULL)
2495  {
2496  /*
2497  * Somehow we already have an append page, invalidate all current append pages
2498  * and start from scratch
2499  */
2500  logpb_invalid_all_append_pages (thread_p);
2501  }
2502
2503  /*
2504  * Fetch the start append page
2505  */
2506
2508  if (log_Gl.append.log_pgptr == NULL)
2509  {
2510  return ER_FAILED;
2511  }
2512
2514  /*
2515  * Save this log append page as an active page to be flushed at a later
2516  * time if the page is modified (dirty).
2517  * We must save the log append pages in the order that they are defined
2518  * and need to be flushed.
2519  */
2520
2521  need_flush = false;
2522
2523  rv = pthread_mutex_lock (&flush_info->flush_mutex);
2524
2525  flush_info->toflush[flush_info->num_toflush] = log_Gl.append.log_pgptr;
2526  flush_info->num_toflush++;
2527
2528  if (flush_info->num_toflush >= flush_info->max_toflush)
2529  {
2530  /*
2531  * Force the dirty pages including the current one at this moment
2532  */
2533  need_flush = true;
2534  }
2535
2536  pthread_mutex_unlock (&flush_info->flush_mutex);
2537
      /* the flush is issued only after flush_mutex has been released */
2538  if (need_flush)
2539  {
2540  logpb_flush_pages_direct (thread_p);
2541  }
2542
2543  return NO_ERROR;
2544 }
2545 
2546 /*
2547  * logpb_fetch_start_append_page_new - FETCH THE NEW START APPEND PAGE
2548  *
2549  * return: Pointer to the page (log_Gl.append.log_pgptr) or NULL
      *
      * NOTE(review): listing lines 2552 (the line carrying the function name and parameters), 2558 and 2564 are
      * absent from this copy; the statement that obtains the page is presumably on line 2558.
2550  */
2551 LOG_PAGE *
2553 {
2554  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
2555
2556  logpb_log ("started logpb_fetch_start_append_page_new\n");
2557
2559  if (log_Gl.append.log_pgptr == NULL)
2560  {
2561  return NULL;
2562  }
2563
2565
2566  return log_Gl.append.log_pgptr;
2567 }
2568 
2569 /*
2570  * logpb_next_append_page - Fetch next append page
2571  *
2572  * return: nothing
2573  *
      * thread_p(in): thread entry
2574  * current_setdirty(in): Set the current append page dirty ?
2575  *
2576  * NOTE:Fetch the next append page.
2577  * If the current append page contains the beginning of the log
2578  * record being appended (i.e., log record did fit on current
2579  * append page), the freeing of this page is delayed until the
2580  * record is completely appended/logged. This is needed since
2581  * every log record has a forward pointer to next log record
2582  * (i.e., next append address). In addition, we must avoid
2583  * flushing this page to disk (e.g., page replacement),
2584  * otherwise, during crash recovery we could try to read a log
2585  * record that has never been finished and the end of the log may
2586  * not be detected. That is, the log would be corrupted.
2587  *
2588  * If the current append page does not contain the beginning of
2589  * the log record, the page can be freed and flushed at any time.
2590  *
2591  * If the next page to archive is located at the physical
2592  * location of the desired append page, a set of log pages is
2593  * archived, so we can continue the append operations.
      *
      * NOTE(review): listing lines 2622, 2624, 2625, 2634, 2643, 2645, 2656, 2664, 2666, 2676, 2719, 2725 and
      * 2730 are absent from this copy. They include the conditions of several if-blocks below and the statement
      * that obtains the new append page.
2594  */
2595 static void
2596 logpb_next_append_page (THREAD_ENTRY * thread_p, LOG_SETDIRTY current_setdirty)
2597 {
2598  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
2599  bool need_flush;
2600 #if defined(SERVER_MODE)
2601  int rv;
2602 #endif /* SERVER_MODE */
2603 #if defined(CUBRID_DEBUG)
2604  long commit_count = 0;
2605  static struct timeval start_append_time = { 0, 0 };
2606  struct timeval end_append_time = { 0, 0 };
2607  static long prev_commit_count_in_append = 0;
2608  double elapsed = 0;
2609
2610  gettimeofday (&end_append_time, NULL);
2611 #endif /* CUBRID_DEBUG */
2612
2613  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
2614
2615  logpb_log ("started logpb_next_append_page\n");
2616
2617  if (current_setdirty == LOG_SET_DIRTY)
2618  {
2619  logpb_set_dirty (thread_p, log_Gl.append.log_pgptr);
2620  }
2621
2623
2626
2627  /*
2628  * Is the next logical page to archive, currently located at the physical
2629  * location of the next logical append page ? (Remember the log is a RING).
2630  * If so, we need to archive the log from the next logical page to archive
2631  * up to the closest page that does not hold the current append log record.
2632  */
2633
2635  {
2636  /* The log must be archived */
2637  logpb_archive_active_log (thread_p);
2638  }
2639
2640  /*
2641  * Has the log been cycled ?
2642  */
2644  {
2646
2647  /* Flush the header to save updates by archiving. */
2648  logpb_flush_header (thread_p);
2649  }
2650
2651  /*
2652  * Fetch the next page as a newly defined append page. Append pages are
2653  * always new pages
2654  */
2655
2657  if (log_Gl.append.log_pgptr == NULL)
2658  {
2659  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "log_next_append_page");
2660  /* This statement should not be reached */
2661  return;
2662  }
2663
2665  {
      /* stamp the new append page with tde_algo and mark it dirty; tde_algo is set on a line missing from this copy */
2667  logpb_set_tde_algorithm (thread_p, log_Gl.append.log_pgptr, tde_algo);
2668  logpb_set_dirty (thread_p, log_Gl.append.log_pgptr);
2669  logpb_log ("logpb_next_append_page: set tde_algorithm to appending page (%lld), "
2670  "tde_algorithm = %s\n", (long long int) log_Gl.append.log_pgptr->hdr.logical_pageid,
2671  tde_get_algorithm_name (tde_algo));
2672  }
2673
2674 #if defined(CUBRID_DEBUG)
2675  {
2677  }
2678 #endif /* CUBRID_DEBUG */
2679
2680  /*
2681  * Save this log append page as an active page to be flushed at a later
2682  * time if the page is modified (dirty).
2683  * We must save the log append pages in the order that they are defined
2684  * and need to be flushed.
2685  */
2686
2687  rv = pthread_mutex_lock (&flush_info->flush_mutex);
2688
2689  flush_info->toflush[flush_info->num_toflush] = log_Gl.append.log_pgptr;
2690  flush_info->num_toflush++;
2691
2692  need_flush = false;
2693  if (flush_info->num_toflush >= flush_info->max_toflush)
2694  {
2695  need_flush = true;
2696  }
2697
2698  pthread_mutex_unlock (&flush_info->flush_mutex);
2699
      /* the flush is issued only after flush_mutex has been released */
2700  if (need_flush)
2701  {
2702  logpb_flush_all_append_pages (thread_p);
2703  }
2704
2705 #if defined(CUBRID_DEBUG)
      /* debug statistics: time spent on the previous append page and commits counted while it was in use */
2706  if (start_append_time.tv_sec != 0 && start_append_time.tv_usec != 0)
2707  {
2708  elapsed = LOG_GET_ELAPSED_TIME (end_append_time, start_append_time);
2709  }
2710
2711  log_Stat.use_append_page_sec = elapsed;
2712  gettimeofday (&start_append_time, NULL);
2713
2714  commit_count = log_Stat.commit_count - prev_commit_count_in_append;
2715
2716  prev_commit_count_in_append = log_Stat.commit_count;
2717
2718  log_Stat.last_commit_count_while_using_a_page = commit_count;
2720 #endif /* CUBRID_DEBUG */
2721
2722  log_Stat.total_append_page_count++;
2723
2724 #if defined(CUBRID_DEBUG)
2726  "log_next_append_page: new page id(%lld) total_append_page_count(%ld)"
2727  " num_toflush(%d) use_append_page_sec(%f) need_flush(%d) commit_count(%ld)"
2728  " total_commit_count(%ld)\n", (int) log_Stat.last_append_pageid, log_Stat.total_append_page_count,
2729  flush_info->num_toflush, log_Stat.use_append_page_sec, need_flush,
2731 #endif /* CUBRID_DEBUG */
2732 }
2733 
2734 /*
2735  * logpb_writev_append_pages - Write a set of sequential pages
2736  *
2737  * return: to_flush, or NULL when setting a page checksum or writing a page fails
2738  *
      * thread_p(in): thread entry
2739  * to_flush(in): Array to address of content of pages to flush
2740  * npages(in): Number of pages to flush
2741  *
2742  * NOTE:Flush to disk a set of log contiguous pages. The physical location of page i is taken as the physical
      * page id of the first page's buffer plus i.
      * NOTE(review): listing lines 2748, 2750, 2758, 2793, 2804, 2806 and 2811 are absent from this copy; the
      * declarations of phy_pageid and write_mode and the heads of the error-reporting calls are presumably
      * among them.
2743  */
2744 static LOG_PAGE **
2745 logpb_writev_append_pages (THREAD_ENTRY * thread_p, LOG_PAGE ** to_flush, DKNPAGES npages)
2746 {
2747  LOG_BUFFER *bufptr;
2749  int i;
2751  char enc_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
2752  LOG_PAGE *log_pgptr = NULL;
2753  LOG_PAGE *enc_pgptr = NULL;
2754
2755  enc_pgptr = (LOG_PAGE *) PTR_ALIGN (enc_pgbuf, MAX_ALIGNMENT);
2756
2757 #if !defined (CS_MODE)
2759 #endif
2760
2761  /* In this point, flush buffer cannot be replaced by trans. So, bufptr's pageid and phy_pageid are not changed. */
2762  if (npages > 0)
2763  {
2764  bufptr = logpb_get_log_buffer (to_flush[0]);
2765  phy_pageid = bufptr->phy_pageid;
2766
2767  logpb_log ("logpb_writev_append_pages: started with pageid = %lld and phy_pageid = %lld\n",
2768  (long long int) bufptr->pageid, (long long int) phy_pageid);
2769
      /* first pass: checksum every page; nothing is written if any checksum cannot be set */
2770  for (i = 0; i < npages; i++)
2771  {
2772  /* Set page CRC before writing to disk. */
2773  if (logpb_set_page_checksum (thread_p, to_flush[i]) != NO_ERROR)
2774  {
2775  return NULL;
2776  }
2777  }
      /* second pass: write the pages one by one, encrypting tde-flagged pages into the single scratch buffer */
2778  for (i = 0; i < npages; i++)
2779  {
2780  log_pgptr = to_flush[i];
2781
2782  logpb_log ("logpb_writev_append_pages: The page (%lld) is being tde-encrypted: %d\n",
2783  (long long int) log_pgptr->hdr.logical_pageid, LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr));
2784  if (LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr))
2785  {
2786  if (tde_encrypt_log_page (log_pgptr, logpb_get_tde_algorithm (log_pgptr), enc_pgptr) != NO_ERROR)
2787  {
2788  /*
2789  * If encryption fails, we skip it and turn the tde flag off. The page will never be encrypted in this case.
2790  * It means that once it fails, the page always spills user data unencrypted from then on.
2791  */
2792  logpb_set_tde_algorithm (thread_p, log_pgptr, TDE_ALGORITHM_NONE);
2794  log_pgptr->hdr.logical_pageid);
2795  }
2796  else
2797  {
2798  log_pgptr = enc_pgptr;
2799  }
2800  }
2801
2802  if (fileio_write (thread_p, log_Gl.append.vdes, log_pgptr, phy_pageid + i, LOG_PAGESIZE, write_mode) == NULL)
2803  {
      /* NOTE(review): the visible arguments report phy_pageid (the first page), not phy_pageid + i — confirm */
2805  {
2807  phy_pageid, log_Name_active, log_Gl.hdr.db_logpagesize);
2808  }
2809  else
2810  {
2812  phy_pageid, log_Name_active);
2813  }
      /* stop at the first failed write; the NULL return tells the caller the flush failed */
2814  to_flush = NULL;
2815  break;
2816  }
2817  }
2818  }
2819
2820  return to_flush;
2821 }
2822 
2823 /*
2824  * logpb_write_toflush_pages_to_archive - Background archiving
2825  *
      * return: nothing
      *
2826  * NOTE : write flushed pages to temporary archiving volume
2827  * (which will be renamed to real archiving volume) at this time.
2828  * but don't write last page because it will be modified & flushed again.
2829  * in error case, dismount temp archiving volume and give up background
2830  * archiving.
      * NOTE(review): listing lines 2833 (the line carrying the function name and parameters), 2837, 2844, 2847,
      * 2857, 2910 and 2930 are absent from this copy; the declarations of phy_pageid and bg_arv_info and the
      * assignment of write_mode are presumably among them.
2831  */
2832 static void
2834 {
2835  int i;
2836  LOG_PAGEID pageid, prev_lsa_pageid;
2838  char log_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
2839  char enc_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT];
2840  LOG_PAGE *log_pgptr = NULL;
2841  LOG_PAGE *enc_pgptr = NULL;
2842  LOG_BUFFER *bufptr;
2843  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
2845  FILEIO_WRITE_MODE write_mode;
2846
2848
      /* nothing to do without a background archive volume or with at most one page queued for flush */
2849  if (log_Gl.bg_archive_info.vdes == NULL_VOLDES || flush_info->num_toflush <= 1)
2850  {
2851  return;
2852  }
2853
2854  pageid = bg_arv_info->current_page_id;
2855  prev_lsa_pageid = log_Gl.append.prev_lsa.pageid;
2856  i = 0;
2858
      /* copy pages in page id order, stopping before the page that holds log_Gl.append.prev_lsa */
2859  while (pageid < prev_lsa_pageid && i < flush_info->num_toflush)
2860  {
2861  bufptr = logpb_get_log_buffer (flush_info->toflush[i]);
2862  if (pageid > bufptr->pageid)
2863  {
      /* the archive position is already past this flush page: give up background archiving */
2864  assert_release (pageid <= bufptr->pageid);
2865  fileio_dismount (thread_p, bg_arv_info->vdes);
2866  bg_arv_info->vdes = NULL_VOLDES;
2867  return;
2868  }
2869  else if (pageid < bufptr->pageid)
2870  {
2871  LOG_LSA current_lsa;
2872
2873  current_lsa.pageid = pageid;
2874  current_lsa.offset = LOG_PAGESIZE;
2875  /* to flush all omitted pages by the previous archiving */
2876  log_pgptr = (LOG_PAGE *) PTR_ALIGN (log_pgbuf, MAX_ALIGNMENT);
2877
2878  if (logpb_fetch_page (thread_p, &current_lsa, LOG_CS_FORCE_USE, log_pgptr) != NO_ERROR)
2879  {
2880  fileio_dismount (thread_p, bg_arv_info->vdes);
2881  bg_arv_info->vdes = NULL_VOLDES;
2882  return;
2883  }
2884  }
2885  else
2886  {
      /* the next page to archive is the current flush page: take it and advance in the flush list */
2887  log_pgptr = flush_info->toflush[i];
2888  i++;
2889  }
2890
2891 #if !defined(NDEBUG)
2892  logpb_debug_check_log_page (thread_p, log_pgptr);
2893 #endif
      /* position inside the archive volume, counted from 1 for the page at start_page_id */
2894  phy_pageid = (LOG_PHY_PAGEID) (pageid - bg_arv_info->start_page_id + 1);
2895  assert_release (phy_pageid > 0);
2896
2897  logpb_log ("logpb_write_toflush_pages_to_archive: The page (%lld) is being tde-encrypted: %d\n",
2898  (long long int) pageid, LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr));
2899
2900  if (LOG_IS_PAGE_TDE_ENCRYPTED (log_pgptr))
2901  {
2902  enc_pgptr = (LOG_PAGE *) PTR_ALIGN (enc_pgbuf, MAX_ALIGNMENT);
2903  if (tde_encrypt_log_page (log_pgptr, logpb_get_tde_algorithm (log_pgptr), enc_pgptr) != NO_ERROR)
2904  {
2905  /*
2906  * If encryption fails, we skip it and turn the tde flag off. The page will never be encrypted in this case.
2907  * It means that once it fails, the page always spills user data unencrypted from then on.
2908  */
2909  logpb_set_tde_algorithm (thread_p, log_pgptr, TDE_ALGORITHM_NONE);
2911  }
2912  else
2913  {
2914  log_pgptr = enc_pgptr;
2915  }
2916  }
2917
2918  if (fileio_write (thread_p, bg_arv_info->vdes, log_pgptr, phy_pageid, LOG_PAGESIZE, write_mode) == NULL)
2919  {
2920  fileio_dismount (thread_p, bg_arv_info->vdes);
2921  bg_arv_info->vdes = NULL_VOLDES;
2922  return;
2923  }
2924
2925  pageid++;
2926  bg_arv_info->current_page_id = pageid;
2927  }
2928
2929  assert_release (bg_arv_info->current_page_id >= bg_arv_info->last_sync_pageid);
2931  {
2932  /* System volume. No need to sync DWB. */
2933  fileio_synchronize (thread_p, bg_arv_info->vdes, log_Name_bg_archive, FILEIO_SYNC_ONLY);
2934  bg_arv_info->last_sync_pageid = bg_arv_info->current_page_id;
2935  }
2936 }
2937 
2938 /*
2939  * logpb_append_next_record - append one prior node (record header plus its data parts) to the log
2940  *
2941  * return: NO_ERROR
2942  *
      * thread_p(in): thread entry
2943  * node(in): prior node to append; node->start_lsa is expected to equal log_Gl.hdr.append_lsa
      *
      * NOTE(review): listing lines 2946 (the line carrying the function name and parameters), 2962, 2966, 2972
      * and 2988 are absent from this copy.
2944  */
2945 static int
2947 {
      /* the node must start exactly where the log currently ends; anything else is reported as a fatal error */
2948  if (!LSA_EQ (&node->start_lsa, &log_Gl.hdr.append_lsa))
2949  {
2950  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_append_next_record");
2951  }
2952
2953  /* forcing flush in the middle of log record append is a complicated business. try to avoid it if possible. */
2954  if (log_Gl.flush_info.num_toflush + 1 >= log_Gl.flush_info.max_toflush) /* flush will be forced on next page */
2955  {
2956  /* flush early to avoid complicated case */
2957  logpb_flush_all_append_pages (thread_p);
2958  }
2959
2960  logpb_log ("logpb_append_next_record: append a record\n"
2961  "log_Gl.hdr.append_lsa.offset = %d, total record size = %d\n",
2963  sizeof (LOG_RECORD_HEADER) + node->data_header_length + node->ulength + node->rlength);
2964
2965  /* to tde-encrypt pages which is being created while appending */
2967
2968  logpb_start_append (thread_p, &node->log_header);
2969
      /* the three optional parts are appended in a fixed order: data header, udata, rdata */
2970  if (node->data_header != NULL)
2971  {
2973  logpb_append_data (thread_p, node->data_header_length, node->data_header);
2974  }
2975
2976  if (node->udata != NULL)
2977  {
2978  logpb_append_data (thread_p, node->ulength, node->udata);
2979  }
2980
2981  if (node->rdata != NULL)
2982  {
2983  logpb_append_data (thread_p, node->rlength, node->rdata);
2984  }
2985
2986  logpb_end_append (thread_p, &node->log_header);
2987
2989
2990  return NO_ERROR;
2991 }
2992 
2993 /*
2994  * logpb_append_prior_lsa_list - append every node of a prior list to the log and release the nodes
2995  *
2996  * return: NO_ERROR
2997  *
      * thread_p(in): thread entry
2998  * list(in/out): prior node list; each node and its data buffers are freed after being appended
      *
      * NOTE(review): listing lines 3001 (the line carrying the function name and parameters), 3008, 3009, 3012,
      * 3014 and 3015 are absent from this copy; the loop header and the statements that pick the next node are
      * among them.
2999  */
3000 static int
3002 {
3003  LOG_PRIOR_NODE *node;
3004
3005  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
3006
3007  /* append prior_flush_list */
3010
3011  /* append log buffer */
3013  {
3016
      /* the return value of logpb_append_next_record is not checked (the visible code only returns NO_ERROR) */
3017  logpb_append_next_record (thread_p, node);
3018
      /* the node has been copied into the log; release its buffers and then the node itself */
3019  if (node->data_header != NULL)
3020  {
3021  free_and_init (node->data_header);
3022  }
3023  if (node->udata != NULL)
3024  {
3025  free_and_init (node->udata);
3026  }
3027  if (node->rdata != NULL)
3028  {
3029  free_and_init (node->rdata);
3030  }
3031
3032  free_and_init (node);
3033  }
3034
3035  return NO_ERROR;
3036 }
3037 
3038 /*
3039  * prior_lsa_remove_prior_list: detach the prior list from log_Gl.prior_info
3040  *
3041  * return: prior list (the value of log_Gl.prior_info.prior_list_header at the time of the call)
3042  *
      * NOTE(review): listing lines 3045 (the line carrying the function name and parameters) and 3053-3055 are
      * absent from this copy; they presumably reset the list fields of log_Gl.prior_info — confirm against the
      * full source.
3043  */
3044 static LOG_PRIOR_NODE *
3046 {
3047  LOG_PRIOR_NODE *prior_list;
3048
3049  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
3050
3051  prior_list = log_Gl.prior_info.prior_list_header;
3052
3056
3057  return prior_list;
3058 }
3059 
3060 /*
3061  * logpb_prior_lsa_append_all_list: detach the whole prior list and append it to the log
3062  *
3063  * return: NO_ERROR
3064  *
      * NOTE(review): listing lines 3067 (the line carrying the function name and parameters), 3074 and 3082 are
      * absent from this copy; line 3074 presumably takes prior_lsa_mutex, which is released below.
3065  */
3066 int
3068 {
3069  LOG_PRIOR_NODE *prior_list;
3070  INT64 current_size;
3071
3072  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
3073
      /* read the list size and detach the list before prior_lsa_mutex is released */
3075  current_size = log_Gl.prior_info.list_size;
3076  prior_list = prior_lsa_remove_prior_list (thread_p);
3077  log_Gl.prior_info.prior_lsa_mutex.unlock ();
3078
3079  if (prior_list != NULL)
3080  {
      /* NOTE(review): the cast binds tighter than the division, so current_size is narrowed to unsigned int
       * before being divided by ONE_K — confirm that sizes beyond the unsigned int range cannot occur */
3081  perfmon_add_stat (thread_p, PSTAT_PRIOR_LSA_LIST_SIZE, (unsigned int) current_size / ONE_K); /* kbytes */
3083
      /* the append itself runs after the mutex has been released */
3084  logpb_append_prior_lsa_list (thread_p, prior_list);
3085  }
3086
3087  return NO_ERROR;
3088 }
3089 
3090 /*
3091  * logpb_dump_log_page_area - Dump log page area.
3092  *
3093  * return: nothing
3094  * log_pgptr(in): log page
3095  * offset(in): offset in page area to start logging
3096  * length(in): length to log
3097  */
3098 void
3099 logpb_dump_log_page_area (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, int offset, int length)
3100 {
3101  const int block_size = 4 * ONE_K;
3102  char log_block_string[block_size * 4], log_header_string[200], *src_ptr, *dest_ptr;
3103  int count_remaining_bytes, count_bytes_to_dump, i;
3104  int line_no = 0;
3105 
3106  if (logpb_Logging == false)
3107  {
3108  return;
3109  }
3110 
3111  assert (log_pgptr != NULL);
3112  if (offset < 0 || length < 0 || length > LOGAREA_SIZE || offset + length > LOGAREA_SIZE)
3113  {
3114  return;
3115  }
3116 
3117  sprintf (log_header_string, "page_id = %lld, checksum = %d, offset = %d, length = %d\n",
3118  (long long int) log_pgptr->hdr.logical_pageid, log_pgptr->hdr.checksum, offset, length);
3119 
3120  count_remaining_bytes = length;
3121  src_ptr = log_pgptr->area + offset;
3122  while (count_remaining_bytes > 0)
3123  {
3124  dest_ptr = log_block_string;
3125  count_bytes_to_dump = MIN (count_remaining_bytes, block_size);
3126  for (i = 0; i < count_bytes_to_dump; i++, src_ptr++)
3127  {
3128  if (i % 32 == 0)
3129  {
3130  dest_ptr += sprintf (dest_ptr, "\n %05d: ", line_no++);
3131  }
3132 
3133  dest_ptr += sprintf (dest_ptr, "%02X ", (unsigned char) (*src_ptr));
3134  }
3135 
3136  dest_ptr += sprintf (dest_ptr, "\n");
3137  logpb_log ("logpb_dump_log_page_area: header = %s data = %s\n", log_header_string, log_block_string);
3138  count_remaining_bytes -= count_bytes_to_dump;
3139  }
3140 }
3141 
3142 /*
3143  * logpb_page_get_first_null_block_lsa - Get LSA of first null block in log page.
3144  *
3145  * return: nothing
3146  * thread_p(in): thread entry
3147  * log_pgptr(in): log page
3148  * first_null_block_lsa(out): LSA of first null block.
3149  */
3150 void
3151 logpb_page_get_first_null_block_lsa (THREAD_ENTRY * thread_p, LOG_PAGE * log_pgptr, LOG_LSA * first_null_block_lsa)
3152 {
3153  const int block_size = 4 * ONE_K;
3154  char null_buffer[block_size + MAX_ALIGNMENT], *null_block;
3155  int i, max_num_blocks = LOG_PAGESIZE / block_size;
3156 
3157  assert (log_pgptr != NULL && first_null_block_lsa != NULL);
3158 
3159  null_block = PTR_ALIGN (null_buffer, MAX_ALIGNMENT);
3160  memset (null_block, LOG_PAGE_INIT_VALUE, block_size);
3161 
3162  LSA_SET_NULL (first_null_block_lsa);
3163 
3164  /* Set LSA of first NULL block. */
3165  for (i = 0; i < max_num_blocks; i++)
3166  {
3167  /* Search for null blocks. */
3168  if (memcmp (((char *) log_pgptr) + (i * block_size), null_block, block_size) == 0)
3169  {
3170  /* Found the null block. Computes its LSA. */
3171  first_null_block_lsa->pageid = log_pgptr->hdr.logical_pageid;
3172  first_null_block_lsa->offset = i * block_size;
3173 
3174  if (first_null_block_lsa->offset > 0)
3175  {
3176  /* Skip log header size. */
3177  first_null_block_lsa->offset -= sizeof (LOG_HDRPAGE);
3178  }
3179 
3180  assert (first_null_block_lsa->offset >= 0);
3181  break;
3182  }
3183  }
3184 }
3185 
3186 /*
3187  * logpb_flush_all_append_pages - Flush log append pages
3188  *
3189  * return: 1 : log flushed, 0 : do not need log flush, < 0 : error code
3190  *
3191  */
3192 static int
3194 {
3195  LOG_BUFFER *bufptr = NULL; /* The current buffer log append page scanned */
3196  LOG_BUFFER *prv_bufptr = NULL; /* The previous buffer log append page scanned */
3197  int idxflush; /* An index into the first log page buffer to flush */
3198  bool need_sync; /* How we flush anything ? */
3199 
3200  int i;
3201  LOG_PAGEID first_append_pageid = NULL_PAGEID;
3202  bool need_flush = true;
3203  int error_code = NO_ERROR;
3204  int flush_page_count = 0;
3205 #if defined(CUBRID_DEBUG)
3206  struct timeval start_time = { 0, 0 };
3207  struct timeval end_time = { 0, 0 };
3208  int dirty_page_count = 0;
3209  int curr_flush_count = 0;
3210  long commit_count = 0;
3211  static long prev_commit_count_in_flush = 0;
3212 #endif /* CUBRID_DEBUG */
3213  bool hold_flush_mutex = false;
3214  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
3215  LOG_LSA nxio_lsa;
3216 
3217  int rv;
3218 #if defined(SERVER_MODE)
3219  INT64 flush_start_time = 0;
3220  INT64 flush_completed_time = 0;
3221  INT64 all_writer_thr_end_time = 0;
3222 
3223  LOGWR_INFO *writer_info = log_Gl.writer_info;
3224  LOGWR_ENTRY *entry = NULL;
3225  THREAD_ENTRY *wait_thread_p = NULL;
3226 #endif /* SERVER_MODE */
3227 
3228  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
3229 
3230  logpb_log ("called logpb_flush_all_append_pages\n");
3231 
3232 #if defined(CUBRID_DEBUG)
3233  er_log_debug (ARG_FILE_LINE, "logpb_flush_all_append_pages: start\n");
3234 
3235  gettimeofday (&start_time, NULL);
3236 #endif /* CUBRID_DEBUG */
3237 
3238  rv = pthread_mutex_lock (&flush_info->flush_mutex);
3239  hold_flush_mutex = true;
3240 
3241  if (flush_info->num_toflush < 1)
3242  {
3243  need_flush = false;
3244  }
3245  else if (flush_info->num_toflush == 1)
3246  {
3247  /*
3248  * Don't need to do anything if the page is not dirty.
3249  *
3250  * This block is used to avoid updating the last page with an end of file log when it is not needed at all.
3251  */
3252 
3253  bufptr = logpb_get_log_buffer (flush_info->toflush[0]);
3254  if (!logpb_is_dirty (thread_p, flush_info->toflush[0]))
3255  {
3256  need_flush = false;
3257  }
3258  }
3259 
3260  pthread_mutex_unlock (&flush_info->flush_mutex);
3261  hold_flush_mutex = false;
3262 
3263  if (!need_flush)
3264  {
3265  return 0;
3266  }
3267 
3268 #if !defined(NDEBUG)
3269  {
3270  const char *env_value;
3271  int verbose_dump = -1;
3272 
3273  if (verbose_dump == -1)
3274  {
3275  /*
3276  * Do we want to dump the buffer pool for future verification
3277  */
3278  if ((env_value = envvar_get ("LOG_FLUSH_VERBOSE_DUMP")) != NULL)
3279  {
3280  verbose_dump = atoi (env_value) != 0 ? 1 : 0;
3281  }
3282  else
3283  {
3284  verbose_dump = 0;
3285  }
3286  }
3287 
3288  if (verbose_dump != 0)
3289  {
3290  fprintf (stdout, "\n DUMP BEFORE FLUSHING APPEND PAGES\n");
3291  logpb_dump (thread_p, stdout);
3292  }
3293  }
3294 #endif
3295 
3296 #if defined(SERVER_MODE)
3297  if (thread_p && thread_p->type != TT_DAEMON && thread_p->type != TT_VACUUM_MASTER
3298  && thread_p->type != TT_VACUUM_WORKER)
3299  {
3300  /* set event logging parameter */
3301  thread_p->event_stats.trace_log_flush_time = prm_get_integer_value (PRM_ID_LOG_TRACE_FLUSH_TIME_MSECS);
3302  }
3303 #endif /* SERVER_MODE */
3304 
3305 #if defined(CUBRID_DEBUG)
3306  if (log_Gl.append.get_nxio_lsa ().pageid != logpb_get_page_id (flush_info->toflush[0]))
3307  {
3309  "logpb_flush_all_append_pages: SYSTEM ERROR\n NXIO_PAGE %d does not seem the same as next free"
3310  " append page %d to flush\n", log_Gl.append.get_nxio_lsa ().pageid,
3311  logpb_get_page_id (flush_info->toflush[0]));
3312  goto error;
3313  }
3314 #endif /* CUBRID_DEBUG */
3315 
3316  /* how this works:
3317  * ok, so we might have to flush several pages here. if there is only one page, the implementation is straight
3318  * forward, just flush the page.
3319  *
3320  * however, if there are two or more pages, we really need to make sure that the first page, where previous end of log
3321  * record resides (where nxio_lsa points), is flushed last! we cannot validate the new end of log record until we are
3322  * sure all log pages have been flushed!
3323  * so, we'll do a two-step flushing: in the first step we skip nxio_lsa page and flush all other pages. in the second
3324  * step we flush the nxio_lsa page.
3325  *
3326  * the story becomes a lot more complicated when a log entry is only partially appended before flush is called. this
3327  * can happen when the log page buffer becomes full. the problem here is that we cannot yet validate the flushed pages
3328  * before flushing this record entirely; not all pages. what we do here is replace the incomplete record with end of
3329  * log record (very important! not in log page buffer, but in a copy of the log page; that way we allow others to read
3330  * the correct version from buffer). the overwritten copy is flushed to disk.
3331  * when the log record is fully appended (there can be several iterations of flush if we deal with a very large log
3332  * record and log page buffer is very small), we call flush again to make sure all remaining record pages are
3333  * written to disk. at the end of this flush iteration, the original record is written back and page is flushed again,
3334  * validating new record.
3335  *
3336  * to see full implementation, follow references of log_Pb.partial_append.
3337  *
3338  * todo: we might think of a better and simpler solution. the most difficult case to handle is very large log record,
3339  * larger than log page buffer. since log records can theoretically be any size, the solution will have to
3340  * consider this case.
3341  * for now I chose a solution similar to the implementation used before the log page buffer redesign.
3342  */
3343 
3344  /*
3345  * Add an end of log marker to detect the end of the log.
3346  * The marker should be added at the end of the log if there is only one page to be flushed.
3347  * That is, if we are not in the middle of appending a new log record. Otherwise, we need to change the label of
3348  * the last append record as log end record. Flush and then check it back.
3349  */
3350 
3352  {
3353  /* Flush all log append records on such page except the current log record which has not been finished.
3354  * Save the log record type of this record, overwrite an eof record on such position, and flush the page.
3355  * Then, restore the record back on the page and change the current append log sequence address.
3356  */
3357  logpb_log ("logpb_flush_all_append_pages: incomplete record at log_Gl.append.prev_lsa=%lld|%d when flush is "
3358  "called. we'll overwrite the log record with eof.\n", (long long int) log_Gl.append.prev_lsa.pageid,
3359  (int) log_Gl.append.prev_lsa.offset);
3360 
3361  /* first, let's see if this page is still in the log page buffer */
3362  first_append_pageid = log_Gl.append.prev_lsa.pageid;
3363  bufptr = &log_Pb.buffers[logpb_get_log_buffer_index (first_append_pageid)];
3364 
3365  if (bufptr->pageid != first_append_pageid)
3366  {
3367  assert_release (false);
3368  logpb_log ("logpb_flush_all_append_pages: fatal error, partial page not found in log page buffer.");
3369  error_code = ER_FAILED;
3370  goto error;
3371  }
3372 
3373  /* copy from log page buffer */
3374  memcpy (log_Pb.partial_append.log_page_record_header, bufptr->logpage, LOG_PAGESIZE);
3375 
3376  /* set entry in log page buffer not dirty. we don't want it to get flushed again */
3377  bufptr->dirty = false;
3378 
3379  /* Overwrite it with an end of log marker */
3385 
3386  /* write page to disk as it is */
3387  if (logpb_write_page_to_disk (thread_p, log_Pb.partial_append.log_page_record_header, first_append_pageid)
3388  != NO_ERROR)
3389  {
3390  error_code = ER_FAILED;
3391  goto error;
3392  }
3394 
3396  }
3398  {
3399  logpb_log ("logpb_flush_all_append_pages: continue flushing page of partially appended log record.\n");
3400  }
3403  {
3404  /* Add an end of log marker to detect the end of the log.
3405  * Don't advance the log address, the log end of file is overwritten at a later point. */
3406  LOG_RECORD_HEADER eof;
3407 
3408  logpb_log ("logpb_flush_all_append_pages: append end of log record at append_lsa = %lld|%d.\n",
3409  (long long int) log_Gl.hdr.append_lsa.pageid, (int) log_Gl.hdr.append_lsa.offset);
3410  eof.trid = LOG_READ_NEXT_TRANID;
3411  LSA_SET_NULL (&eof.prev_tranlsa);
3413  LSA_SET_NULL (&eof.forw_lsa);
3414  eof.type = LOG_END_OF_LOG;
3415 
3416  logpb_start_append (thread_p, &eof);
3417  }
3418  else
3419  {
3420  /* unexpected status here */
3421  assert_release (false);
3422  error_code = ER_FAILED;
3423  goto error;
3424  }
3425 
3426  /*
3427  * Now flush all contiguous log append dirty pages. The first log append dirty page is flushed at the end,
3428  * so we can synchronize it with the rest.
3429  */
3430 
3431 #if defined(SERVER_MODE)
3432  /* It changes the status of waiting log writer threads and wakes them up */
3433  if (!HA_DISABLED () && !writer_info->skip_flush)
3434  {
3435  assert (hold_flush_mutex == false);
3436  LOG_CS_DEMOTE (thread_p);
3437 
3438  rv = pthread_mutex_lock (&writer_info->flush_start_mutex);
3439  rv = pthread_mutex_lock (&writer_info->wr_list_mutex);
3440 
3441  if (thread_p != NULL && thread_p->event_stats.trace_log_flush_time > 0)
3442  {
3443  flush_start_time = log_get_clock_msec ();
3444 
3445  // *INDENT-OFF*
3446  new (&writer_info->last_writer_client_info) clientids ();
3447  // *INDENT-ON*
3448 
3449  writer_info->trace_last_writer = true;
3450  writer_info->last_writer_elapsed_time = 0;
3452  }
3453 
3454  entry = writer_info->writer_list;
3455  while (entry)
3456  {
3457  if (entry->status == LOGWR_STATUS_WAIT)
3458  {
3459  wait_thread_p = entry->thread_p;
3460  assert (wait_thread_p != thread_p);
3461 
3462  thread_lock_entry (wait_thread_p);
3463 
3464  /* If THREAD_RESUME_DUE_TO_INTERRUPT, do not set the entry status to avoid deadlock
3465  * between flush_end_cond and CSECT_LOG.
3466  */
3467  if (thread_p->resume_status != THREAD_RESUME_DUE_TO_INTERRUPT)
3468  {
3469  /* Still waiting for LOGWR. */
3470  entry->status = LOGWR_STATUS_FETCH;
3471  if (wait_thread_p->resume_status == THREAD_LOGWR_SUSPENDED)
3472  {
3474  }
3475  }
3476 
3477  thread_unlock_entry (wait_thread_p);
3478  }
3479  entry = entry->next;
3480  }
3481 
3482  rv = pthread_mutex_lock (&writer_info->flush_wait_mutex);
3483  writer_info->flush_completed = false;
3484  rv = pthread_mutex_unlock (&writer_info->flush_wait_mutex);
3485 
3486  pthread_mutex_unlock (&writer_info->wr_list_mutex);
3487  pthread_mutex_unlock (&writer_info->flush_start_mutex);
3488  }
3489 #endif /* SERVER_MODE */
3490 
3491  idxflush = -1;
3492  prv_bufptr = NULL;
3493  need_sync = false;
3494 
3495  rv = pthread_mutex_lock (&flush_info->flush_mutex);
3496  hold_flush_mutex = true;
3497 
3498 #if defined(CUBRID_DEBUG)
3499  log_scan_flush_info (log_dump_pageid);
3500  er_log_debug (ARG_FILE_LINE, "\n");
3501 #endif /* CUBRID_DEBUG */
3502 
3503  /* Record number of writes in statistics */
3504  perfmon_add_stat (thread_p, PSTAT_LOG_NUM_IOWRITES, flush_info->num_toflush);
3505 
3506  /* loop through all to flush list. do a two-step process:
3507  * 1. skip all pages not dirty. also skip the page of nxio_lsa! it must be flushed last!
3508  * 2. collect and flush all dirty and successive pages.
3509  */
3510  i = 0;
3511  while (true)
3512  {
3513  /* skip all not dirty */
3514  for (; i < flush_info->num_toflush; i++)
3515  {
3516  bufptr = logpb_get_log_buffer (flush_info->toflush[i]);
3517  assert (bufptr->pageid == flush_info->toflush[i]->hdr.logical_pageid);
3518  if (bufptr->dirty && bufptr->pageid != log_Gl.append.get_nxio_lsa ().pageid)
3519  {
3520  /* found dirty */
3521  break;
3522  }
3523  logpb_log ("logpb_flush_all_append_pages: skip flushing not dirty page %lld.\n", bufptr->pageid);
3524  }
3525  if (i == flush_info->num_toflush)
3526  {
3527  /* nothing left to flush */
3528  break;
3529  }
3530 
3531  /* we have a dirty record */
3532  assert (bufptr->dirty);
3533  prv_bufptr = bufptr;
3534  idxflush = i;
3535 
3536  /* advance to next */
3537  i++;
3538 
3539  /* collect all consecutive pages that are dirty */
3540  for (; i < flush_info->num_toflush; i++)
3541  {
3542  bufptr = logpb_get_log_buffer (flush_info->toflush[i]);
3543 
3544  assert (bufptr->pageid == flush_info->toflush[i]->hdr.logical_pageid);
3545 
3546  if (!bufptr->dirty)
3547  {
3548  /* not dirty */
3549  break;
3550  }
3551  if (bufptr->pageid == log_Gl.append.get_nxio_lsa ().pageid)
3552  {
3553  /* this must be flushed last! */
3554  break;
3555  }
3556  if (prv_bufptr->pageid + 1 != bufptr->pageid)
3557  {
3558  /* not successive pages */
3559  break;
3560  }
3561  if (prv_bufptr->phy_pageid + 1 != bufptr->phy_pageid)
3562  {
3563  /* not successive pages on disk */
3564  break;
3565  }
3566 
3567  prv_bufptr = bufptr;
3568  }
3569 
3570  if (logpb_writev_append_pages (thread_p, &flush_info->toflush[idxflush], i - idxflush) == NULL)
3571  {
3572  /* is this acceptable? */
3573  assert_release (false);
3574  error_code = ER_FAILED;
3575  goto error;
3576  }
3577  else
3578  {
3579  int buf_iter;
3580  need_sync = true;
3581  flush_page_count += i - idxflush;
3582 
3583  logpb_log ("logpb_flush_all_append_pages: flushed all pages in range [%lld, %lld].\n",
3584  (long long int) flush_info->toflush[idxflush]->hdr.logical_pageid,
3585  (long long int) flush_info->toflush[idxflush]->hdr.logical_pageid + i - idxflush - 1);
3586 
3587  /* set not dirty what we have flushed */
3588  for (buf_iter = idxflush; buf_iter < i; buf_iter++)
3589  {
3590  bufptr = logpb_get_log_buffer (flush_info->toflush[buf_iter]);
3591  bufptr->dirty = false;
3592  }
3593 #if defined (CUBRID_DEBUG)
3594  dirty_page_count += i - idxflush;
3595 #endif /* CUBRID_DEBUG */
3596  }
3597 
3598  if (i == flush_info->num_toflush)
3599  {
3600  /* nothing left to flush */
3601  break;
3602  }
3603  }
3604 
3605  /* now flush the nxio_lsa page... unless it is the page of header for incomplete log record */
3606  nxio_lsa = log_Gl.append.get_nxio_lsa ();
3608  {
3611 
3612  bufptr = &log_Pb.buffers[logpb_get_log_buffer_index (nxio_lsa.pageid)];
3613 
3614  if (bufptr->pageid != nxio_lsa.pageid)
3615  {
3616  /* not expected. */
3617  assert_release (false);
3618 
3619  logpb_log ("logpb_flush_all_append_pages: fatal error, nxio_lsa %lld|%d page not found in buffer. "
3620  "bufptr->pageid is %lld instead.\n",
3621  (long long int) nxio_lsa.pageid, (int) nxio_lsa.offset, (long long int) bufptr->pageid);
3622 
3623  error_code = ER_FAILED;
3624  goto error;
3625  }
3626 
3627  if (!bufptr->dirty)
3628  {
3629  /* not expected */
3630  assert_release (false);
3631 
3632  logpb_log ("logpb_flush_all_append_pages: fatal error, nxio_lsa %lld|%d page is not dirty.\n",
3633  (long long int) nxio_lsa.pageid, (int) nxio_lsa.offset);
3634 
3635  error_code = ER_FAILED;
3636  goto error;
3637  }
3638 
3639  logpb_write_page_to_disk (thread_p, bufptr->logpage, bufptr->pageid);
3640  need_sync = true;
3641  bufptr->dirty = false;
3642  flush_page_count += 1;
3643 
3644  logpb_log ("logpb_flush_all_append_pages: flushed nxio_lsa = %lld|%d page to disk.\n",
3645  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset);
3646 
3647  if (logpb_Logging)
3648  {
3649  /* Dump latest portion of page, for debugging purpose. */
3650  logpb_dump_log_page_area (thread_p, bufptr->logpage, (int) (log_Gl.append.get_nxio_lsa ().offset),
3651  (int) sizeof (LOG_RECORD_HEADER));
3652  logpb_dump_log_page_area (thread_p, bufptr->logpage, (int) (log_Gl.hdr.eof_lsa.offset),
3653  (int) sizeof (LOG_RECORD_HEADER));
3654  }
3655  }
3656  else
3657  {
3658  logpb_log ("logpb_flush_all_append_pages: skipped flushing nxio_lsa = %lld|%d page to disk because it matches "
3659  "the header page for incomplete record (prev_lsa = %lld|%d).\n",
3660  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset,
3661  (long long int) log_Gl.append.prev_lsa.pageid, (int) log_Gl.append.prev_lsa.offset);
3662  }
3663 
3664  /* Make sure that all of the above log writes are synchronized with any future log writes.
3665  * That is, the pages should be stored on physical disk.
3666  */
3667  if (need_sync == true)
3668  {
3671  {
3672  /* System volume. No need to sync DWB. */
3674  {
3675  error_code = ER_FAILED;
3676  goto error;
3677  }
3678  log_Stat.total_sync_count++;
3679  }
3680  }
3681 
3682  /* dual writing (Background archiving) */
3684  {
3686  }
3687 
3688 #if !defined(NDEBUG)
3689  if (prm_get_bool_value (PRM_ID_LOG_TRACE_DEBUG) && logpb_is_any_dirty (thread_p) == true)
3690  {
3691  er_log_debug (ARG_FILE_LINE, "logpb_flush_all_append_pages: Log Buffer contains dirty pages\n");
3692  logpb_dump (thread_p, stdout);
3693  fflush (stdout);
3694  }
3695 #endif
3696 
3697  if (flush_info->num_toflush == flush_info->max_toflush)
3698  {
3699  log_Stat.log_buffer_full_count++;
3700  }
3701 #if defined(CUBRID_DEBUG)
3702  curr_flush_count = flush_info->num_toflush;
3703 #endif /* CUBRID_DEBUG */
3704 
3705  /*
3706  * Change the log sequence address to indicate the next append address to flush and synchronize
3707  */
3709  {
3710  /* partially flushed log record is now complete */
3711 
3712  /* overwrite with original log record. */
3714  error_code =
3717  if (error_code != NO_ERROR)
3718  {
3719  goto error;
3720  }
3721  ++flush_page_count;
3722 
3723  /* Update checksum. */
3724  first_append_pageid = log_Pb.partial_append.log_page_record_header->hdr.logical_pageid;
3725  bufptr = &log_Pb.buffers[logpb_get_log_buffer_index (first_append_pageid)];
3726  if (bufptr->pageid == first_append_pageid)
3727  {
3729  assert (!memcmp (bufptr->logpage, log_Pb.partial_append.log_page_record_header, LOG_PAGESIZE));
3730 #if !defined(NDEBUG)
3731  logpb_debug_check_log_page (thread_p, bufptr->logpage);
3732 #endif
3733  }
3734 
3735  /* we need to also sync again */
3737  {
3738  error_code = ER_FAILED;
3739  goto error;
3740  }
3741 
3742  /* now we can set the nxio_lsa to append_lsa */
3744 
3746 
3747  logpb_log ("logpb_flush_all_append_pages: completed partial record and flush again its first page %lld. "
3748  "nxio_lsa = %lld|%d.\n",
3749  (long long int) log_Pb.partial_append.log_page_record_header->hdr.logical_pageid,
3750  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset);
3751  }
3753  {
3754  /* we cannot set nxio_lsa to append_lsa yet. set it to append.prev_lsa */
3756 
3757  logpb_log ("logpb_flush_all_append_pages: partial record flushed... set nxio_lsa = %lld|%d.\n",
3758  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset);
3759  }
3760  else if (log_Pb.partial_append.status == LOGPB_APPENDREC_SUCCESS)
3761  {
3763 
3764  logpb_log ("logpb_flush_all_append_pages: set nxio_lsa = %lld|%d.\n",
3765  (long long int) log_Gl.append.get_nxio_lsa ().pageid, (int) log_Gl.append.get_nxio_lsa ().offset);
3766  }
3767  else
3768  {
3769  /* unexpected */
3770  assert_release (false);
3771  error_code = ER_FAILED;
3772  goto error;
3773  }
3774  flush_info->num_toflush = 0;
3775 
3776  if (log_Gl.append.log_pgptr != NULL)
3777  {
3778  /* Add the append page */
3779  flush_info->toflush[flush_info->num_toflush] = log_Gl.append.log_pgptr;
3780  flush_info->num_toflush++;
3781  }
3782 
3784  log_Stat.last_flush_count_by_trans = flush_page_count;
3785  log_Stat.total_flush_count_by_trans += flush_page_count;
3786 
3787 #if defined(CUBRID_DEBUG)
3788  gettimeofday (&end_time, NULL);
3789 
3790  log_Stat.last_flush_sec_by_trans = LOG_GET_ELAPSED_TIME (end_time, start_time);
3791 
3793 
3794  commit_count = log_Stat.commit_count - prev_commit_count_in_flush;
3795  prev_commit_count_in_flush = log_Stat.commit_count;
3796 
3797  log_Stat.last_commit_count_in_flush_pages = commit_count;
3799 
3801  "logpb_flush_all_append_pages: flush page(%ld / %d / %ld) avg flush count(%f), avg flush sec(%f)"
3802  "commit count(%ld) avg commit count(%f)\n", log_Stat.last_flush_count_by_trans, dirty_page_count,
3803  curr_flush_count,
3804  (double) log_Stat.total_flush_count_by_trans / log_Stat.flushall_append_pages_call_count,
3805  log_Stat.total_flush_sec_by_trans / log_Stat.flushall_append_pages_call_count, commit_count,
3807 #endif /* CUBRID_DEBUG */
3808 
3809  pthread_mutex_unlock (&flush_info->flush_mutex);
3810  hold_flush_mutex = false;
3811 
3812 #if defined(SERVER_MODE)
3813  if (!HA_DISABLED () && !writer_info->skip_flush)
3814  {
3815  /* it sends signal to LWT to notify that flush is completed */
3816  rv = pthread_mutex_lock (&writer_info->flush_wait_mutex);
3817 
3818  if (thread_p != NULL && thread_p->event_stats.trace_log_flush_time > 0)
3819  {
3820  flush_completed_time = log_get_clock_msec ();
3821  }
3822 
3823  writer_info->flush_completed = true;
3824  rv = pthread_cond_broadcast (&writer_info->flush_wait_cond);
3825 
3826  rv = pthread_mutex_unlock (&writer_info->flush_wait_mutex);
3827 
3828  /* It waits until all log writer threads are done */
3829  rv = pthread_mutex_lock (&writer_info->flush_end_mutex);
3830 
3831  rv = pthread_mutex_lock (&writer_info->wr_list_mutex);
3832  entry = writer_info->writer_list;
3833  while (entry != NULL)
3834  {
3835  if (entry->status == LOGWR_STATUS_FETCH)
3836  {
3837  break;
3838  }
3839  entry = entry->next;
3840  }
3841  pthread_mutex_unlock (&writer_info->wr_list_mutex);
3842 
3843  if (entry != NULL)
3844  {
3845  rv = pthread_cond_wait (&writer_info->flush_end_cond, &writer_info->flush_end_mutex);
3846  }
3847 
3848  rv = pthread_mutex_lock (&writer_info->wr_list_mutex);
3849  writer_info->trace_last_writer = false;
3850 
3851  if (thread_p != NULL && thread_p->event_stats.trace_log_flush_time > 0)
3852  {
3853  all_writer_thr_end_time = log_get_clock_msec ();
3854 
3855  if (all_writer_thr_end_time - flush_start_time > thread_p->event_stats.trace_log_flush_time)
3856  {
3857  event_log_log_flush_thr_wait (thread_p, flush_page_count, &writer_info->last_writer_client_info,
3858  (int) (all_writer_thr_end_time - flush_start_time),
3859  (int) (all_writer_thr_end_time - flush_completed_time),
3860  (int) writer_info->last_writer_elapsed_time);
3861  }
3862  }
3863 
3864  pthread_mutex_unlock (&writer_info->wr_list_mutex);
3865 
3866  pthread_mutex_unlock (&writer_info->flush_end_mutex);
3867  assert (hold_flush_mutex == false);
3868  LOG_CS_PROMOTE (thread_p);
3869  }
3870 #endif /* SERVER_MODE */
3871 
3872 #if defined(SERVER_MODE)
3873  if (thread_p && thread_p->type != TT_DAEMON && thread_p->type != TT_VACUUM_MASTER
3874  && thread_p->type != TT_VACUUM_WORKER)
3875  {
3876  /* reset event logging parameter */
3877  thread_p->event_stats.trace_log_flush_time = 0;
3878  }
3879 #endif /* SERVER_MODE */
3880 
3881  return 1;
3882 
3883 error:
3884  if (hold_flush_mutex)
3885  {
3886  pthread_mutex_unlock (&flush_info->flush_mutex);
3887  }
3888 
3889  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_flush_all_append_pages");
3890 
3891 #if defined(SERVER_MODE)
3892  if (thread_p && thread_p->type != TT_DAEMON && thread_p->type != TT_VACUUM_MASTER
3893  && thread_p->type != TT_VACUUM_WORKER)
3894  {
3895  /* reset event logging parameter */
3896  thread_p->event_stats.trace_log_flush_time = 0;
3897  }
3898 #endif /* SERVER_MODE */
3899 
3900  return error_code;
3901 }
3902 
3903 /*
3904  * logpb_flush_pages_direct - flush all log append pages from the calling thread.
3905  *
3906  * return: nothing
3907  *
 * NOTE: Calls logpb_flush_all_append_pages directly, discards its return value, and then increments
 *       log_Stat.direct_flush_count. LOG_CS_OWN_WRITE_MODE (thread_p) is only asserted here, never acquired,
 *       so the caller is expected to have entered the log critical section beforehand (e.g. logpb_flush_pages
 *       wraps the call in LOG_CS_ENTER / LOG_CS_EXIT).
 * NOTE(review): the declarator line and one statement right before the flush call are absent from this listing;
 *       confirm both against the complete source.
3908  */
3909 void
3911 {
3912 #if defined(CUBRID_DEBUG)
3913  er_log_debug (ARG_FILE_LINE, "logpb_flush_pages_direct: [%d]flush direct\n", (int) THREAD_ID ());
3914 #endif /* CUBRID_DEBUG */
3915 
3916  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
3917 
 /* NOTE(review): one statement is missing from the listing at this point. */
 /* the flush result is intentionally ignored; only the call is counted in the statistics below */
3919  (void) logpb_flush_all_append_pages (thread_p);
3920  log_Stat.direct_flush_count++;
3921 }
3922 
3923 /*
3924  * logpb_flush_pages - FLUSH LOG APPEND PAGES
3925  *
3926  * return: nothing
3927  *
3928  * flush_lsa(in): log address the caller wants flushed. In server mode the wait loop keeps running while
 *                log_Gl.append.get_nxio_lsa () compares LSA_LT to it.
3929  *
3930  * NOTE:There are 4 cases to commit.
 *      (X = option off, O = option on, as follows from the branches on async_commit / group_commit below)
3931  * ASYNC | GROUP COMMIT
3932  * X X : normal commit, wakeup LFT and wait
3933  * X O : group commit, wait
3934  * O X : async commit, wakeup LFT and return
3935  * O O : async & group commit, just return
 *
 *      Outside SERVER_MODE, and in SERVER_MODE when BO_IS_SERVER_RESTARTED () is false or flush_lsa is
 *      NULL / a null LSA, the pages are flushed directly between LOG_CS_ENTER and LOG_CS_EXIT.
 *      When waiting, the thread sleeps on group_commit_info->gc_cond in rounds of at most
 *      max_wait_time_in_msec (1000) milliseconds.
 * NOTE(review): this listing omits several lines of the function (a guard condition, the assignment of
 *      async_commit and the wakeup statements); the places are marked below. Confirm against the complete source.
3936  */
3937 void
3938 logpb_flush_pages (THREAD_ENTRY * thread_p, LOG_LSA * flush_lsa)
3939 {
3940 #if !defined(SERVER_MODE)
 /* non-server build: no separate flusher is used here, flush in place under the log critical section */
3941  LOG_CS_ENTER (thread_p);
3942  logpb_flush_pages_direct (thread_p);
3943  LOG_CS_EXIT (thread_p);
3944 #else /* SERVER_MODE */
3945  int rv;
3946  struct timeval start_time = { 0, 0 };
3947  struct timeval tmp_timeval = { 0, 0 };
3948  struct timespec to = { 0, 0 };
 /* upper bound, in milliseconds, for a single timed wait on gc_cond */
3949  int max_wait_time_in_msec = 1000;
3950  bool need_wakeup_LFT, need_wait;
3951  bool async_commit, group_commit;
3952  LOG_LSA nxio_lsa;
3953  LOG_GROUP_COMMIT_INFO *group_commit_info = &log_Gl.group_commit_info;
3954 
3955  assert (flush_lsa != NULL && !LSA_ISNULL (flush_lsa));
3956 
 /* release builds still tolerate a missing/null flush_lsa: fall back to a direct flush */
3957  if (!BO_IS_SERVER_RESTARTED () || flush_lsa == NULL || LSA_ISNULL (flush_lsa))
3958  {
3959  LOG_CS_ENTER (thread_p);
3960  logpb_flush_pages_direct (thread_p);
3961  LOG_CS_EXIT (thread_p);
3962 
3963  return;
3964  }
3965  assert (!LOG_CS_OWN_WRITE_MODE (thread_p));
3966 
 /* NOTE(review): the condition guarding the following direct-flush block is missing from the listing. */
3968  {
3969  LOG_CS_ENTER (thread_p);
3970  logpb_flush_pages_direct (thread_p);
3971  LOG_CS_EXIT (thread_p);
3972 
3973  return;
3974  }
3975 
 /* NOTE(review): the assignment to async_commit is missing from the listing. */
3977  group_commit = LOG_IS_GROUP_COMMIT_ACTIVE ();
3978 
 /* translate the (async, group) pair into the two decisions of the table in the header comment */
3979  if (async_commit == false)
3980  {
3981  need_wait = true;
3982  if (group_commit == false)
3983  {
3984  /* Default case: synchronous & non-group commit */
3985  need_wakeup_LFT = true;
3986  }
3987  else
3988  {
3989  /* synchronous & group commit */
3990  need_wakeup_LFT = false;
3991  log_Stat.gc_commit_request_count++;
3992  }
3993  }
3994  else
3995  {
3996  need_wait = false;
3997  log_Stat.async_commit_request_count++;
3998 
3999  if (group_commit == false)
4000  {
4001  /* asynchronous & non-group commit */
4002  need_wakeup_LFT = true;
4003  }
4004  else
4005  {
4006  /* asynchronous & group commit */
4007  need_wakeup_LFT = false;
4008  log_Stat.gc_commit_request_count++;
4009  }
4010  }
4011 
 /* when neither flag is set (async & group commit) no branch below is taken and the function just returns */
4012  if (need_wakeup_LFT == true && need_wait == false)
4013  {
 /* NOTE(review): body missing from the listing; per the header table this is "wakeup LFT and return". */
4015  }
4016  else if (need_wait == true)
4017  {
4018  nxio_lsa = log_Gl.append.get_nxio_lsa ();
4019 
 /* a group-commit waiter still asks for a wakeup when pgbuf_has_perm_pages_fixed () reports true.
  * NOTE(review): presumably so that it does not sit on fixed pages until the next group flush - confirm. */
4020  if (need_wakeup_LFT == false && pgbuf_has_perm_pages_fixed (thread_p))
4021  {
4022  need_wakeup_LFT = true;
4023  }
4024 
4025  while (LSA_LT (&nxio_lsa, flush_lsa))
4026  {
 /* build the absolute deadline (now + max_wait_time_in_msec) handed to pthread_cond_timedwait */
4027  gettimeofday (&start_time, NULL);
4028  (void) timeval_add_msec (&tmp_timeval, &start_time, max_wait_time_in_msec);
4029  (void) timeval_to_timespec (&to, &tmp_timeval);
4030 
4031  rv = pthread_mutex_lock (&group_commit_info->gc_mutex);
 /* re-read nxio_lsa while holding gc_mutex; stop without sleeping if it has already reached flush_lsa */
4032  nxio_lsa = log_Gl.append.get_nxio_lsa ();
4033  if (LSA_GE (&nxio_lsa, flush_lsa))
4034  {
4035  pthread_mutex_unlock (&group_commit_info->gc_mutex);
4036  break;
4037  }
4038 
4039  if (need_wakeup_LFT == true)
4040  {
 /* NOTE(review): body missing from the listing; presumably the same wakeup as in the async branch. */
4042  }
 /* the result of the timed wait is ignored; the loop condition decides whether to go around again */
4043  (void) pthread_cond_timedwait (&group_commit_info->gc_cond, &group_commit_info->gc_mutex, &to);
4044  pthread_mutex_unlock (&group_commit_info->gc_mutex);
4045 
 /* from the second round on the wakeup branch above is always taken, even for group commit */
4046  need_wakeup_LFT = true;
4047  nxio_lsa = log_Gl.append.get_nxio_lsa ();
4048  }
4049  }
4050 #endif /* SERVER_MODE */
4051 }
4052 
/*
 * Runs logpb_flush_pages_direct between LOG_CS_ENTER and LOG_CS_EXIT, so the caller does not need to enter the
 * log critical section itself.
 *
 * return: nothing
 *
 * NOTE(review): the declarator line (function name and parameter list) is absent from this listing; thread_p is
 * presumably its only parameter - confirm against the complete source.
 */
4053 void
4055 {
4056  LOG_CS_ENTER (thread_p);
4057  logpb_flush_pages_direct (thread_p);
4058  LOG_CS_EXIT (thread_p);
4059 }
4060 
/*
 * Between LOG_CS_ENTER and LOG_CS_EXIT, runs logpb_flush_pages_direct and then logpb_flush_header, in that order.
 *
 * return: nothing
 *
 * NOTE(review): the declarator line (function name and parameter list) is absent from this listing; thread_p is
 * presumably its only parameter - confirm against the complete source.
 */
4061 void
4063 {
4064  LOG_CS_ENTER (thread_p);
4065  logpb_flush_pages_direct (thread_p);
4066  logpb_flush_header (thread_p);
4067  LOG_CS_EXIT (thread_p);
4068 }
4069 
4070 /*
4071  * logpb_invalid_all_append_pages - Invalidate all append pages
4072  *
4073  * return: nothing
4074  *
4075  * NOTE:Invalidate and free all append pages. Before invalidating the
4076  * pages, if they are dirty, they are flushed.
 *
 *      In the code visible here: when log_Gl.append.log_pgptr is not NULL the append pages are flushed through
 *      logpb_flush_pages_direct; afterwards, under flush_info->flush_mutex, the to-flush list is emptied
 *      (num_toflush = 0, toflush[0] = NULL). LOG_CS_OWN_WRITE_MODE (thread_p) is asserted, not acquired.
 * NOTE(review): the declarator line and one statement following the direct flush are absent from this listing;
 *      confirm against the complete source.
4077  */
4078 void
4080 {
4081  LOG_FLUSH_INFO *flush_info = &log_Gl.flush_info;
 /* NOTE(review): rv is declared only under SERVER_MODE but assigned unconditionally further down; non-server
  * builds must obtain rv (or a suitable pthread_mutex_lock replacement) from elsewhere - confirm. */
4082 #if defined(SERVER_MODE)
4083  int rv;
4084 #endif /* SERVER_MODE */
4085 
4086  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
4087 
4088  logpb_log ("called logpb_invalid_all_append_pages\n");
4089 
4090  if (log_Gl.append.log_pgptr != NULL)
4091  {
4092  /*
4093  * Somehow we already have an append page, flush all current append pages
4094  * and start from scratch
4095  */
4096  logpb_flush_pages_direct (thread_p);
 /* NOTE(review): one statement is missing from the listing at this point. */
4098  }
4099 
 /* empty the to-flush list while holding its mutex */
4100  rv = pthread_mutex_lock (&flush_info->flush_mutex);
4101 
4102  flush_info->num_toflush = 0;
 /* num_toflush is 0 here, so this clears the first slot of the list */
4103  flush_info->toflush[flush_info->num_toflush] = NULL;
4104 
4105  pthread_mutex_unlock (&flush_info->flush_mutex);
4106 }
4107 
4108 /*
4109  * logpb_flush_log_for_wal - Flush log if needed
4110  *
4111  * return: nothing
4112  *
4113  * lsa_ptr(in): Force all log records up to this lsa
4114  *
4115  * NOTE:Flush the log up to given log sequence address according to the WAL rule.
4116  * The page buffer manager must call this function whenever a
4117  * page is about to be flushed due to a page replacement.
 *
 *      logpb_need_wal (lsa_ptr) is evaluated twice: once without the log critical section, so that nothing
 *      is entered when no flush is required, and once more after LOG_CS_ENTER, because the log may have been
 *      flushed in between. Only if the second check still holds is logpb_flush_pages_direct called.
 * NOTE(review): one statement right after the first check is absent from this listing; confirm against the
 *      complete source.
4118  */
4119 void
4120 logpb_flush_log_for_wal (THREAD_ENTRY * thread_p, const LOG_LSA * lsa_ptr)
4121 {
4122  if (logpb_need_wal (lsa_ptr))
4123  {
 /* NOTE(review): one statement is missing from the listing at this point. */
4125 
4126  LOG_CS_ENTER (thread_p);
 /* check again now that the log critical section has been entered */
4127  if (logpb_need_wal (lsa_ptr))
4128  {
4129  logpb_flush_pages_direct (thread_p);
4130  }
4131  else
4132  {
4133  /* was flushed in the meantime */
4134  }
4135  LOG_CS_EXIT (thread_p);
4136 
 /* after the flush a non-null lsa_ptr is expected not to need WAL any more (debug builds only) */
4137  assert (LSA_ISNULL (lsa_ptr) || !logpb_need_wal (lsa_ptr));
4138 
4139 #if defined(CUBRID_DEBUG)
 /* diagnostic only: dump the log buffer if WAL is still needed and lsa_ptr differs from log_Gl.rcv_phase_lsa */
4140  if (logpb_need_wal (lsa_ptr) && !LSA_EQ (&log_Gl.rcv_phase_lsa, lsa_ptr))
4141  {
4142  er_log_debug (ARG_FILE_LINE, "log_wal: SYSTEM ERROR.. DUMP LOG BUFFER\n");
4143  logpb_dump (thread_p, stdout);
4144  }
4145 #endif /* CUBRID_DEBUG */
4146  }
4147 }
4148 
4149 /*
4150  *
4151  * FUNCTIONS RELATED TO DATA APPEND
4152  *
4153  */
4154 
4155 /*
4156  * logpb_start_append - Start appending a new log record
4157  *
4158  * return: nothing
4159  *
4160  * header(in): log record header; it is copied by value to the position returned by LOG_APPEND_PTR ()
4161  *
4162  * NOTE: header->back_lsa must equal log_Gl.append.prev_lsa, otherwise logpb_fatal_error is raised.
 *       A header of type LOG_END_OF_LOG (sent by logpb_flush_all_append_pages) takes a separate branch from
 *       every other record type; only the latter goes through LOG_APPEND_SETDIRTY_ADD_ALIGN.
 *       LOG_CS_OWN_WRITE_MODE (thread_p) is asserted, not acquired.
 * NOTE(review): a large number of lines of this function are absent from this listing (the declarator line,
 *       several statements and the conditions of three blocks); the places are marked below. Confirm every
 *       detail against the complete source.
4163  */
4164 static void
4166 {
4167  LOG_RECORD_HEADER *log_rec; /* Log record */
4168 
4169  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
4170 
4171  /* Record number of append log record in statistics */
 /* NOTE(review): the statement this comment refers to is missing from the listing. */
4173 
4174  /* Does the new log record fit in this page ? */
 /* NOTE(review): the statement this comment refers to is missing from the listing. */
4176 
 /* the new record must chain back to the previously appended record */
4177  if (!LSA_EQ (&header->back_lsa, &log_Gl.append.prev_lsa))
4178  {
4179  logpb_fatal_error (thread_p, true, ARG_FILE_LINE, "logpb_start_append");
4180  }
4181 
4183 
 /* NOTE(review): the conditions of the two nested blocks below, and whatever defines tde_algo, are missing from
  * the listing. The visible inner branch applies tde_algo to the current append page and marks it dirty;
  * the else branch only logs that the algorithm was already set. */
4185  {
4187  {
4189  logpb_set_tde_algorithm (thread_p, log_Gl.append.log_pgptr, tde_algo);
4190  logpb_set_dirty (thread_p, log_Gl.append.log_pgptr);
4191  logpb_log ("logpb_start_append: set tde_algorithm to existing page (%lld), "
4192  "tde_algorithm = %s\n", (long long int) log_Gl.append.log_pgptr->hdr.logical_pageid,
4193  tde_get_algorithm_name (tde_algo));
4194  }
4195  else
4196  {
4197  logpb_log ("logpb_start_append: tde_algorithm already set to existing page (%lld), "
4198  "tde_algorithm = %s\n", (long long int) log_Gl.append.log_pgptr->hdr.logical_pageid,
4200  }
4201  }
4202 
 /* write the header at the current append position */
4203  log_rec = (LOG_RECORD_HEADER *) LOG_APPEND_PTR ();
4204  *log_rec = *header;
4205 
4206  /*
4207  * If the header of the append page does not have the offset set to the
4208  * first log record, this is the first log record in the page, set to it.
4209  */
4210 
 /* NOTE(review): the condition and the body of the block below are missing from the listing. */
4212  {
4214  }
4215 
4216  if (log_rec->type == LOG_END_OF_LOG)
4217  {
4218  /* this comes from logpb_flush_all_append_pages */
 /* NOTE(review): statements are missing from the listing here; the only visible effect of this branch is
  * marking the append page dirty. */
4221 
4223 
4224  logpb_set_dirty (thread_p, log_Gl.append.log_pgptr);
4225  }
4226  else
4227  {
4228  /* no record should be in progress now */
 /* NOTE(review): statements are missing from the listing here. */
4230 
4232 
4233  /*
4234  * Set the page dirty, increase and align the append offset
4235  */
4236  LOG_APPEND_SETDIRTY_ADD_ALIGN (thread_p, sizeof (LOG_RECORD_HEADER));
4237 
 /* NOTE(review): one trailing statement is missing from the listing here. */
4239  }
4240 }
4241 
4242 /*
4243  * logpb_append_data - Append data
4244  *
4245  * return: nothing
4246  *
4247  * length(in): Length of data to append, in bytes; 0 makes the call a no-op
4248  * data(in): Data to append; NULL makes the call a no-op
4249  *
4250  * NOTE:Append data as part of current log record.
 *      The bytes are copied to LOG_APPEND_PTR () and log_Gl.hdr.append_lsa.offset is advanced by the number of
 *      bytes copied. When the data would reach LOG_LAST_APPEND_PTR () it is copied in pieces, each at most as
 *      long as the room left before last_ptr. The function ends with LOG_APPEND_ALIGN (thread_p, LOG_SET_DIRTY).
 *      LOG_CS_OWN_WRITE_MODE (thread_p) is asserted, not acquired.
 * NOTE(review): two statements (the initial alignment and the step to the next page) are absent from this
 *      listing; confirm against the complete source.
4251  */
4252 static void
4253 logpb_append_data (THREAD_ENTRY * thread_p, int length, const char *data)
4254 {
4255  int copy_length; /* Amount of contiguous data that can be copied */
4256  char *ptr; /* Pointer for copy data into log append buffer */
4257  char *last_ptr; /* Pointer to last portion available to copy into log append buffer */
4258 
4259  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
4260 
4261  if (length == 0 || data == NULL)
4262  {
4263  return;
4264  }
4265 
4266  /*
4267  * Align if needed,
4268  * don't set it dirty since this function has not updated
4269  */
 /* NOTE(review): the statement this comment refers to is missing from the listing. */
4271 
4272  ptr = LOG_APPEND_PTR ();
4273  last_ptr = LOG_LAST_APPEND_PTR ();
4274 
4275  /* Does data fit completely in current page ? */
 /* this branch is the "does not fit" case: the data would reach or pass last_ptr */
4276  if ((ptr + length) >= last_ptr)
4277  {
4278  while (length > 0)
4279  {
4280  if (ptr >= last_ptr)
4281  {
4282  /*
4283  * Get next page and set the current one dirty
4284  */
 /* NOTE(review): the statement this comment refers to is missing from the listing; the two
  * pointers are re-read right after it. */
4286  ptr = LOG_APPEND_PTR ();
4287  last_ptr = LOG_LAST_APPEND_PTR ();
4288  }
4289  /* Find the amount of contiguous data that can be copied */
4290  if (ptr + length >= last_ptr)
4291  {
4292  copy_length = CAST_BUFLEN (last_ptr - ptr);
4293  }
4294  else
4295  {
4296  copy_length = length;
4297  }
 /* copy one piece and advance source, destination, remaining length and append offset together */
4298  memcpy (ptr, data, copy_length);
4299  ptr += copy_length;
4300  data += copy_length;
4301  length -= copy_length;
4302  log_Gl.hdr.append_lsa.offset += copy_length;
4303  }
4304  }
4305  else
4306  {
 /* everything fits before last_ptr: single copy */
4307  memcpy (ptr, data, length);
4308  log_Gl.hdr.append_lsa.offset += length;
4309  }
4310 
4311  /*
4312  * Align the data for future appends.
4313  * Indicate that modifications were done
4314  */
4315  LOG_APPEND_ALIGN (thread_p, LOG_SET_DIRTY);
4316 }
4317 
4318 /*
4319  * logpb_append_crumbs - Append crumbs of data
4320  *
4321  * return: nothing
4322  *
4323  * num_crumbs(in): Number of crumbs; 0 makes the call a no-op
4324  * crumbs(in): The crumbs (length + data)
4325  *
4326  * NOTE: Append crumbs of data by gluing them. After this the log manager will lose track of what was glued.
 *       The crumbs are copied back to back: the write pointer is carried over from one crumb to the next and
 *       alignment is applied only once, after the last crumb, through LOG_APPEND_ALIGN (thread_p, LOG_SET_DIRTY).
 *       log_Gl.hdr.append_lsa.offset is advanced by the number of bytes copied.
 *       LOG_CS_OWN_WRITE_MODE (thread_p) is asserted, not acquired.
 * NOTE(review): two statements (the initial alignment and the step to the next page) are absent from this
 *       listing; confirm against the complete source.
4327  */
4328 static void
4329 logpb_append_crumbs (THREAD_ENTRY * thread_p, int num_crumbs, const LOG_CRUMB * crumbs)
4330 {
4331  const char *data; /* Data to copy */
4332  char *ptr; /* Pointer for copy data into log append buffer */
4333  char *last_ptr; /* Pointer to last portion available to copy into log append buffer */
4334  int copy_length; /* Amount of contiguous data that can be copied */
4335  int length;
4336  int i;
4337 
4338  assert (LOG_CS_OWN_WRITE_MODE (thread_p));
4339 
4340  if (num_crumbs == 0)
4341  {
4342  return;
4343  }
4344 
4345  /*
4346  * Align if needed,
4347  * don't set it dirty since this function has not updated
4348  */
 /* NOTE(review): the statement this comment refers to is missing from the listing. */
4350 
4351  ptr = LOG_APPEND_PTR ();
4352  last_ptr = LOG_LAST_APPEND_PTR ();
4353 
4354  for (i = 0; i < num_crumbs; i++)
4355  {
4356  length = crumbs[i].length;
4357  data = (char *) crumbs[i].data;
4358 
4359  /* Does data fit completely in current page ? */
 /* the while loop is the un-braced body of this if, and the else further down pairs with this if;
  * the if branch is the "does not fit" case (the crumb would reach or pass last_ptr) */
4360  if ((ptr + length) >= last_ptr)
4361  while (length > 0)
4362  {
4363  if (ptr >= last_ptr)
4364  {
4365  /*
4366  * Get next page and set the current one dirty
4367  */
 /* NOTE(review): the statement this comment refers to is missing from the listing; the two
  * pointers are re-read right after it. */
4369  ptr = LOG_APPEND_PTR ();
4370  last_ptr = LOG_LAST_APPEND_PTR ();
4371  }
4372  /* Find the amount of contiguous data that can be copied */
4373  if ((ptr + length) >= last_ptr)
4374  {
4375  copy_length = CAST_BUFLEN (last_ptr - ptr);
4376  }
4377  else
4378  {
4379  copy_length = length;
4380  }
 /* copy one piece and advance source, destination, remaining length and append offset together */
4381  memcpy (ptr, data, copy_length);
4382  ptr += copy_length;
4383  data += copy_length;
4384  length -= copy_length;
4385  log_Gl.hdr.append_lsa.offset += copy_length;
4386  }
4387  else
4388  {
 /* the whole crumb fits before last_ptr; ptr is advanced so that the next crumb follows directly */
4389  memcpy (ptr, data, length);
4390  ptr += length;
4391  log_Gl.hdr.append_lsa.offset += length;
4392  }
4393  }
4394 
4395  /*
4396  * Align the data for future appends.
4397  * Indicate that modifications were done
4398  */
4399  LOG_APPEND_ALIGN (thread_p, LOG_SET_DIRTY);
4400 }
4401 
4402 /*
4403  * logpb_end_append - Finish appending a log record
4404  *
4405  * return: nothing
4406  *
4407  * flush(in): Is it a requirement to flush the log ?
4408  * force_flush(in):
4409  *
4410  * NOTE: Finish appending a log record. If the log record was appended
4411  * in several log buffers, these buffers are flushed and freed.
4412  * Only one append buffer will remain pinned (fetched) in memory.
4413  * If the log record was appended in only one buffer, the buffer
4414  * is not flushed unless the caller requested flushing (e.g.,
4415  * for a log_commit record).
 *
 * NOTE(review): the body reads `header->forw_lsa`, but no `header` parameter is
 * described above, while `flush`/`force_flush` are not used in the visible body.
 * The parameter list in this comment is probably stale -- confirm against the signature.
4416  */
4417 static void
4419 {
4420  assert (LOG_CS_OWN_WRITE_MODE (thread_p)); /* caller must own the log critical section in write mode */
4421 
4424 
4425  /*
4426  * Find the log_rec portion of the append record, it may not be in the
4427  * current append buffer since it can be stored in several buffers. Then,
4428  * make the log_rec point to next future append record, unless it is
4429  * the special record type used for archives created during backups
4430  * that cannot have a forward lsa and must waste the remaining space
4431  * on the current page.
4432  */
 /* The forward LSA of the record header must already equal the current append LSA */
4433  assert (LSA_EQ (&header->forw_lsa, &log_Gl.hdr.append_lsa));
4434 
4436  {
4437  logpb_set_dirty (thread_p, log_Gl.append.log_pgptr);
4438  }
4439 
4441  {
4442  /* success, fall through */
4443  }
4445  {
4446  /* we need to flush the correct version now */
4448  logpb_flush_all_append_pages (thread_p);
4450  }
4451  else
4452  {
4453  /* invalid state */
4454  assert_release (false);
4455  }
4457 }
4458 
4459 /*
4460  *
4461  * FUNCTIONS RELATED TO LOG INFORMATION FILE
4462  *
4463  */
4464 
4465 /*
4466  * logpb_create_log_info - Create a log information file
4467  *
4468  * return: nothing
4469  *
4470  * logname_info(in): Name of the log information file
4471  * db_fullname(in): Name of the database or NULL (default to current one)
4472  *
4473  * NOTE: Creates a log information file. This file is used as a help
4474  * for the DBA of what things have been archived and what archive
4475  * logs are not needed during normal restart recovery (i.e.,
4476  * other than media crash).
 * Failures are not reported to the caller: if the file cannot be opened,
 * or the first message cannot be dumped, the function simply returns.
4477  */
4478 void
4479 logpb_create_log_info (const char *logname_info, const char *db_fullname)
4480 {
4481  FILE *fp; /* Pointer to file */
4482  const char *catmsg;
4483  const char *db_name = db_fullname; /* name used in the message; falls back to log_Db_fullname below */
4484  int error_code = NO_ERROR;
4485 
4486  /* Create the information file */
4487  fp = fopen (logname_info, "w");
4488  if (fp != NULL)
4489  {
4490  fclose (fp); /* the open/close pair only creates (or truncates) the file */
4492  if (db_name == NULL)
4493  {
4494  db_name = log_Db_fullname;
4495  }
4496  if (catmsg == NULL)
4497  {
 /* Built-in fallback text when no catalog message is available */
4498  catmsg = "COMMENT: %s for database %s\n";
4499  }
4500  error_code = log_dump_log_info (logname_info, false, catmsg, CUBRID_MAGIC_LOG_INFO, db_name);
4501  if (error_code != NO_ERROR)
4502  {
4503  return;
4504  }
4505 
 /* Register the new file in the volume information; the result is deliberately ignored.
  * Note that the original db_fullname (possibly NULL) is passed, not db_name. */
4506  (void) logpb_add_volume (db_fullname, LOG_DBLOG_INFO_VOLID, logname_info, DISK_UNKNOWN_PURPOSE);
4507  }
4508 }
4509 
4510 
4511 /*
4512  * logpb_get_guess_archive_num - Guess archive number
4513  *
4514  * return: arvnum or -1
4515  *
4516  * pageid(in): Desired page
4517  *
4518  * NOTE: Guess the archive number where the desired page is archived by searching the log information file.
 *       A non-negative answer from logpb_get_archive_num_from_info_table is returned as is; otherwise
 *       the log information file is scanned and the result is limited to log_Gl.hdr.nxarv_num - 1.
4519  */
4520 static int
4522 {
4523  FILE *fp;
4524  char line[LOG_MAX_LOGINFO_LINE];
4525  int arv_num = -1; /* current best guess; -1 means no guess yet */
4526  int last_arvnum = -1; /* archive number of the last entry parsed from the file */
4527  int next_arvnum;
4528  bool isfound = false; /* true when an entry whose page range contains pageid was seen */
4529  LOG_PAGEID from_pageid;
4530  LOG_PAGEID to_pageid;
4531  long long int f, t; /* scan targets for the page range, copied into from_pageid/to_pageid */
4532 
4533  assert (LOG_CS_OWN (thread_p));
4534 
4535  arv_num = logpb_get_archive_num_from_info_table (thread_p, pageid);
4536 
4537  if (arv_num >= 0)
4538  {
4539  return arv_num;
4540  }
4541 
4542  /*
4543  * Guess by looking into the log information file. This is just a guess
4544  */
4545  fp = fopen (log_Name_info, "r");
4546  if (fp != NULL)
4547  {
4548  while (fgets (line, LOG_MAX_LOGINFO_LINE, fp) != NULL)
4549  {
 /* Only lines whose text after the time prefix starts with the expected keyword are considered */
4550  if (strstr (line + TIME_SIZE_OF_DUMP_LOG_INFO,
4552  == line + TIME_SIZE_OF_DUMP_LOG_INFO)
4553  {
4554  /* A candidate for a guess */
 /* Parsed fields: archive number, first page id, last page id (two words are skipped) */
4555  if (sscanf (line + TIME_SIZE_OF_DUMP_LOG_INFO, "%*s %d %*s %lld %lld", &next_arvnum, &f, &t) == 3)
4556  {
4557  from_pageid = f;
4558  to_pageid = t;
4559 
4560  last_arvnum = next_arvnum;
4561 
4562  if (pageid < from_pageid)
4563  {
4564  /*
4565  * keep looking.
4566  * There is likely a hole in the archive process due to media
4567  * crashes off or the log information contains some missing
4568  * entries.
4569  */
4570  continue;
4571  }
4572 
 /* pageid is at or after the start of this archive: remember it as the best guess so far */
4573  arv_num = next_arvnum;
4574 
4575  if (pageid >= from_pageid && pageid <= to_pageid)
4576  {
4577  /* Found the page in this archive */
4578  isfound = true;
4579  break;
4580  }
4581  }
4582  }
4583  }
4584  fclose (fp);
4585  }
4586 
4587  if (arv_num == -1)
4588  {
4589  /*
4590  * If I have a log active, use it to find out a better archive number
4591  * for initial search
4592  */
4593  if (log_Gl.append.vdes != NULL_VOLDES)
4594  {
4595  arv_num = (int) (pageid / LOGPB_ACTIVE_NPAGES);
4596  }
4597  else
4598  {
4599  /*
4600  * We do not have a clue what it is available. Don't have log active
4601  * and likely we did not have backups.
4602  * Must trace for available archive volumes
4603  */
4604  arv_num = 0;
4605  }
4606  }
4607  else if (isfound == false && last_arvnum == arv_num && log_Gl.append.vdes != NULL_VOLDES)
4608  {
4609  /*
4610  * The log archive was chopped somehow.
4611  */
 /* The guess came from the last entry in the file without containing the page: use the newest archive */
4612  arv_num = log_Gl.hdr.nxarv_num - 1;
4613  }
4614 
4615  /* Ensure that we never pick one larger than the next one to be created */
4616  if (arv_num >= log_Gl.hdr.nxarv_num)
4617  {
4618  arv_num = log_Gl.hdr.nxarv_num - 1;
4619  }
4620 
4621  return arv_num;
4622 }
4623 
4624 /*
4625  * logpb_find_volume_info_exist - Check whether the volume information exists
4626  *
4627  * return: true/false (presumably true when the volume information exists -- TODO confirm)
4628  *
4629  * NOTE: Find if volume information exists.
4630  */
4631 bool
4633 {
4635 }
4636 
4637 /*
4638  * logpb_create_volume_info - Create the volume information and add first volume
4639  *
4640  * return: NO_ERROR or error code
4641  *
4642  * db_fullname(in): Name of the database or NULL (defualt to current one)
4643  *
4644  * NOTE: Create the volume information and add the first volume.
4645  */
4646 int
4647 logpb_create_volume_info (const char *db_fullname)
4648 {
4649  char vol_fullname[PATH_MAX];
4650  char *volinfo_fullname;
4651  FILE *volinfo_fp = NULL;
4652 
4653  if (db_fullname != NULL)
4654  {
4655  fileio_make_volume_info_name (vol_fullname, db_fullname);
4656  volinfo_fullname = vol_fullname;
4657  }
4658  else
4659  {
4660  volinfo_fullname = log_Name_volinfo;
4661  }
4662 
4663  volinfo_fp = fopen (volinfo_fullname, "w");
4664  if (volinfo_fp == NULL)
4665  {
4666  /* Unable to create the database volume information */
4667  er_set_with_oserror (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_BO_CANNOT_CREATE_VOL, 2, volinfo_fullname, db_fullname);
4668  return ER_BO_CANNOT_CREATE_VOL;
4669  }
4670  /*
4671  * Write information about:
4672  * the active log and the first volume of the database
4673  * in the volume information file
4674  */
4675  fprintf (volinfo_fp, "%4d %s\n", LOG_DBVOLINFO_VOLID, volinfo_fullname);
4676 
4677  fflush (volinfo_fp);
4678  fclose (volinfo_fp);
4679 
4680  return NO_ERROR;
4681 }
4682 
4683 /*
4684  * logpb_recreate_volume_info - Recreate the database volume information
4685  *
4686  * return: NO_ERROR if all OK, ER_ status otherwise
4687  *
4688  * NOTE: Recreate the database volume information from the internal information that is stored in each volume.
 *       On any failure the partially written volume information file (log_Name_volinfo) is removed.
4689  */
4690 int
4692 {
4693  VOLID volid = LOG_DBFIRST_VOLID; /* Current volume identifier */
4694  VOLID next_volid = LOG_DBFIRST_VOLID; /* Next volume identifier */
4695  char next_vol_fullname[PATH_MAX]; /* Next volume name */
4696  int error_code = NO_ERROR;
4697 
 /* Start from a fresh volume information file for the current database (NULL name) */
4698  error_code = logpb_create_volume_info (NULL);
4699  if (error_code != NO_ERROR)
4700  {
4701  goto error;
4702  }
4704  {
4705  error_code = ER_FAILED;
4706  goto error;
4707  }
4710  {
4711  error_code = ER_FAILED;
4712  goto error;
4713  }
4715  {
4716  error_code = ER_FAILED;
4717  goto error;
4718  }
4719 
4720  /* First the primary volume, then the rest of the volumes */
4721 
4722  strcpy (next_vol_fullname, log_Db_fullname);
4723 
 /* Walk the chain of volumes: each volume supplies the id and name of the next one via disk_get_link */
4724  do
4725  {
 /* logpb_add_volume returns the id it was given on success; anything else is a failure */
4726  if (logpb_add_volume (NULL, volid, next_vol_fullname, DB_PERMANENT_DATA_PURPOSE) != volid)
4727  {
4728  error_code = ER_FAILED;
4729  goto error;
4730  }
4731 
4732  if (disk_get_link (thread_p, volid, &next_volid, next_vol_fullname) == NULL)
4733  {
4734  error_code = ER_FAILED;
4735  goto error;
4736  }
4737 
4738  volid = next_volid;
4739  }
4740  while (volid != NULL_VOLID); /* NULL_VOLID as next id ends the chain */
4741 
4742  return error_code;
4743 
4744  /* ****** */
4745 error:
 /* Do not leave an incomplete volume information file behind */
4746  (void) remove (log_Name_volinfo);
4747  return error_code;
4748 }
4749 
4750 /*
4751  * logpb_add_volume - Add a new volume entry to the volume information
4752  *
4753  * return: new_volid or NULL_VOLID
4754  *
4755  * db_fullname(in):
4756  * new_volid(in): New volume identifier
4757  * new_volfullname(in): New volume name
4758  * new_volpurpose(in): Purpose of new volume
4759  *
4760  * NOTE: Add a new entry to the volume information
4761  */
4762 /* todo: remove purpose */
4763 VOLID
4764 logpb_add_volume (const char *db_fullname, VOLID new_volid, const char *new_volfullname, DISK_VOLPURPOSE new_volpurpose)
4765 {
4766  if (new_volpurpose != DB_TEMPORARY_DATA_PURPOSE)
4767  {
4768  char vol_fullname[PATH_MAX];
4769  char *volinfo_fullname;
4770  FILE *volinfo_fp = NULL;
4771 
4772  if (db_fullname != NULL)
4773  {
4774  fileio_make_volume_info_name (vol_fullname, db_fullname);
4775  volinfo_fullname = vol_fullname;
4776  }
4777  else
4778  {
4779  volinfo_fullname = log_Name_volinfo;
4780  }
4781 
4782  volinfo_fp = fopen (volinfo_fullname, "a");
4783  if (volinfo_fp != NULL)
4784  {
4785  /* Write information about this volume in the volume information file */
4786  fprintf (volinfo_fp, "%4d %s\n", new_volid, new_volfullname);
4787  fflush (volinfo_fp);
4788  fclose (volinfo_fp);
4789 
4790  return new_volid;
4791  }
4792  else
4793  {
4794  return NULL_VOLID;
4795  }
4796  }
4797 
4798  return new_volid;
4799 }
4800 
4801 /*
4802  * logpb_scan_volume_info - Scan the volume information entries
4803  *
4804  * return: number of entries the function was called on, or -1 in case of error
4805  *
4806  * db_fullname(in): Name of the database or NULL (log_Name_volinfo is used)
4807  * ignore_volid(in): Don't call function with this volume
4808  * start_volid(in): Scan should start at this point.
4809  * fun(in): Function to be called on each entry
4810  * args(in): Additional arguments to be passed to function
4811  *
4812  * NOTE: Scan the volume information entries calling the given function on each entry.
 *       -1 is returned when the file cannot be opened, when the entries are found unsorted, or when
 *       the function returns something other than NO_ERROR (the scan stops there).
4813  */
4814 int
4815 logpb_scan_volume_info (THREAD_ENTRY * thread_p, const char *db_fullname, VOLID ignore_volid, VOLID start_volid,
4816  int (*fun) (THREAD_ENTRY * thread_p, VOLID xvolid, const char *vlabel, void *args), void *args)
4817 {
4818  char xxvolinfo_fullname[PATH_MAX];
4819  char *volinfo_fullname;
4820  FILE *volinfo_fp = NULL; /* Pointer to new volinfo */
4821  char vol_fullname[PATH_MAX]; /* Next volume name */
4822  VOLID volid = LOG_DBFIRST_VOLID - 1; /* Next volume identifier */
4823  int read_int_volid; /* volume id as read from the file, before narrowing to VOLID */
4824  VOLID num_vols = 0; /* number of entries passed to fun; set to -1 on error */
4825  bool start_scan = false; /* becomes true once the entry for start_volid has been reached */
4826  char format_string[64];
4827 
4828  if (db_fullname != NULL)
4829  {
4830  fileio_make_volume_info_name (xxvolinfo_fullname, db_fullname);
4831  volinfo_fullname = xxvolinfo_fullname;
4832  }
4833  else
4834  {
4835  volinfo_fullname = log_Name_volinfo;
4836  }
4837 
4838  volinfo_fp = fopen (volinfo_fullname, "r");
4839  if (volinfo_fp == NULL)
4840  {
4842  return -1;
4843  }
4844 
 /* Build "%d %<PATH_MAX - 1>s" so that the volume name read below cannot overflow vol_fullname */
4845  sprintf (format_string, "%%d %%%ds", PATH_MAX - 1);
4846  while (true)
4847  {
 /* Stop at end of file or at the first line that does not have both fields */
4848  if (fscanf (volinfo_fp, format_string, &read_int_volid, vol_fullname) != 2)
4849  {
4850  break;
4851  }
4852 
 /* Once at least one entry has been processed, an id lower than or equal to the previous one means the file is unsorted */
4853  if ((volid + 1) != NULL_VOLID && (volid + 1) > (VOLID) read_int_volid && num_vols != 0)
4854  {
4855  er_set (ER_WARNING_SEVERITY, ARG_FILE_LINE, ER_BO_UNSORTED_VOLINFO, 4, volinfo_fullname, num_vols,
4856  read_int_volid, vol_fullname);
4857  num_vols = -1;
4858  break;
4859  }
4860  volid = (VOLID) read_int_volid;
4861 
4862  if (volid == NULL_VOLID)
4863  {
4864  continue;
4865  }
4866 
 /* Skip every entry that precedes start_volid */
4867  if (start_scan == false)
4868  {
4869  if (start_volid == read_int_volid)
4870  {
4871  start_scan = true;
4872  }
4873  else
4874  {
4875  continue;
4876  }
4877  }
4878 
4879  if (volid != ignore_volid)
4880  {
4881  if (((*fun) (thread_p, volid, vol_fullname, args)) != NO_ERROR)
4882  {
4883  num_vols = -1;
4884  break;
4885  }
4886 
4887  num_vols++;
4888  }
4889  }
4890 
4891  fclose (volinfo_fp);
4892 
4893  return num_vols;
4894 }
4895 
4896 /*
4897  *
4898  * FUNCTIONS RELATED TO LOG ARCHIVES
4899  *
4900  */
4901 
4902 /*
4903  * logpb_to_physical_pageid - Find physical page identifier of given logical page
4904  *
4905  * return: physical page identifier
4906  *
4907  * logical_pageid(in): Logical log page
4908  *
4909  * NOTE: Returns the physical page identifier associated with given logical page.
 *       LOGPB_HEADER_PAGE_ID maps to LOGPB_PHYSICAL_HEADER_PAGE_ID. Any other page maps to its
 *       offset from LOGPB_FIRST_ACTIVE_PAGE_ID, wrapped around LOGPB_ACTIVE_NPAGES, plus one.
4910  */
4913 {
4915 
4916  if (logical_pageid == LOGPB_HEADER_PAGE_ID)
4917  {
4918  phy_pageid = LOGPB_PHYSICAL_HEADER_PAGE_ID;
4919  }
4920  else
4921  {
4922  LOG_PAGEID tmp_pageid;
4923 
 /* Offset of the page from the first active page, reduced into the active log range */
4924  tmp_pageid = logical_pageid - LOGPB_FIRST_ACTIVE_PAGE_ID;
4925  if (tmp_pageid >= LOGPB_ACTIVE_NPAGES)
4926  {
4927  tmp_pageid %= LOGPB_ACTIVE_NPAGES;
4928  }
4929  else if (tmp_pageid < 0)
4930  {
 /* Negative offsets count backwards from the end of the active range */
4931  tmp_pageid = LOGPB_ACTIVE_NPAGES - ((-tmp_pageid) % LOGPB_ACTIVE_NPAGES);
4932  }
4933 
 /* Shift by one (presumably because the first physical page is the header -- confirm), then wrap again */
4934  tmp_pageid++;
4935  if (tmp_pageid > LOGPB_ACTIVE_NPAGES)
4936  {
4937  tmp_pageid %= LOGPB_ACTIVE_NPAGES;
4938  }
4939 
4940  assert (tmp_pageid <= PAGEID_MAX); /* must fit the narrower physical page id type */
4941  phy_pageid = (LOG_PHY_PAGEID) tmp_pageid;
4942  }
4943 
4944  return phy_pageid;
4945 }
4946 
4947 /*
4948  * logpb_is_page_in_archive - Is the given page an archive page ?
4949  *
4950  * return: result of LOGPB_IS_ARCHIVE_PAGE for the page
4951  *
4952  * pageid(in): Log page identifier
4953  *
4954  * NOTE: Find if given page is an archive page identifier.
4955  */
4956 bool
4958 {
4959  return LOGPB_IS_ARCHIVE_PAGE (pageid);
4960 }
4961 
4962 /*
4963  * logpb_is_smallest_lsa_in_archive - Is the smallest active LSA in a log archive ?
4964  *
4965  * return: true when the smallest LSA is not null and its page is an archive page, false otherwise
4966  *
4967  * NOTE: Returns true if the smallest active LSA is located in an archive log.
4968  */
4969 bool
4971 {
4972  LOG_LSA lsa; /* smallest lsa */
4973 
4974  logtb_find_smallest_lsa (thread_p, &lsa);
 /* A null LSA means there is nothing to look for, so the answer is false */
4975  return (!LSA_ISNULL (&lsa) && logpb_is_page_in_archive (lsa.pageid));
4976 }
4977 
4978 /*
4979  * logpb_get_archive_number - Archive location of given page
4980  *
4981  * return: archive number (never negative), or -1 when the page cannot be fetched from the archives
4982  *
4983  * pageid(in): The desired logical page
4984  *
4985  * NOTE: Find in what archive the page is located or in what archive the page should have been located.
4986  */
4987 int
4989 {
4990  int arv_num = 0;
4991 
 /* Only the archive number is wanted: no page buffer and no archive header are requested */
4992  if (logpb_fetch_from_archive (thread_p, pageid, NULL, &arv_num, NULL, false) == NULL)
4993  {
4994  return -1;
4995  }
4996 
 /* Do not return a negative archive number on the success path */
4997  if (arv_num < 0)
4998  {
4999  arv_num = 0;
5000  }
5001 
5002  return arv_num;
5003 }
5004 
5005 /*
5006  * logpb_set_unavailable_archive - Cache that given archive is unavailable
5007  *
5008  * return: nothing
5009  *
5010  * arv_num(in): Log archive number
5011  *
5012  * NOTE: Record that the given archive is unavailable.
 *       The cache grows in steps of 10 entries. When memory cannot be allocated the function
 *       returns without recording anything and without reporting an error.
5013  */
5014 static void
5016 {
5017  int *ptr; /* result of malloc/realloc, checked before use */
5018  int size; /* allocation size in bytes */
5019 
5021 
5023  {
 /* First use: room for 10 archive numbers */
5024  size = sizeof (*log_Gl.archive.unav_archives) * 10;
5025  ptr = (int *) malloc (size);
5026  if (ptr == NULL)
5027  {
5028  return;
5029  }
5030  log_Gl.archive.max_unav = 10;
5031  log_Gl.archive.next_unav = 0;
5033  }
5034  else
5035  {
5037  {
 /* Grow by another 10 entries; the old array stays valid if realloc fails */
5038  size = (sizeof (*log_Gl.archive.unav_archives) * (log_Gl.archive.max_unav + 10));
5039  ptr = (int *) realloc (log_Gl.archive.unav_archives, size);
5040  if (ptr == NULL)
5041  {
5042  return;
5043  }
5044  log_Gl.archive.max_unav += 10;
5046  }
5047  }
5048 
5050 }
5051 
5052 /*
5053  * logpb_dismount_log_archive - dismount archive log
5054  *
5055  * return: nothing
5056  *
5057  * It dismounts and resets log_Gl.archive.vdes
 * The work is done between LOG_ARCHIVE_CS_ENTER and LOG_ARCHIVE_CS_EXIT; nothing is done
 * when no archive is mounted (log_Gl.archive.vdes == NULL_VOLDES).
5058  */
5059 static void
5061 {
5062  LOG_ARCHIVE_CS_ENTER (thread_p);
5063 
5064  if (log_Gl.archive.vdes != NULL_VOLDES)
5065  {
5066  fileio_dismount (thread_p, log_Gl.archive.vdes);
5068  }
5069 
5070  LOG_ARCHIVE_CS_EXIT (thread_p);
5071 }
5072 
5073 /*
5074  * logpb_decache_archive_info - Decache any archive log memory information
5075  *
5076  * return: nothing
5077  *
5078  * NOTE: Decache any archive log memory information.
 *       A mounted archive is dismounted, and the counters of the unavailable-archive cache
 *       (max_unav, next_unav) are reset to 0.
5079  */
5080 void
5082 {
5083  LOG_ARCHIVE_CS_ENTER (thread_p);
5084 
5085  if (log_Gl.archive.vdes != NULL_VOLDES)
5086  {
 /* NOTE(review): logpb_dismount_log_archive enters LOG_ARCHIVE_CS again while it is held here;
  * presumably the critical section can be re-entered by its owner -- confirm */
5087  logpb_dismount_log_archive (thread_p);
5088  }
5089 
5091  {
5093  log_Gl.archive.max_unav = 0;
5094  log_Gl.archive.next_unav = 0;
5095  }
5096 
5097  LOG_ARCHIVE_CS_EXIT (thread_p);
5098 }
5099 
5100 /*
5101  * logpb_is_archive_available - Is given archive available ?
5102  *
5103  * return: true/false
5104  * true: means that the archive may be available.
5105  * false: it is known that archive is not available.
5106  *
5107  * arv_num(in): Log archive number
5108  *
5109  * NOTE: Find if the given archive is available. An archive number that is negative, or not smaller
 *       than log_Gl.hdr.nxarv_num, is never available; otherwise the archive is unavailable only
 *       if it is listed in log_Gl.archive.unav_archives.
5110  */
5111 static bool
5112 logpb_is_archive_available (THREAD_ENTRY * thread_p, int arv_num)
5113 {
5114  int i;
5115 
5116  assert (LOG_CS_OWN (thread_p));
5118 
 /* Archives at or beyond the next one to be created do not exist yet */
5119  if (arv_num >= log_Gl.hdr.nxarv_num || arv_num < 0)
5120  {
5121  return false;
5122  }
5123 
5125  {
 /* Linear search over the entries recorded so far (next_unav of them) */
5126  for (i = 0; i < log_Gl.archive.next_unav; i++)
5127  {
5128  if (log_Gl.archive.unav_archives[i] == arv_num)
5129  {
5130  return false;
5131  }
5132  }
5133  }
5134 
5135  return true;
5136 }
5137 
5138 /*
5139  * logpb_fetch_from_archive - Fetch a log page from the log archives
5140  *
5141  * return: log_pgptr or NULL (in case of error)
5142  *
5143  * pageid(in): The desired logical page
5144  * log_pgptr(in): Place to return the log page
5145  * arv_num(in): Set to archive number where page was found or where page
5146  * should have been found.
5147  *
5148  * NOTE: Fetch a log page from archive logs.
5149  */
5150 LOG_PAGE *
5152  int *ret_arv_num, LOG_ARV_HEADER * ret_arv_hdr, bool is_fatal)
5153 {
5154  char hdr_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT], *aligned_hdr_pgbuf;
5155  char log_pgbuf[IO_MAX_PAGE_SIZE + MAX_ALIGNMENT], *aligned_log_pgbuf;
5156  LOG_ARV_HEADER *arv_hdr;
5157  LOG_PAGE *hdr_pgptr;
5159  char arv_name[PATH_MAX];
5160  const char *tmp_arv_name;
5161  int arv_num, vdes;
5162  int direction = 0, retry;
5163  bool has_guess_arvnum = false, first_time = true;
5164  int error_code = NO_ERROR;
5165  char format_string[64];
5166 
5167  assert (LOG_CS_OWN (thread_p));
5168 
5169  logpb_log ("called logpb_fetch_from_archive for pageid = %lld\n", (long long int) pageid);
5170 
5171  LOG_ARCHIVE_CS_ENTER (thread_p);
5172 
5173  aligned_hdr_pgbuf = PTR_ALIGN (hdr_pgbuf, MAX_ALIGNMENT);
5174  aligned_log_pgbuf = PTR_ALIGN (log_pgbuf, MAX_ALIGNMENT);
5175 
5176 #if !defined(NDEBUG)
5178  {
5179  fprintf (stdout, "\n **log_fetch_from_archive has been called on pageid = %lld ** \n", (long long int) pageid);
5180  fflush (stdout);
5181  }
5182 #endif
5183 
5184  hdr_pgptr = (LOG_PAGE *) aligned_hdr_pgbuf;
5185  if (log_pgptr == NULL)
5186  {
5187  log_pgptr = (LOG_PAGE *) aligned_log_pgbuf;
5188  }
5189  if (ret_arv_num == NULL)
5190  {
5191  ret_arv_num = &arv_num;
5192  }
5193 
5194  if (log_Gl.archive.vdes == NULL_VOLDES)
5195  {
5196  if (log_Gl.hdr.nxarv_num <= 0)
5197  {
5198  /* We do not have any archives */
5200 
5201  LOG_ARCHIVE_CS_EXIT (thread_p);
5202  return NULL;
5203  }
5204 
5205  /*
5206  * Guess the archive where that page is stored
5207  */
5208 
5209  has_guess_arvnum = true;
5210  *ret_arv_num = logpb_get_guess_archive_num (thread_p, pageid);
5211  fileio_make_log_archive_name (arv_name, log_Archive_path, log_Prefix, *ret_arv_num);
5212 
5213  error_code = ER_FAILED;
5214  if (logpb_is_archive_available (thread_p, *ret_arv_num) == true && fileio_is_volume_exist (arv_name) == true)
5215  {
5216  vdes = fileio_mount (thread_p, log_Db_fullname, arv_name, LOG_DBLOG_ARCHIVE_VOLID, false, false);
5217  if (vdes != NULL_VOLDES)
5218  {
5219  if (fileio_read (thread_p, vdes, hdr_pgptr, 0, LOG_PAGESIZE) == NULL)
5220  {
5221  fileio_dismount (thread_p, vdes);
5222  er_set (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_READ, 3, 0LL, 0LL, arv_name);
5223 
5224  LOG_ARCHIVE_CS_EXIT (thread_p);
5225  return NULL;
5226  }
5227  error_code = NO_ERROR;
5228  arv_hdr = (LOG_ARV_HEADER *) hdr_pgptr->area;
5230  {
5231  if (difftime64 ((time_t) arv_hdr->db_creation, (time_t) log_Gl.hdr.db_creation) != 0)
5232  {
5233  /*
5234  * This volume does not belong to the database. For now, assume
5235  * that it is not only. Later, we will give this error to user
5236  */
5237  vdes = NULL_VOLDES;
5238  arv_hdr = NULL;
5239  }
5240  }
5241  }
5242  }
5243 
5244  if (error_code != NO_ERROR)
5245  {
5246  /*
5247  * The volume is not online. Ask for it later (below). But first try to
5248  * make the best guess for the archive number.
5249  */
5250  vdes = NULL_VOLDES;
5251  arv_hdr = NULL;
5252  }
5253  }
5254  else
5255  {
5256  vdes = log_Gl.archive.vdes;
5257  arv_hdr = &log_Gl.archive.hdr;
5258  *ret_arv_num = arv_hdr->arv_num;
5259  }
5260 
5261  sprintf (format_string, "%%%ds", PATH_MAX - 1);
5262 
5264  while (true)
5265  {
5266  /* Is the page in current archive log ? */
5267  if (arv_hdr != NULL && pageid >= arv_hdr->fpageid && pageid <= arv_hdr->fpageid + arv_hdr->npages - 1)
5268  {
5269  /* Find location of logical page in the archive log */
5270  phy_pageid = (LOG_PHY_PAGEID) (pageid - arv_hdr->fpageid + 1);
5271 
5272  /* Record number of reads in statistics */
5274 
5275  if (fileio_read (thread_p, vdes, log_pgptr, phy_pageid, LOG_PAGESIZE) == NULL)
5276  {
5277  /* Error reading archive page */
5278  tmp_arv_name = fileio_get_volume_label_by_fd (vdes, PEEK);
5279  fileio_dismount (thread_p, vdes);
5281  er_set (ER_FATAL_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOG_READ, 3, pageid, phy_pageid, tmp_arv_name);
5282 
5283  LOG_ARCHIVE_CS_EXIT (thread_p);
5284  return NULL;
5285  }
5286 
5287  TDE_ALGORITHM tde_algo = logpb_get_tde_algorithm (log_pgptr);
5288  if (tde_algo != TDE_ALGORITHM_NONE)
5289  {
5290  if (tde_decrypt_log_page (log_pgptr, tde_algo, log_pgptr) != NO_ERROR)
5291  {
5292  ASSERT_ERROR ();
5293  LOG_ARCHIVE_CS_EXIT (thread_p);
5294  return NULL;
5295  }
5296  }
5297 
5298  /* Cast the archive information. May be used again */
5299  if (arv_hdr != &log_Gl.archive.hdr)
5300  {
5301  log_Gl.archive.hdr = *arv_hdr;
5302  }
5303  log_Gl.archive.vdes = vdes;
5304  break;
5305  }
5306  else
5307  {
5308  /* If any archive dismount it */
5309  if (vdes != NULL_VOLDES)
5310  {
5311  fileio_dismount (thread_p, vdes);
5312  vdes = NULL_VOLDES;
5313  }
5314 
5315  if (has_guess_arvnum == false)
5316  {
5317  has_guess_arvnum = true;
5318  retry = logpb_get_guess_archive_num (thread_p, pageid);
5319  if (retry != *ret_arv_num)
5320  {
5321  *ret_arv_num = retry;
5322  }
5323  }
5324  else
5325  {
5326  if (direction == 0)
5327  {
5328  /*
5329  * Define the direction by looking for desired page
5330  */
5331  if (arv_hdr != NULL)
5332  {
5333  if (pageid < arv_hdr->fpageid)
5334  {
5335  /* Try older archives */
5336  direction = -1;
5337  }
5338  else
5339  {
5340  /* Try newer archives */
5341  direction = 1;
5342  }
5343  }
5344  else
5345  {
5346  if (first_time != true)
5347  {
5348  if (log_Gl.append.vdes == NULL_VOLDES)
5349  {
5350  direction = 1;
5351  }
5352  else
5353  {
5354  /*
5355  * Start looking from the last archive.
5356  * Optimized for UNDO.. This is not so bad since this branch
5357  * will be reached only when the guess archive is not
5358  * available.
5359  */
5360  *ret_arv_num = log_Gl.hdr.nxarv_num;
5361  direction = -1;
5362  }
5363  }
5364  }
5365  }
5366 
5367  if (arv_hdr != NULL)
5368  {
5369  if (direction == -1)
5370  {
5371  /*
5372  * Try an older archive.
5373  * The page that I am looking MUST be smaller than the first
5374  * page in current archive
5375  */
5376  if (pageid < arv_hdr->fpageid)
5377  {
5378  *ret_arv_num -= 1;
5379  }
5380  else
5381  {
5382  *ret_arv_num = -1;
5383  }
5384  }
5385  else
5386  {
5387  /* Try a newer archive. The page that I am looking MUST be larger than the last page in current
5388  * archive */
5389  if (pageid > arv_hdr->fpageid + arv_hdr->npages - 1)
5390  {
5391  *ret_arv_num += 1;
5392  }
5393  else
5394  {
5395  *ret_arv_num = log_Gl.hdr.nxarv_num;
5396  }
5397  }
5398  }
5399  else
5400  {
5401  /*
5402  * The archive number is not increased the first time in the loop,
5403  * so we can ask for it when it is not available.
5404  */
5405  if (first_time != true)
5406  {
5407  /*
5408  * If we do not have the log active, we don't really know how to
5409  * continue, we could be looping forever.
5410  */
5411  if (log_Gl.append.vdes == NULL_VOLDES)
5412  {
5413  *ret_arv_num = -1;
5414  }
5415  else
5416  {
5417  *ret_arv_num = *ret_arv_num + direction;
5418  }
5419  }
5420  }
5421 
5422  first_time = false;
5423  if (*ret_arv_num < 0 || *ret_arv_num == log_Gl.hdr.nxarv_num)
5424  {
5425  /* Unable to find page in archive */
5426  if (log_Gl.append.vdes != NULL_VOLDES)
5427  {
5429  }
5430  else
5431  {
5432  /*
5433  * This is likely an incomplete recovery (restore).
5434  * We do not have the active log and we are looking for a log page
5435  */
5437  }
5438 
5439  LOG_ARCHIVE_CS_EXIT (thread_p);
5440 
5441  return NULL;
5442  }
5443  }
5444 
5445  if (logpb_is_archive_available (thread_p, *ret_arv_num) == false)
5446  {
5447  arv_hdr = NULL;
5448  continue;
5449  }
5450 
5451  fileio_make_log_archive_name (arv_name, log_Archive_path, log_Prefix, *ret_arv_num);
5452  retry = 3;
5453  while (retry != 0 && retry != 1
5454  && (vdes =
5455  fileio_mount (thread_p, log_Db_fullname, arv_name, LOG_DBLOG_ARCHIVE_VOLID, false,
5456  false)) == NULL_VOLDES)
5457  {
5458  char line_buf[PATH_MAX * 2];
5459  bool is_in_crash_recovery;
5460 
5461  is_in_crash_recovery = log_is_in_crash_recovery ();
5462 
5463  /*
5464  * The archive is not online.
5465  */
5466  if (is_in_crash_recovery == true)
5467  {
5468  fprintf (stdout, "%s\n", er_msg ());
5469  }
5470 
5471  retry_prompt:
5473  {
5475  if (retry == 1 && is_in_crash_recovery == true)
5476  {
5477  fprintf (stdout, "Continue without present archive. (Partial recovery).\n");
5478  }
<