CUBRID Engine  latest
master_heartbeat.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * master_heartbeat.h - heartbeat module in cub_master
21  */
22 
23 #ifndef _MASTER_HEARTBEAT_H_
24 #define _MASTER_HEARTBEAT_H_
25 
26 #ident "$Id$"
27 
28 #include "heartbeat.h"
29 #include "log_lsa.hpp"
30 #include "master_util.h"
31 #include "porting.h"
32 #include "system_parameter.h"
33 
34 #if defined (LINUX)
35 #include <netinet/in.h>
36 #elif defined (WINDOWS)
37 #include <winsock2.h>
38 #endif
39 
40 #if defined(WINDOWS)
41 typedef int pid_t;
42 #endif
43 
44 /* ping result */
46 {
52 };
53 
54 #define HB_PING_UNKNOWN_STR "UNKNOWN"
55 #define HB_PING_SUCCESS_STR "SUCCESS"
56 #define HB_PING_USELESS_HOST_STR "SKIPPED"
57 #define HB_PING_SYS_ERR_STR "ERROR"
58 #define HB_PING_FAILURE_STR "FAILURE"
59 #define HB_PING_STR_SIZE (7)
60 
61 /* heartbeat cluster jobs */
63 {
73 };
74 
75 /* heartbeat resource jobs */
77 {
88 };
89 
90 /* heartbeat resource process state
91  * When changing this, SERVER_STATE must be changed as well.
92  * broker.c : enum SERVER_STATE */
94 {
106 };
107 #define HB_PSTATE_UNKNOWN_STR "unknown"
108 #define HB_PSTATE_DEAD_STR "dead"
109 #define HB_PSTATE_DEREGISTERED_STR "deregistered"
110 #define HB_PSTATE_STARTED_STR "started"
111 #define HB_PSTATE_NOT_REGISTERED_STR "not_registered"
112 #define HB_PSTATE_REGISTERED_STR "registered"
113 #define HB_PSTATE_REGISTERED_AND_STANDBY_STR "registered_and_standby"
114 #define HB_PSTATE_REGISTERED_AND_TO_BE_STANDBY_STR "registered_and_to_be_standby"
115 #define HB_PSTATE_REGISTERED_AND_ACTIVE_STR "registered_and_active"
116 #define HB_PSTATE_REGISTERED_AND_TO_BE_ACTIVE_STR "registered_and_to_be_active"
117 #define HB_PSTATE_STR_SZ (32)
118 
119 #define HB_REPLICA_PRIORITY 0x7FFF
120 
121 /* heartbeat node score bitmask */
122 #define HB_NODE_SCORE_MASTER 0x8000
123 #define HB_NODE_SCORE_TO_BE_MASTER 0xF000
124 #define HB_NODE_SCORE_SLAVE 0x0000
125 #define HB_NODE_SCORE_UNKNOWN 0x7FFF
126 
127 #define HB_BUFFER_SZ (4096)
128 #define HB_MAX_NUM_NODES (8)
129 #define HB_MAX_NUM_RESOURCE_PROC (16)
130 #define HB_MAX_PING_CHECK (3)
131 #define HB_MAX_WAIT_FOR_NEW_MASTER (60)
132 #define HB_MAX_CHANGEMODE_DIFF_TO_TERM (12)
133 #define HB_MAX_CHANGEMODE_DIFF_TO_KILL (24)
134 
135 /* various strings for er_set */
136 #define HB_RESULT_SUCCESS_STR "Success"
137 #define HB_RESULT_FAILURE_STR "Failure"
138 
139 #define HB_CMD_ACTIVATE_STR "activate"
140 #define HB_CMD_DEACTIVATE_STR "deactivate"
141 #define HB_CMD_DEREGISTER_STR "deregister"
142 #define HB_CMD_RELOAD_STR "reload"
143 #define HB_CMD_UTIL_START_STR "util process start"
144 
146 {
151 };
152 
154 {
158 };
159 
160 /* heartbeat validation result */
162 {
168 };
169 #define HB_VALID_NO_ERROR_STR "no_error"
170 #define HB_VALID_UNIDENTIFIED_NODE_STR "unidentified_node"
171 #define HB_VALID_GROUP_NAME_MISMATCH_STR "group_name_mismatch"
172 #define HB_VALID_IP_ADDR_MISMATCH_STR "ip_addr_mismatch"
173 #define HB_VALID_CANNOT_RESOLVE_HOST_STR "cannot_resolve_host_name"
174 
175 /* time related macro */
/*
 * HB_GET_ELAPSED_TIME - difference (end_time - start_time) as a double.
 *
 * Both arguments must be objects (or expressions yielding objects) that have
 * tv_sec and tv_usec members. The tv_sec difference is scaled by 1000 and the
 * tv_usec difference is divided by 1000.0, i.e. the result is in milliseconds
 * when the fields hold seconds and microseconds respectively.
 *
 * Each argument is parenthesized so that expressions such as `*ptr` expand to
 * `(*ptr).tv_sec` instead of the mis-parsed `*ptr.tv_sec`.
 * NOTE: each argument is evaluated twice; do not pass expressions with side
 * effects.
 */
#define HB_GET_ELAPSED_TIME(end_time, start_time) \
  ((double) ((end_time).tv_sec - (start_time).tv_sec) * 1000 + \
   ((end_time).tv_usec - (start_time).tv_usec) / 1000.0)
179 
/*
 * HB_IS_INITIALIZED_TIME - evaluates to 1 when both tv_sec and tv_usec of
 * arg_time are zero, and to 0 otherwise.
 *
 * The argument is parenthesized so that expressions such as `*ptr` expand
 * correctly. NOTE: the argument is evaluated twice; do not pass expressions
 * with side effects.
 */
#define HB_IS_INITIALIZED_TIME(arg_time) \
  (((arg_time).tv_sec == 0 && (arg_time).tv_usec == 0) ? 1 : 0)
182 
183 #define HB_PROC_RECOVERY_DELAY_TIME (30* 1000) /* milli-second */
184 
185 #define HB_UI_NODE_CLEANUP_TIME_IN_MSECS (3600 * 1000)
186 #define HB_UI_NODE_CACHE_TIME_IN_MSECS (60 * 1000)
187 #define HB_IPV4_STR_LEN (16)
188 
189 /* heartbeat list */
190 typedef struct hb_list HB_LIST;
191 struct hb_list
192 {
195 };
196 
197 
198 /* heartbeat node entries */
201 {
204 
205  char host_name[CUB_MAXHOSTNAMELEN];
206  unsigned short priority;
208  short score;
210 
211  struct timeval last_recv_hbtime; /* last received heartbeat time */
212 };
213 
214 /* heartbeat ping host entries */
217 {
220 
221  char host_name[CUB_MAXHOSTNAMELEN];
223 };
224 
225 /* heartbeat unidentified host entries */
228 {
231 
232  char host_name[CUB_MAXHOSTNAMELEN];
233  char group_id[HB_MAX_GROUP_ID_LEN];
234  struct sockaddr_in saddr;
235  struct timeval last_recv_time;
236  int v_result;
237 };
238 
239 /* heartbeat cluster */
240 typedef struct hb_cluster HB_CLUSTER;
242 {
243  pthread_mutex_t lock;
244 
246 
248  char group_id[HB_MAX_GROUP_ID_LEN];
249  char host_name[CUB_MAXHOSTNAMELEN];
250 
253 
256 
257  bool shutdown;
261 
264 
267 };
268 
269 /* heartbeat process entries */
271 {
274 
275  unsigned char state; /* process state */
276  unsigned char type; /* single/master-slave */
277 
278  int sfd;
279 
280  int pid;
281  char exec_path[HB_MAX_SZ_PROC_EXEC_PATH];
283 
284  struct timeval frtime; /* first registered time */
285  struct timeval rtime; /* registered time */
286  struct timeval dtime; /* deregistered time */
287  struct timeval ktime; /* shutdown time */
288  struct timeval stime; /* start time */
289 
290  unsigned short changemode_rid;
291  unsigned short changemode_gap;
292 
296 
298 
299  bool being_shutdown; /* whether the proc is being shut down */
301 };
302 
303 /* heartbeat resources */
304 typedef struct hb_resource HB_RESOURCE;
306 {
307  pthread_mutex_t lock;
308 
309  HB_NODE_STATE_TYPE state; /* mode/state */
310 
313 
314  bool shutdown;
315 };
316 
317 /* heartbeat cluster job argument */
320 {
321  unsigned int ping_check_count;
322  unsigned int retries; /* job retries */
323 };
324 
325 /* heartbeat resource job argument */
328 {
329  int pid; /* process id */
330  int sfd; /* socket fd */
331 
332  char args[HB_MAX_SZ_PROC_ARGS]; /* args */
333 
334  unsigned int retries; /* job retries */
335  unsigned int max_retries; /* job max retries */
336 
337  struct timeval ftime; /* first job execution time */
338  struct timeval ltime; /* last job execution time */
339 };
340 
341 /* heartbeat job argument */
342 typedef union hb_job_arg HB_JOB_ARG;
344 {
347 };
348 
349 typedef void (*HB_JOB_FUNC) (HB_JOB_ARG *);
350 
351 /* timer job queue entries */
352 typedef struct hb_job_entry HB_JOB_ENTRY;
354 {
357 
358  unsigned int type;
359 
360  struct timeval expire;
361 
364 };
365 
366 /* timer job queue */
367 typedef struct hb_job HB_JOB;
368 struct hb_job
369 {
370  pthread_mutex_t lock;
371 
372  unsigned short num_jobs;
374 
376 
377  bool shutdown;
378 };
379 
380 extern HB_CLUSTER *hb_Cluster;
381 extern HB_RESOURCE *hb_Resource;
382 extern HB_JOB *cluster_Jobs;
383 extern HB_JOB *resource_Jobs;
384 
385 extern bool hb_Deactivate_immediately;
386 
387 extern int hb_master_init (void);
388 extern void hb_resource_shutdown_and_cleanup (void);
389 extern void hb_cluster_shutdown_and_cleanup (void);
390 
392 
393 extern void hb_get_node_info_string (char **str, bool verbose_yn);
394 extern void hb_get_process_info_string (char **str, bool verbose_yn);
395 extern void hb_get_ping_host_info_string (char **str);
396 extern void hb_get_admin_info_string (char **str);
397 extern void hb_kill_all_heartbeat_process (char **str);
398 
399 extern void hb_deregister_by_pid (pid_t pid);
400 extern void hb_deregister_by_args (char *args);
401 
402 extern void hb_reconfig_heartbeat (char **str);
403 extern int hb_prepare_deactivate_heartbeat (void);
404 extern int hb_deactivate_heartbeat (void);
405 extern int hb_activate_heartbeat (void);
406 
407 extern bool hb_is_registered_process (CSS_CONN_ENTRY * conn, char *args);
408 extern void hb_register_new_process (CSS_CONN_ENTRY * conn);
410 extern void hb_resource_receive_get_eof (CSS_CONN_ENTRY * conn);
411 
412 extern int hb_check_request_eligibility (SOCKET sd);
413 extern void hb_start_deactivate_server_info (void);
414 extern int hb_get_deactivating_server_count (void);
415 extern bool hb_is_deactivation_started (void);
416 extern bool hb_is_deactivation_ready (void);
417 extern void hb_finish_deactivate_server_info (void);
418 
419 extern int hb_start_util_process (char *args);
420 
421 extern void hb_enable_er_log (void);
422 extern void hb_disable_er_log (int reason, const char *msg_fmt, ...);
423 
424 extern int hb_return_proc_state_by_fd (int sfd);
425 extern bool hb_is_hang_process (int sfd);
426 
427 #endif /* _MASTER_HEARTBEAT_H_ */
HB_LIST ** prev
HB_NODE_STATE_TYPE state
unsigned short changemode_gap
void hb_finish_deactivate_server_info(void)
HB_JOB_ENTRY ** prev
HB_NODE_STATE_TYPE state
HB_CLUSTER_JOB_ARG cluster_job_arg
int SOCKET
Definition: porting.h:482
int hb_start_util_process(char *args)
unsigned char state
HB_NODE_ENTRY * master
bool is_curr_eof_received
HB_PING_HOST_ENTRY ** prev
int sfd
bool hb_is_hang_process(int sfd)
HB_JOB_ARG * arg
int hb_master_init(void)
void(* HB_JOB_FUNC)(HB_JOB_ARG *)
void hb_cluster_shutdown_and_cleanup(void)
void hb_reconfig_heartbeat(char **str)
void hb_kill_all_heartbeat_process(char **str)
int hb_get_deactivating_server_count(void)
HB_RESOURCE_JOB_ARG resource_job_arg
void hb_get_node_info_string(char **str, bool verbose_yn)
HB_NODE_STATE_TYPE state
void hb_disable_er_log(int reason, const char *msg_fmt,...)
HB_RESOURCE * hb_Resource
pthread_mutex_t lock
int hb_prepare_deactivate_heartbeat(void)
HB_NODE_ENTRY * next
HB_JOB_FUNC func
int hb_activate_heartbeat(void)
int hb_return_proc_state_by_fd(int sfd)
bool hb_is_deactivation_started(void)
void hb_get_admin_info_string(char **str)
bool being_shutdown
HB_VALID_RESULT
HB_NODE_ENTRY ** prev
LOG_LSA curr_eof
unsigned char type
void hb_start_deactivate_server_info(void)
void hb_deregister_by_pid(pid_t pid)
HB_NOLOG_REASON
HB_NODE_ENTRY * myself
void hb_register_new_process(CSS_CONN_ENTRY *conn)
void hb_resource_shutdown_and_cleanup(void)
#define HB_MAX_SZ_PROC_ARGS
Definition: heartbeat.h:83
HB_PROC_ENTRY ** prev
short score
CSS_CONN_ENTRY * conn
HB_CLUSTER_JOB
HB_PROC_STATE
HB_JOB_FUNC * job_funcs
unsigned short num_jobs
pid_t pid
Definition: dynamic_load.c:955
HB_PROC_ENTRY * procs
HB_UI_NODE_ENTRY * next
void hb_get_process_info_string(char **str, bool verbose_yn)
bool hb_is_deactivation_ready(void)
int hb_check_request_eligibility(SOCKET sd)
#define HB_MAX_SZ_PROC_EXEC_PATH
Definition: heartbeat.h:80
void hb_resource_receive_get_eof(CSS_CONN_ENTRY *conn)
HB_PROC_ENTRY * next
bool server_hang
HB_PING_HOST_ENTRY * next
bool is_ping_check_enabled
HB_LIST * next
HB_NODE_ENTRY * nodes
pthread_mutex_t lock
#define HB_MAX_GROUP_ID_LEN
Definition: heartbeat.h:79
HB_JOB * cluster_Jobs
void hb_get_ping_host_info_string(char **str)
HB_JOB_ENTRY * next
void hb_enable_er_log(void)
enum HB_NODE_STATE HB_NODE_STATE_TYPE
Definition: heartbeat.h:106
void hb_cleanup_conn_and_start_process(CSS_CONN_ENTRY *conn, SOCKET sfd)
HB_HOST_CHECK_RESULT
pthread_mutex_t lock
HB_PING_RESULT
unsigned int type
int pid
short heartbeat_gap
void hb_resource_receive_changemode(CSS_CONN_ENTRY *conn)
HB_RESOURCE_JOB
unsigned int ping_check_count
int hb_deactivate_heartbeat(void)
HB_UI_NODE_ENTRY * ui_nodes
bool hb_Deactivate_immediately
HB_CLUSTER * hb_Cluster
HB_UI_NODE_ENTRY ** prev
HB_JOB_ENTRY * jobs
bool hb_is_registered_process(CSS_CONN_ENTRY *conn, char *args)
#define CUB_MAXHOSTNAMELEN
Definition: porting.h:379
HB_JOB * resource_Jobs
LOG_LSA prev_eof
unsigned short changemode_rid
HB_PING_HOST_ENTRY * ping_hosts
void hb_deregister_by_args(char *args)
int v_result
int ping_result
unsigned int max_retries
unsigned short priority