File external_sort.h¶
File List > cubrid > src > storage > external_sort.h
Go to the documentation of this file
/*
* Copyright 2008 Search Solution Corporation
* Copyright 2016 CUBRID Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/*
* external_sort.h - External sorting module
*/
#ifndef _EXTERNAL_SORT_H_
#define _EXTERNAL_SORT_H_
#ident "$Id$"
#if !defined (SERVER_MODE) && !defined (SA_MODE)
#error Belongs to server module
#endif /* !defined (SERVER_MODE) && !defined (SA_MODE) */
#include "error_manager.h"
#include "query_list.h"
#include "storage_common.h"
#include "thread_compat.hpp"
#define SORT_PUT_STOP 2
#define NO_SORT_LIMIT (-1)
#define SORT_RECORD_LENGTH_SIZE (sizeof(INT64)) /* for 8byte align */
#define SORT_RECORD_LENGTH(item_p) (*((int *) ((item_p) - SORT_RECORD_LENGTH_SIZE)))
typedef enum
{
SORT_REC_DOESNT_FIT,
SORT_SUCCESS,
SORT_NOMORE_RECS,
SORT_ERROR_OCCURRED
} SORT_STATUS;
typedef enum
{
SORT_ELIM_DUP, /* eliminate duplicate */
SORT_DUP /* allow duplicate */
} SORT_DUP_OPTION;
typedef enum
{
SORT_ORDER_BY,
SORT_ORDER_WITH_LIMIT,
SORT_GROUP_BY,
SORT_ANALYTIC,
SORT_INDEX_LEAF
} SORT_PARALLEL_TYPE;
typedef SORT_STATUS SORT_GET_FUNC (THREAD_ENTRY * thread_p, RECDES *, void *);
typedef int SORT_PUT_FUNC (THREAD_ENTRY * thread_p, const RECDES *, void *);
typedef int SORT_CMP_FUNC (const void *, const void *, void *);
typedef struct SORT_REC SORT_REC;
typedef struct SUBKEY_INFO SUBKEY_INFO;
typedef struct SORTKEY_INFO SORTKEY_INFO;
typedef struct SORT_INFO SORT_INFO;
struct SORT_REC
{
SORT_REC *next; /* forward link for duplicate sort_key value */
union
{
/* Bread crumbs back to the original tuple, so that we can go straight there after the keys have been sorted. */
struct
{
INT32 pageid; /* Page identifier */
INT16 volid; /* Volume identifier */
INT16 offset; /* offset in page */
char body[1]; /* sort_key body start position */
} original;
/*
* The offset vector. A value of zero for an entry means that the
* corresponding column is null, and that there are no data bytes for
* the column. A non-zero entry is interpreted as the offset from
* the *start* of the SORT_REC to the data bytes for that column.
*/
int offset[1];
} s;
};
struct SUBKEY_INFO
{
/* The actual column number in the list file tuple. */
int col;
int permuted_col;
TP_DOMAIN *col_dom;
TP_DOMAIN *cmp_dom; /* for median sorting string in different domain */
// signature should match pr_type::data_cmpdisk_function_type
// todo - use a function type for both sort_f and pr_type::data_cmpdisk_function_type
DB_VALUE_COMPARE_RESULT (*sort_f) (void *tplp1, void *tplp2, TP_DOMAIN * dom, int do_coercion, int total_order,
int *start_col);
/*
* Non-zero iff the sort on this column is descending. Factoring
* this decision out of the actual sort function allows to use only
* one of those guys, at no particularly great cost in performance,
* and a big win in maintainability.
*/
int is_desc;
int is_nulls_first;
bool use_cmp_dom; /* when true, use cmp_dom to make comparing */
};
struct SORTKEY_INFO
{
int nkeys; /* The number of columns in use today. */
int use_original; /* False iff the sort keys consist of all of the input record fields, i.e., if we'll
* reconstruct the input records from the keys rather than look them up again in the
* original file. */
SUBKEY_INFO *key; /* Points to `default_keys' if `nkeys' <= 8; otherwise it points to malloc'ed space. */
SUBKEY_INFO default_keys[8]; /* Default storage; this ought to work for most cases. */
int error; /* median domain convert errors */
};
struct SORT_INFO
{
SORTKEY_INFO key_info; /* All of the interesting key information. */
QFILE_SORT_SCAN_ID *s_id; /* A SCAN_ID for the input list file. This is stateful, and records the current
* location of the scan between calls to ls_sort_get_next(). */
QFILE_LIST_ID *input_file;
QFILE_LIST_ID *output_file; /* The name of the output file. This is where ls_sort_put_next_*() deposits its stuff.
*/
RECDES output_recdes; /* A working buffer for output of tuples; used only when we're using
* ls_sort_put_next_short() as the output function. */
void *extra_arg; /* extra information supplied by the caller */
/* for parallel */
SORT_LIST *sort_list_p; /* to open output list file */
int flag; /* to open output list file */
int parallelism;
void *orderby_stats;
};
extern int sort_listfile (THREAD_ENTRY * thread_p, INT16 volid, int est_inp_pg_cnt, SORT_GET_FUNC * get_fn,
void *get_arg, SORT_PUT_FUNC * put_fn, void *put_arg, SORT_CMP_FUNC * cmp_fn, void *cmp_arg,
SORT_DUP_OPTION option, int limit, bool includes_tde_class,
SORT_PARALLEL_TYPE sort_parallel_type);
extern SORT_STATUS btree_sort_get_next_parallel (THREAD_ENTRY * thread_p, RECDES * temp_recdes, void *arg);
#endif /* _EXTERNAL_SORT_H_ */