// File: query_analytic.cpp (File List > cubrid > src > query > query_analytic.cpp)
/*
* Copyright 2008 Search Solution Corporation
* Copyright 2016 CUBRID Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//
// query_analytic - implementation of analytic query execution
//
#include "query_analytic.hpp"
#include "dbtype.h"
#include "fetch.h"
#include "list_file.h"
#include "object_domain.h"
#include "object_primitive.h"
#include "object_representation.h"
#include "query_opfunc.h"
#include "xasl.h" // QPROC_IS_INTERPOLATION_FUNC
#include "xasl_analytic.hpp"
#include <cmath>
// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"
/* forward declaration; the definition appears later in this file. It is called by qdata_finalize_analytic_func ()
 * to compute MEDIAN / PERCENTILE_CONT / PERCENTILE_DISC results from an already opened list-file scan. */
static int qdata_analytic_interpolation (cubthread::entry *thread_p, cubxasl::analytic_list_node *ana_p,
QFILE_LIST_SCAN_ID *scan_id);
/*
 * qdata_initialize_analytic_func () - reset the running state of an analytic function
 *   return: NO_ERROR, or ER_FAILED
 *   thread_p(in): thread entry, used for private allocation and list file creation
 *   func_p(in/out): Analytic expression node whose counter, accumulator and partition value are reset
 *   query_id(in): Associated query id, handed to the temporary list file
 *
 * NOTE: for DISTINCT functions a one-column temporary list file (typed after the operand domain) is opened and
 *       copied into func_p->list_id; operand values are collected there during evaluation.
 */
int
qdata_initialize_analytic_func (cubthread::entry *thread_p, ANALYTIC_TYPE *func_p, QUERY_ID query_id)
{
  func_p->curr_cnt = 0;

  /* re-initialize the accumulator, keeping the domain type it already carries */
  int init_rc = db_value_domain_init (func_p->value, DB_VALUE_DOMAIN_TYPE (func_p->value), DB_DEFAULT_PRECISION,
				      DB_DEFAULT_SCALE);
  if (init_rc != NO_ERROR)
    {
      return ER_FAILED;
    }

  /* counting and ranking functions start from zero instead of NULL */
  switch (func_p->function)
    {
    case PT_COUNT_STAR:
    case PT_COUNT:
      db_make_bigint (func_p->value, 0);
      break;

    case PT_ROW_NUMBER:
    case PT_RANK:
    case PT_DENSE_RANK:
      db_make_int (func_p->value, 0);
      break;

    default:
      break;
    }

  db_make_null (&func_p->part_value);

  if (func_p->option != Q_DISTINCT)
    {
      /* nothing else to prepare */
      return NO_ERROR;
    }

  /* create temporary list file to handle distincts */
  QFILE_TUPLE_VALUE_TYPE_LIST distinct_types;
  distinct_types.type_cnt = 1;
  distinct_types.domp = (TP_DOMAIN **) db_private_alloc (thread_p, sizeof (TP_DOMAIN *));
  if (distinct_types.domp == NULL)
    {
      return ER_FAILED;
    }
  distinct_types.domp[0] = func_p->operand.domain;

  QFILE_LIST_ID *tmp_list_p = qfile_open_list (thread_p, &distinct_types, NULL, query_id, QFILE_FLAG_DISTINCT, NULL);

  /* the domain array is only needed while opening the list; release it whatever the outcome */
  db_private_free_and_init (thread_p, distinct_types.domp);

  if (tmp_list_p == NULL)
    {
      return ER_FAILED;
    }

  /* hand the list over to the analytic node; the temporary descriptor is freed on both outcomes */
  int copy_rc = qfile_copy_list_id (func_p->list_id, tmp_list_p, true, QFILE_PROHIBIT_DEPENDENT);
  qfile_free_list_id (tmp_list_p);

  return (copy_rc != NO_ERROR) ? ER_FAILED : NO_ERROR;
}
/*
 * qdata_evaluate_analytic_func () - fold one operand value into the running state of an analytic function
 *   return: NO_ERROR, or ER_code
 *   thread_p(in): thread entry
 *   func_p(in/out): Analytic expression node; its accumulator(s), domain and row counter are updated
 *   val_desc_p(in): Value descriptor used to fetch the operand
 *
 * NOTE: the operand is fetched as a private copy (dbval) which is released at the common "exit" label; every
 *       path taken after the fetch succeeded must leave through that label.
 *       For DISTINCT functions the operand is only appended to the temporary list file here (interpolation
 *       functions additionally go through the domain handling below); the result is computed at finalize time.
 */
int
qdata_evaluate_analytic_func (cubthread::entry *thread_p, ANALYTIC_TYPE *func_p, VAL_DESCR *val_desc_p)
{
  DB_VALUE dbval, sqr_val;
  DB_VALUE *opr_dbval_p = NULL;
  const PR_TYPE *pr_type_p;
  OR_BUF buf;
  char *disk_repr_p = NULL;
  int dbval_size;
  int copy_opr;
  TP_DOMAIN *tmp_domain_p = NULL;
  DB_TYPE dbval_type;
  int error = NO_ERROR;
  TP_DOMAIN_STATUS dom_status;
  int coll_id;
  ANALYTIC_PERCENTILE_FUNCTION_INFO *percentile_info_p = NULL;
  DB_VALUE *peek_value_p = NULL;

  db_make_null (&dbval);
  db_make_null (&sqr_val);

  /* fetch operand value, analytic regulator variable should only contain constants */
  if (fetch_copy_dbval (thread_p, &func_p->operand, val_desc_p, NULL, NULL, NULL, &dbval) != NO_ERROR)
    {
      return ER_FAILED;
    }

  if ((func_p->opr_dbtype == DB_TYPE_VARIABLE || TP_DOMAIN_COLLATION_FLAG (func_p->domain) != TP_DOMAIN_COLL_NORMAL)
      && !DB_IS_NULL (&dbval))
    {
      /* set function default domain when late binding */
      switch (func_p->function)
	{
	case PT_COUNT:
	case PT_COUNT_STAR:
	  func_p->domain = tp_domain_resolve_default (DB_TYPE_BIGINT);
	  break;

	case PT_AVG:
	case PT_STDDEV:
	case PT_STDDEV_POP:
	case PT_STDDEV_SAMP:
	case PT_VARIANCE:
	case PT_VAR_POP:
	case PT_VAR_SAMP:
	  func_p->domain = tp_domain_resolve_default (DB_TYPE_DOUBLE);
	  break;

	case PT_SUM:
	  if (TP_IS_NUMERIC_TYPE (DB_VALUE_TYPE (&dbval)))
	    {
	      func_p->domain = tp_domain_resolve_value (&dbval, NULL);
	    }
	  else
	    {
	      func_p->domain = tp_domain_resolve_default (DB_TYPE_DOUBLE);
	    }
	  break;

	default:
	  func_p->domain = tp_domain_resolve_value (&dbval, NULL);
	  break;
	}

      if (func_p->domain == NULL)
	{
	  error = ER_FAILED;
	  goto exit;
	}

      /* coerce operand */
      if (tp_value_coerce (&dbval, &dbval, func_p->domain) != DOMAIN_COMPATIBLE)
	{
	  error = ER_FAILED;
	  goto exit;
	}

      func_p->opr_dbtype = TP_DOMAIN_TYPE (func_p->domain);
      db_value_domain_init (func_p->value, func_p->opr_dbtype, DB_DEFAULT_PRECISION, DB_DEFAULT_SCALE);
    }

  /* a NULL operand is skipped, except for the functions that must see every row (numbering, ranking, value
   * navigation and interpolation functions) */
  if (DB_IS_NULL (&dbval) && func_p->function != PT_ROW_NUMBER && func_p->function != PT_FIRST_VALUE
      && func_p->function != PT_LAST_VALUE && func_p->function != PT_NTH_VALUE && func_p->function != PT_RANK
      && func_p->function != PT_DENSE_RANK && func_p->function != PT_LEAD && func_p->function != PT_LAG
      && !QPROC_IS_INTERPOLATION_FUNC (func_p))
    {
      if (func_p->function == PT_COUNT || func_p->function == PT_COUNT_STAR)
	{
	  func_p->curr_cnt++;
	}

      if (func_p->function == PT_NTILE)
	{
	  func_p->info.ntile.is_null = true;
	  func_p->info.ntile.bucket_count = 0;
	}

      goto exit;
    }

  if (func_p->option == Q_DISTINCT)
    {
      /* handle distincts by adding to the temp list file */
      dbval_type = DB_VALUE_DOMAIN_TYPE (&dbval);
      pr_type_p = pr_type_from_id (dbval_type);
      if (pr_type_p == NULL)
	{
	  error = ER_FAILED;
	  goto exit;
	}

      /* serialize the operand into a private buffer in its disk representation */
      dbval_size = pr_data_writeval_disk_size (&dbval);
      if (dbval_size > 0 && (disk_repr_p = (char *) db_private_alloc (thread_p, dbval_size)) != NULL)
	{
	  or_init (&buf, disk_repr_p, dbval_size);
	  error = pr_type_p->data_writeval (&buf, &dbval);
	  if (error != NO_ERROR)
	    {
	      assert_release (buf.ptr <= buf.endptr);
	      db_private_free_and_init (thread_p, disk_repr_p);
	      error = ER_FAILED;
	      goto exit;
	    }
	}
      else
	{
	  error = ER_FAILED;
	  goto exit;
	}

      if (qfile_add_item_to_list (thread_p, disk_repr_p, dbval_size, func_p->list_id) != NO_ERROR)
	{
	  db_private_free_and_init (thread_p, disk_repr_p);
	  error = ER_FAILED;
	  goto exit;
	}
      db_private_free_and_init (thread_p, disk_repr_p);

      /* interpolation funcs need to check domain compatibility in the following code */
      if (!QPROC_IS_INTERPOLATION_FUNC (func_p))
	{
	  goto exit;
	}
    }

  copy_opr = false;
  coll_id = func_p->domain->collation_id;

  switch (func_p->function)
    {
    case PT_CUME_DIST:
    case PT_PERCENT_RANK:
      /* these functions do not execute here, just in case */
      pr_clear_value (func_p->value);
      break;

    case PT_NTILE:
      /* output value is not required now */
      db_make_null (func_p->value);

      if (func_p->curr_cnt < 1)
	{
	  /* the operand is the number of buckets and should be constant within the window; we can extract it now for
	   * later use */
	  dom_status = tp_value_coerce (&dbval, &dbval, &tp_Double_domain);
	  if (dom_status != DOMAIN_COMPATIBLE)
	    {
	      error = tp_domain_status_er_set (dom_status, ARG_FILE_LINE, &dbval, &tp_Double_domain);
	      assert_release (error != NO_ERROR);
	      goto exit;
	    }

	  double ntile_bucket_d = floor (db_get_double (&dbval));

	  /* boundary check; it has to be done on the double because converting an out-of-range (or NaN) double to
	   * int is undefined, and an int can never compare greater than DB_INT32_MAX anyway. The negated form also
	   * rejects NaN. */
	  if (!(ntile_bucket_d >= 1.0 && ntile_bucket_d <= (double) DB_INT32_MAX))
	    {
	      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_NTILE_INVALID_BUCKET_NUMBER, 0);
	      error = ER_NTILE_INVALID_BUCKET_NUMBER;
	      goto exit;
	    }

	  /* we're sure the operand is not null */
	  func_p->info.ntile.is_null = false;
	  func_p->info.ntile.bucket_count = (int) ntile_bucket_d;
	}
      break;

    case PT_FIRST_VALUE:
      if ((func_p->ignore_nulls && DB_IS_NULL (func_p->value)) || (func_p->curr_cnt < 1))
	{
	  /* copy value if it's the first value OR if we're ignoring NULLs and we've only encountered NULL values so
	   * far */
	  (void) pr_clear_value (func_p->value);
	  pr_clone_value (&dbval, func_p->value);
	}
      break;

    case PT_LAST_VALUE:
      if (!func_p->ignore_nulls || !DB_IS_NULL (&dbval))
	{
	  (void) pr_clear_value (func_p->value);
	  pr_clone_value (&dbval, func_p->value);
	}
      break;

    case PT_LEAD:
    case PT_LAG:
    case PT_NTH_VALUE:
      /* just copy */
      (void) pr_clear_value (func_p->value);
      pr_clone_value (&dbval, func_p->value);
      break;

    case PT_MIN:
      opr_dbval_p = &dbval;
      if ((func_p->curr_cnt < 1 || DB_IS_NULL (func_p->value))
	  || func_p->domain->type->cmpval (func_p->value, &dbval, 1, 1, NULL, coll_id) > 0)
	{
	  copy_opr = true;
	}
      break;

    case PT_MAX:
      opr_dbval_p = &dbval;
      if ((func_p->curr_cnt < 1 || DB_IS_NULL (func_p->value))
	  || func_p->domain->type->cmpval (func_p->value, &dbval, 1, 1, NULL, coll_id) < 0)
	{
	  copy_opr = true;
	}
      break;

    case PT_AVG:
    case PT_SUM:
      if (func_p->curr_cnt < 1)
	{
	  opr_dbval_p = &dbval;
	  copy_opr = true;

	  if (TP_IS_CHAR_TYPE (DB_VALUE_DOMAIN_TYPE (opr_dbval_p)))
	    {
	      /* char types default to double; coerce here so we don't mess up the accumulator when we copy the operand
	       */
	      if (tp_value_coerce (&dbval, &dbval, func_p->domain) != DOMAIN_COMPATIBLE)
		{
		  error = ER_FAILED;
		  goto exit;
		}
	    }

	  /* this type setting is necessary, it ensures that for the case average handling, which is treated like sum
	   * until final iteration, starts with the initial data type */
	  if (db_value_domain_init (func_p->value, DB_VALUE_DOMAIN_TYPE (opr_dbval_p), DB_DEFAULT_PRECISION,
				    DB_DEFAULT_SCALE) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }
	}
      else
	{
	  TP_DOMAIN *result_domain;
	  DB_TYPE type =
	    (func_p->function ==
	     PT_AVG) ? (DB_TYPE) func_p->value->domain.general_info.type : TP_DOMAIN_TYPE (func_p->domain);

	  /* for NUMERIC no result domain is passed to the addition */
	  result_domain = ((type == DB_TYPE_NUMERIC) ? NULL : func_p->domain);
	  if (qdata_add_dbval (func_p->value, &dbval, func_p->value, result_domain) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }
	  copy_opr = false;
	}
      break;

    case PT_COUNT_STAR:
      /* only the row counter (incremented below) matters */
      break;

    case PT_ROW_NUMBER:
      db_make_int (func_p->out_value, func_p->curr_cnt + 1);
      break;

    case PT_COUNT:
      if (func_p->curr_cnt < 1)
	{
	  db_make_bigint (func_p->value, 1);
	}
      else
	{
	  db_make_bigint (func_p->value, db_get_bigint (func_p->value) + 1);
	}
      break;

    case PT_RANK:
      if (func_p->curr_cnt < 1)
	{
	  db_make_int (func_p->value, 1);
	}
      else
	{
	  if (ANALYTIC_FUNC_IS_FLAGED (func_p, ANALYTIC_KEEP_RANK))
	    {
	      /* keep the current rank for this row; the flag is consumed */
	      ANALYTIC_FUNC_CLEAR_FLAG (func_p, ANALYTIC_KEEP_RANK);
	    }
	  else
	    {
	      db_make_int (func_p->value, func_p->curr_cnt + 1);
	    }
	}
      break;

    case PT_DENSE_RANK:
      if (func_p->curr_cnt < 1)
	{
	  db_make_int (func_p->value, 1);
	}
      else
	{
	  if (ANALYTIC_FUNC_IS_FLAGED (func_p, ANALYTIC_KEEP_RANK))
	    {
	      /* keep the current rank for this row; the flag is consumed */
	      ANALYTIC_FUNC_CLEAR_FLAG (func_p, ANALYTIC_KEEP_RANK);
	    }
	  else
	    {
	      db_make_int (func_p->value, db_get_int (func_p->value) + 1);
	    }
	}
      break;

    case PT_STDDEV:
    case PT_STDDEV_POP:
    case PT_STDDEV_SAMP:
    case PT_VARIANCE:
    case PT_VAR_POP:
    case PT_VAR_SAMP:
      copy_opr = false;

      /* all variance computations are carried out in double */
      tmp_domain_p = tp_domain_resolve_default (DB_TYPE_DOUBLE);
      if (tp_value_coerce (&dbval, &dbval, tmp_domain_p) != DOMAIN_COMPATIBLE)
	{
	  error = ER_FAILED;
	  goto exit;
	}

      if (func_p->curr_cnt < 1)
	{
	  opr_dbval_p = &dbval;

	  /* func_p->value contains SUM(X) */
	  if (db_value_domain_init (func_p->value, DB_VALUE_DOMAIN_TYPE (opr_dbval_p), DB_DEFAULT_PRECISION,
				    DB_DEFAULT_SCALE) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }

	  /* func_p->value2 contains SUM(X^2) */
	  if (db_value_domain_init (func_p->value2, DB_VALUE_DOMAIN_TYPE (opr_dbval_p), DB_DEFAULT_PRECISION,
				    DB_DEFAULT_SCALE) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }

	  /* calculate X^2 */
	  if (qdata_multiply_dbval (&dbval, &dbval, &sqr_val, tmp_domain_p) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }

	  (void) pr_clear_value (func_p->value);
	  (void) pr_clear_value (func_p->value2);

	  dbval_type = DB_VALUE_DOMAIN_TYPE (func_p->value);
	  pr_type_p = pr_type_from_id (dbval_type);
	  if (pr_type_p == NULL)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }

	  pr_type_p->setval (func_p->value, &dbval, true);
	  pr_type_p->setval (func_p->value2, &sqr_val, true);
	}
      else
	{
	  if (qdata_multiply_dbval (&dbval, &dbval, &sqr_val, tmp_domain_p) != NO_ERROR)
	    {
	      error = ER_FAILED;
	      goto exit;
	    }

	  if (qdata_add_dbval (func_p->value, &dbval, func_p->value, tmp_domain_p) != NO_ERROR)
	    {
	      pr_clear_value (&sqr_val);
	      error = ER_FAILED;
	      goto exit;
	    }

	  if (qdata_add_dbval (func_p->value2, &sqr_val, func_p->value2, tmp_domain_p) != NO_ERROR)
	    {
	      pr_clear_value (&sqr_val);
	      error = ER_FAILED;
	      goto exit;
	    }

	  pr_clear_value (&sqr_val);
	}
      break;

    case PT_MEDIAN:
    case PT_PERCENTILE_CONT:
    case PT_PERCENTILE_DISC:
      if (func_p->function == PT_PERCENTILE_CONT || func_p->function == PT_PERCENTILE_DISC)
	{
	  percentile_info_p = &func_p->info.percentile;
	}

      if (func_p->curr_cnt < 1)
	{
	  if (func_p->function == PT_PERCENTILE_CONT || func_p->function == PT_PERCENTILE_DISC)
	    {
	      /* first row: read the percentile argument and remember it for the rest of the rows */
	      error =
		fetch_peek_dbval (thread_p, percentile_info_p->percentile_reguvar, NULL, NULL, NULL, NULL,
				  &peek_value_p);
	      if (error != NO_ERROR)
		{
		  assert (er_errid () != NO_ERROR);
		  goto exit;
		}

	      if ((peek_value_p == NULL) || (DB_VALUE_TYPE (peek_value_p) != DB_TYPE_DOUBLE))
		{
		  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_QPROC_INVALID_DATATYPE, 0);
		  error = ER_QPROC_INVALID_DATATYPE;
		  goto exit;
		}

	      percentile_info_p->cur_group_percentile = db_get_double (peek_value_p);
	      if ((percentile_info_p->cur_group_percentile < 0) || (percentile_info_p->cur_group_percentile > 1))
		{
		  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PERCENTILE_FUNC_INVALID_PERCENTILE_RANGE, 1,
			  percentile_info_p->cur_group_percentile);
		  error = ER_PERCENTILE_FUNC_INVALID_PERCENTILE_RANGE;
		  goto exit;
		}
	    }

	  if (func_p->is_first_exec_time)
	    {
	      func_p->is_first_exec_time = false;

	      /* determine domain based on first value */
	      switch (func_p->opr_dbtype)
		{
		case DB_TYPE_SHORT:
		case DB_TYPE_INTEGER:
		case DB_TYPE_BIGINT:
		case DB_TYPE_FLOAT:
		case DB_TYPE_DOUBLE:
		case DB_TYPE_MONETARY:
		case DB_TYPE_NUMERIC:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      if (func_p->is_const_operand || func_p->function == PT_PERCENTILE_DISC)
			{
			  /* percentile_disc returns the same type as operand while median and percentile_cont return
			   * double */
			  func_p->domain = tp_domain_resolve_value (&dbval, NULL);
			  if (func_p->domain == NULL)
			    {
			      error = er_errid ();
			      assert (error != NO_ERROR);
			      if (error == NO_ERROR)
				{
				  /* never report success with an unresolved domain */
				  error = ER_FAILED;
				}
			      /* leave through the common exit so that the operand copy is released */
			      goto exit;
			    }
			}
		      else
			{
			  func_p->domain = tp_domain_resolve_default (DB_TYPE_DOUBLE);
			}
		    }
		  break;

		case DB_TYPE_DATE:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_DATE);
		    }
		  break;

		case DB_TYPE_DATETIME:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_DATETIME);
		    }
		  break;

		case DB_TYPE_DATETIMETZ:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_DATETIMETZ);
		    }
		  break;

		case DB_TYPE_DATETIMELTZ:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_DATETIMELTZ);
		    }
		  break;

		case DB_TYPE_TIMESTAMP:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_TIMESTAMP);
		    }
		  break;

		case DB_TYPE_TIMESTAMPTZ:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_TIMESTAMPTZ);
		    }
		  break;

		case DB_TYPE_TIMESTAMPLTZ:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_TIMESTAMPLTZ);
		    }
		  break;

		case DB_TYPE_TIME:
		  if (TP_DOMAIN_TYPE (func_p->domain) == DB_TYPE_VARIABLE)
		    {
		      func_p->domain = tp_domain_resolve_default (DB_TYPE_TIME);
		    }
		  break;

		default:
		  /* try to cast dbval to double, datetime then time */
		  tmp_domain_p = tp_domain_resolve_default (DB_TYPE_DOUBLE);
		  dom_status = tp_value_cast (&dbval, &dbval, tmp_domain_p, false);
		  if (dom_status != DOMAIN_COMPATIBLE)
		    {
		      /* try datetime */
		      tmp_domain_p = tp_domain_resolve_default (DB_TYPE_DATETIME);
		      dom_status = tp_value_cast (&dbval, &dbval, tmp_domain_p, false);
		    }

		  /* try time */
		  if (dom_status != DOMAIN_COMPATIBLE)
		    {
		      tmp_domain_p = tp_domain_resolve_default (DB_TYPE_TIME);
		      dom_status = tp_value_cast (&dbval, &dbval, tmp_domain_p, false);
		    }

		  if (dom_status != DOMAIN_COMPATIBLE)
		    {
		      error = ER_ARG_CAN_NOT_BE_CASTED_TO_DESIRED_DOMAIN;
		      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, error, 2, fcode_get_uppercase_name (func_p->function),
			      "DOUBLE, DATETIME, TIME");
		      goto exit;
		    }

		  /* update domain */
		  func_p->domain = tmp_domain_p;
		}
	    }
	}

      /* percentile value check */
      if (func_p->function == PT_PERCENTILE_CONT || func_p->function == PT_PERCENTILE_DISC)
	{
	  error =
	    fetch_peek_dbval (thread_p, percentile_info_p->percentile_reguvar, NULL, NULL, NULL, NULL, &peek_value_p);
	  if (error != NO_ERROR)
	    {
	      assert (er_errid () != NO_ERROR);
	      goto exit;
	    }

	  /* the percentile argument must stay the same for every row of the group */
	  if ((peek_value_p == NULL) || (DB_VALUE_TYPE (peek_value_p) != DB_TYPE_DOUBLE)
	      || (db_get_double (peek_value_p) != func_p->info.percentile.cur_group_percentile))
	    {
	      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PERCENTILE_FUNC_PERCENTILE_CHANGED_IN_GROUP, 0);
	      error = ER_PERCENTILE_FUNC_PERCENTILE_CHANGED_IN_GROUP;
	      goto exit;
	    }
	}

      /* copy value */
      pr_clear_value (func_p->value);
      error = db_value_coerce (&dbval, func_p->value, func_p->domain);
      if (error != NO_ERROR)
	{
	  goto exit;
	}
      break;

    default:
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_QPROC_INVALID_XASLNODE, 0);
      error = ER_QPROC_INVALID_XASLNODE;
      goto exit;
    }

  if (copy_opr)
    {
      /* copy resultant operand value to analytic node */
      (void) pr_clear_value (func_p->value);

      dbval_type = DB_VALUE_DOMAIN_TYPE (func_p->value);
      pr_type_p = pr_type_from_id (dbval_type);
      if (pr_type_p == NULL)
	{
	  error = ER_FAILED;
	  goto exit;
	}

      pr_type_p->setval (func_p->value, opr_dbval_p, true);
    }

  func_p->curr_cnt++;

exit:
  /* release the private copy of the operand */
  pr_clear_value (&dbval);

  return error;
}
/*
 * qdata_finalize_analytic_func () - compute the final value of an analytic function from its running state
 *   return: NO_ERROR, or ER_FAILED
 *   thread_p(in): thread entry
 *   func_p(in/out): Analytic expression node; func_p->value receives the result
 *   is_same_group(in): when true the accumulator is saved into func_p->part_value and the list file is kept;
 *                      when false the list file is closed and destroyed on the way out
 *
 * NOTE: for DISTINCT functions the temporary list file is sorted with duplicate elimination and the accumulators
 *       are rebuilt from it (or the interpolation result is read from it) before averages / variances are
 *       derived. Once the sorted list is installed in func_p->list_id, every failure leaves through the "error"
 *       label, which releases that list exactly once.
 */
int
qdata_finalize_analytic_func (cubthread::entry *thread_p, ANALYTIC_TYPE *func_p, bool is_same_group)
{
  DB_VALUE dbval;
  QFILE_LIST_ID *list_id_p;
  char *tuple_p;
  const PR_TYPE *pr_type_p;
  OR_BUF buf;
  QFILE_LIST_SCAN_ID scan_id;
  SCAN_CODE scan_code;
  DB_VALUE xavgval, xavg_1val, x2avgval;
  DB_VALUE xavg2val, varval, sqr_val, dval;
  double dtmp;
  QFILE_TUPLE_RECORD tuple_record = { NULL, 0 };
  TP_DOMAIN *tmp_domain_ptr = NULL;
  int err = NO_ERROR;

  /* the function code is not modified in this routine, so the family tests are evaluated once */
  const bool is_stddev = (func_p->function == PT_STDDEV || func_p->function == PT_STDDEV_POP
			  || func_p->function == PT_STDDEV_SAMP);
  const bool is_var_or_stddev = (is_stddev || func_p->function == PT_VARIANCE || func_p->function == PT_VAR_POP
				 || func_p->function == PT_VAR_SAMP);

  db_make_null (&sqr_val);
  db_make_null (&dbval);
  db_make_null (&xavgval);
  db_make_null (&xavg_1val);
  db_make_null (&x2avgval);
  db_make_null (&xavg2val);
  db_make_null (&varval);
  db_make_null (&dval);

  if (is_var_or_stddev)
    {
      /* variance and standard deviation are computed in double */
      tmp_domain_ptr = tp_domain_resolve_default (DB_TYPE_DOUBLE);
    }

  /* set count-star aggregate values */
  if (func_p->function == PT_COUNT_STAR)
    {
      db_make_bigint (func_p->value, (INT64) func_p->curr_cnt);
    }

  /* process list file for distinct */
  if (func_p->option == Q_DISTINCT)
    {
      assert (func_p->list_id->sort_list != NULL);

      list_id_p = qfile_sort_list (thread_p, func_p->list_id, NULL, Q_DISTINCT, false);

      /* release the resource to prevent resource leak */
      if (func_p->list_id != list_id_p)
	{
	  qfile_close_list (thread_p, func_p->list_id);
	  qfile_destroy_list (thread_p, func_p->list_id);
	}

      if (!list_id_p)
	{
	  return ER_FAILED;
	}

      /* from here on func_p->list_id and list_id_p are the same list; the error label releases it */
      func_p->list_id = list_id_p;

      if (func_p->function == PT_COUNT)
	{
	  db_make_bigint (func_p->value, list_id_p->tuple_cnt);
	}
      else
	{
	  pr_type_p = list_id_p->type_list.domp[0]->type;

	  /* scan list file, accumulating total for sum/avg */
	  if (qfile_open_list_scan (list_id_p, &scan_id) != NO_ERROR)
	    {
	      goto error;
	    }

	  (void) pr_clear_value (func_p->value);
	  db_make_null (func_p->value);

	  /* median and percentile funcs don't need to read all rows */
	  if (list_id_p->tuple_cnt > 0 && QPROC_IS_INTERPOLATION_FUNC (func_p))
	    {
	      err = qdata_analytic_interpolation (thread_p, func_p, &scan_id);
	      if (err != NO_ERROR)
		{
		  /* only the scan is closed here; the list itself is released once, at the error label */
		  qfile_close_scan (thread_p, &scan_id);
		  goto error;
		}
	    }
	  else
	    {
	      while (true)
		{
		  scan_code = qfile_scan_list_next (thread_p, &scan_id, &tuple_record, PEEK);
		  if (scan_code != S_SUCCESS)
		    {
		      break;
		    }

		  tuple_p = ((char *) tuple_record.tpl + QFILE_TUPLE_LENGTH_SIZE);
		  if (QFILE_GET_TUPLE_VALUE_FLAG (tuple_p) == V_UNBOUND)
		    {
		      /* unbound values do not take part in the accumulation */
		      continue;
		    }

		  or_init (&buf, (char *) tuple_p + QFILE_TUPLE_VALUE_HEADER_SIZE,
			   QFILE_GET_TUPLE_VALUE_LENGTH (tuple_p));
		  if (pr_type_p->data_readval (&buf, &dbval, list_id_p->type_list.domp[0], -1, true, NULL, 0) !=
		      NO_ERROR)
		    {
		      qfile_close_scan (thread_p, &scan_id);
		      goto error;
		    }

		  if (is_var_or_stddev)
		    {
		      if (tp_value_coerce (&dbval, &dbval, tmp_domain_ptr) != DOMAIN_COMPATIBLE)
			{
			  (void) pr_clear_value (&dbval);
			  qfile_close_scan (thread_p, &scan_id);
			  goto error;
			}
		    }

		  if (DB_IS_NULL (func_p->value))
		    {
		      /* first iteration: can't add to a null func_p->value, so the operand is copied instead */
		      const PR_TYPE *tmp_pr_type;
		      DB_TYPE dbval_type = DB_VALUE_DOMAIN_TYPE (&dbval);

		      tmp_pr_type = pr_type_from_id (dbval_type);
		      if (tmp_pr_type == NULL)
			{
			  (void) pr_clear_value (&dbval);
			  qfile_close_scan (thread_p, &scan_id);
			  goto error;
			}

		      if (is_var_or_stddev)
			{
			  /* func_p->value2 starts as X^2 */
			  if (qdata_multiply_dbval (&dbval, &dbval, &sqr_val, tmp_domain_ptr) != NO_ERROR)
			    {
			      (void) pr_clear_value (&dbval);
			      qfile_close_scan (thread_p, &scan_id);
			      goto error;
			    }

			  tmp_pr_type->setval (func_p->value2, &sqr_val, true);
			}

		      tmp_pr_type->setval (func_p->value, &dbval, true);
		    }
		  else
		    {
		      TP_DOMAIN *domain_ptr;

		      if (is_var_or_stddev)
			{
			  /* func_p->value2 accumulates SUM(X^2) */
			  if (qdata_multiply_dbval (&dbval, &dbval, &sqr_val, tmp_domain_ptr) != NO_ERROR)
			    {
			      (void) pr_clear_value (&dbval);
			      qfile_close_scan (thread_p, &scan_id);
			      goto error;
			    }

			  if (qdata_add_dbval (func_p->value2, &sqr_val, func_p->value2, tmp_domain_ptr) != NO_ERROR)
			    {
			      (void) pr_clear_value (&dbval);
			      pr_clear_value (&sqr_val);
			      qfile_close_scan (thread_p, &scan_id);
			      goto error;
			    }
			}

		      domain_ptr = tmp_domain_ptr != NULL ? tmp_domain_ptr : func_p->domain;
		      if ((func_p->function == PT_AVG) && (dbval.domain.general_info.type == DB_TYPE_NUMERIC))
			{
			  /* for AVG over NUMERIC no result domain is passed to the addition */
			  domain_ptr = NULL;
			}

		      /* func_p->value accumulates SUM(X) */
		      if (qdata_add_dbval (func_p->value, &dbval, func_p->value, domain_ptr) != NO_ERROR)
			{
			  (void) pr_clear_value (&dbval);
			  qfile_close_scan (thread_p, &scan_id);
			  goto error;
			}
		    }

		  (void) pr_clear_value (&dbval);
		}		/* while (true) */
	    }

	  qfile_close_scan (thread_p, &scan_id);

	  /* the number of distinct tuples becomes the row count used for averaging */
	  func_p->curr_cnt = list_id_p->tuple_cnt;
	}
    }

  if (is_same_group)
    {
      /* this is the end of a partition; save accumulator */
      qdata_copy_db_value (&func_p->part_value, func_p->value);
    }

  /* compute averages */
  if (func_p->curr_cnt > 0 && (func_p->function == PT_AVG || is_var_or_stddev))
    {
      TP_DOMAIN *double_domain_ptr;

      double_domain_ptr = tp_domain_resolve_default (DB_TYPE_DOUBLE);

      /* compute AVG(X) = SUM(X)/COUNT(X) */
      db_make_double (&dbval, func_p->curr_cnt);
      if (qdata_divide_dbval (func_p->value, &dbval, &xavgval, double_domain_ptr) != NO_ERROR)
	{
	  goto error;
	}

      if (func_p->function == PT_AVG)
	{
	  (void) pr_clear_value (func_p->value);
	  if (tp_value_coerce (&xavgval, func_p->value, double_domain_ptr) != DOMAIN_COMPATIBLE)
	    {
	      goto error;
	    }

	  goto exit;
	}

      if (func_p->function == PT_STDDEV_SAMP || func_p->function == PT_VAR_SAMP)
	{
	  /* compute SUM(X^2) / (n-1) */
	  if (func_p->curr_cnt > 1)
	    {
	      db_make_double (&dbval, func_p->curr_cnt - 1);
	    }
	  else
	    {
	      /* when not enough samples, return NULL */
	      (void) pr_clear_value (func_p->value);
	      db_make_null (func_p->value);
	      goto exit;
	    }
	}
      else
	{
	  assert (func_p->function == PT_STDDEV || func_p->function == PT_STDDEV_POP || func_p->function == PT_VARIANCE
		  || func_p->function == PT_VAR_POP);

	  /* compute SUM(X^2) / n */
	  db_make_double (&dbval, func_p->curr_cnt);
	}

      if (qdata_divide_dbval (func_p->value2, &dbval, &x2avgval, double_domain_ptr) != NO_ERROR)
	{
	  goto error;
	}

      /* compute {SUM(X) / (n)} OR {SUM(X) / (n-1)} for xxx_SAMP agg */
      if (qdata_divide_dbval (func_p->value, &dbval, &xavg_1val, double_domain_ptr) != NO_ERROR)
	{
	  goto error;
	}

      /* compute AVG(X) * {SUM(X) / (n)} , AVG(X) * {SUM(X) / (n-1)} for xxx_SAMP agg */
      if (qdata_multiply_dbval (&xavgval, &xavg_1val, &xavg2val, double_domain_ptr) != NO_ERROR)
	{
	  goto error;
	}

      /* compute VAR(X) = SUM(X^2)/(n) - AVG(X) * {SUM(X) / (n)} OR VAR(X) = SUM(X^2)/(n-1) - AVG(X) * {SUM(X) / (n-1)}
       * for xxx_SAMP aggregates */
      if (qdata_subtract_dbval (&x2avgval, &xavg2val, &varval, double_domain_ptr) != NO_ERROR)
	{
	  goto error;
	}

      /* AVG already left through "exit", so only the variance / stddev family reaches this point */
      pr_clone_value (&varval, func_p->value);

      if (!DB_IS_NULL (&varval) && is_stddev)
	{
	  db_value_domain_init (&dval, DB_TYPE_DOUBLE, DB_DEFAULT_PRECISION, DB_DEFAULT_SCALE);
	  if (tp_value_coerce (&varval, &dval, double_domain_ptr) != DOMAIN_COMPATIBLE)
	    {
	      goto error;
	    }

	  dtmp = db_get_double (&dval);

	  /* mathematically, dtmp should be zero or positive; however, due to some precision errors, in some cases it
	   * can be a very small negative number of which we cannot extract the square root */
	  dtmp = (dtmp < 0.0 ? 0.0 : dtmp);
	  dtmp = sqrt (dtmp);

	  db_make_double (&dval, dtmp);
	  pr_clone_value (&dval, func_p->value);
	}
    }

exit:
  /* destroy distinct temp list file */
  if (!is_same_group)
    {
      qfile_close_list (thread_p, func_p->list_id);
      qfile_destroy_list (thread_p, func_p->list_id);
    }

  return NO_ERROR;

error:
  /* single release point for the list file on failure */
  qfile_close_list (thread_p, func_p->list_id);
  qfile_destroy_list (thread_p, func_p->list_id);

  return ER_FAILED;
}
/*
 * qdata_analytic_interpolation () - compute the result of MEDIAN / PERCENTILE_CONT / PERCENTILE_DISC
 *   return: NO_ERROR, or the error returned by qdata_get_interpolation_function_result ()
 *   thread_p(in): thread entry
 *   ana_p(in/out): analytic node; value, domain and opr_dbtype are updated on success
 *   scan_id(in): opened scan over the list file holding the operand values
 *
 * NOTE: the target position is (tuple count - 1) * percentile; MEDIAN uses 0.5, PERCENTILE_DISC first rounds the
 *       percentile up to a multiple of 1 / tuple count and uses the lower row only. Nothing is done for an empty
 *       list.
 */
static int
qdata_analytic_interpolation (cubthread::entry *thread_p, cubxasl::analytic_list_node *ana_p,
			      QFILE_LIST_SCAN_ID *scan_id)
{
  assert (ana_p != NULL && scan_id != NULL && scan_id->status == S_OPENED);
  assert (QPROC_IS_INTERPOLATION_FUNC (ana_p));

  const FUNC_CODE func = ana_p->function;
  const bool is_disc = (func == PT_PERCENTILE_DISC);
  const INT64 n_tuples = scan_id->list_id.tuple_cnt;

  if (n_tuples < 1)
    {
      /* empty list: leave the result untouched */
      return NO_ERROR;
    }

  /* pick the percentile to look for */
  double pct = (func == PT_MEDIAN) ? 0.5 : ana_p->info.percentile.cur_group_percentile;
  if (is_disc)
    {
      pct = ceil (pct * n_tuples) / n_tuples;
    }

  /* fractional row position and the two rows surrounding it */
  const double pos = ((double) (n_tuples - 1)) * pct;
  const double lower_row = floor (pos);
  const double upper_row = is_disc ? lower_row : ceil (pos);

  const int rc = qdata_get_interpolation_function_result (thread_p, scan_id, scan_id->list_id.type_list.domp[0], 0,
							    pos, lower_row, upper_row, ana_p->value, &ana_p->domain,
							    ana_p->function);
  if (rc == NO_ERROR)
    {
      /* keep the operand type in sync with the domain written through &ana_p->domain */
      ana_p->opr_dbtype = TP_DOMAIN_TYPE (ana_p->domain);
    }

  return rc;
}