Skip to content

File db_json_path.cpp

File List > compat > db_json_path.cpp

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

#include "db_json_path.hpp"

#include "db_json.hpp"
#include "db_rapidjson.hpp"
#include "memory_alloc.h"
#include "string_opfunc.h"
#include "system_parameter.h"

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <limits>
#include <string>
#include <unordered_set>
#include <vector>
// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"

// Syntax family of a path string; see db_json_get_path_type () for how a string is classified.
enum class JSON_PATH_TYPE
{
  // path whose first non-whitespace character is '$'
  JSON_PATH_SQL_JSON,
  // any other path, including the empty string
  JSON_PATH_POINTER
};

// Prototypes of the file-local helpers defined further down in this file.
// Default arguments for db_json_path_is_token_valid_array_index () are declared here only.
static void db_json_trim_leading_spaces (std::string &path_string);
static JSON_PATH_TYPE db_json_get_path_type (std::string &path_string);
static bool db_json_isspace (const unsigned char &ch);
static std::size_t skip_whitespaces (const std::string &path, std::size_t token_begin);
static int db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards, unsigned long &index,
    std::size_t start = 0, std::size_t end = 0);
static bool db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin);
static bool db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin);
static bool db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin);
static bool db_json_path_is_valid_identifier_start_char (unsigned char ch);
static bool db_json_path_is_valid_identifier_char (unsigned char ch);
static void db_json_remove_leading_zeros_index (std::string &index);
static bool db_json_iszero (const unsigned char &ch);

/*
 * db_json_iszero () - predicate telling whether a character is the digit '0'
 *
 * return  : true if ch is '0', false otherwise
 * ch (in) : character to test
 */
static bool
db_json_iszero (const unsigned char &ch)
{
  const bool is_zero_digit = ('0' == ch);
  return is_zero_digit;
}

/*
 * db_json_path_is_token_valid_quoted_object_key () - Check that a quoted object_key has a closing quotation mark
 *
 * return               : true if an unescaped closing '"' is found, false if the path ends before one
 * path (in)            : path to be checked
 * token_begin (in/out) : offset of the opening '"'; on success it is replaced with the offset of the first
 *                        non-space character after the closing '"' (or path.length ()); unchanged on failure
 */
static bool
db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin)
{
  // scanning starts right after the opening quotation mark
  std::size_t pos = token_begin + 1;
  // true when the previous character is a backslash that escapes the current character
  bool escaped = false;

  while (pos < path.length ())
    {
      const char ch = path[pos];
      if (ch == '"' && !escaped)
        {
          // an unescaped '"' terminates the key
          break;
        }

      // each backslash toggles the state, so a '"' is escaped only after an odd number of backslashes;
      // any other character resets the state
      escaped = (ch == '\\') ? !escaped : false;
      ++pos;
    }

  if (pos == path.length ())
    {
      // ran out of characters without finding the closing quotation mark
      return false;
    }

  token_begin = skip_whitespaces (path, pos + 1);
  return true;
}

/*
 * db_json_path_quote_and_validate_unquoted_object_key () - Validate an unquoted object_key and, if valid, wrap it
 *                                                          in quotation marks inside the path string
 *
 * return               : validation result; the path is modified only when true is returned
 * path (in/out)        : path to be checked; two '"' characters are inserted around a valid key
 * token_begin (in/out) : offset of the key's first character; on success it is replaced with the offset of the
 *                        first non-space character after the inserted closing '"' (or path.length ())
 */
static bool
db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin)
{
  // key_end receives the offset of the first character that is not part of the key
  std::size_t key_end = token_begin;
  if (!db_json_path_is_token_valid_unquoted_object_key (path, key_end))
    {
      return false;
    }

  // object_keys are normalized by quoting - e.g. $.objectkey is represented as $."objectkey"
  path.insert (token_begin, "\"");
  // the opening quote shifted the key by one position, hence key_end + 1
  path.insert (key_end + 1, "\"");

  // two quotation marks were inserted, so the text following the key now starts at key_end + 2
  token_begin = skip_whitespaces (path, key_end + 2);
  return true;
}

/*
 * db_json_path_is_valid_identifier_start_char () - check whether a character may begin an unquoted object_key
 *
 * return  : true for '_' and for characters accepted by std::isalpha
 * ch (in) : character to test
 */
static bool
db_json_path_is_valid_identifier_start_char (unsigned char ch)
{
  // todo: As per SQL Standard accept Ecmascript Identifier start:
  // \UnicodeEscapedSequence
  // Any char in Unicode categories: Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo), Letter number (Nl)

  if (ch == '_')
    {
      return true;
    }
  return std::isalpha (ch) != 0;
}

/*
 * db_json_path_is_valid_identifier_char () - check whether a character may appear inside an unquoted object_key
 *
 * return  : true for '_' and for characters accepted by std::isalnum
 * ch (in) : character to test
 */
static bool
db_json_path_is_valid_identifier_char (unsigned char ch)
{
  // todo: As per SQL Standard accept Ecmascript Identifier:
  // \UnicodeEscapedSequence
  // Any char in Unicode categories: Connector punctuation (Pc), Non-spacing mark (Mn),
  // Combining spacing mark (Mc), Decimal number (Nd), Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo)
  // Letter number (Nl)

  if (ch == '_')
    {
      return true;
    }
  return std::isalnum (ch) != 0;
}

/*
 * db_json_path_is_token_valid_unquoted_object_key () - Check if an unquoted object_key is valid
 *
 * return                  : true if a key of at least one character starts at token_begin, false otherwise
 * path (in)               : path to be checked
 * token_begin (in/out)    : beginning offset of the token; on success it is replaced with the position of the
 *                           first character outside of the current valid token (at most path.length ());
 *                           left unchanged when false is returned
 */
static bool
db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin)
{
  // An empty path, or an offset at/after the end of the path, leaves no character to form a key.
  // Without this guard the start-character check below would be skipped and an empty token would be
  // reported as valid with token_begin pointing past the end of the path.
  if (token_begin >= path.length ())
    {
      return false;
    }

  // todo: this needs change. SQL standard specifies that object key format must obey
  // JavaScript rules of an Identifier (6.10.1).
  // Besides alphanumerics, object keys can be valid ECMAScript identifiers as defined in
  // http://www.ecma-international.org/ecma-262/5.1/#sec-7.6

  // Defined syntax (approx.):
  // IdentifierName -> IdentifierStart | (IdentifierName IdentifierPart)
  // IdentifierStart -> $ ( note: this is the ONLY specified forbidden by SQL Standard) | _ | \UnicodeEscapeSequence
  // IdentifierPart -> IdentifierStart | UnicodeCombiningMark | UnicodeDigit | UnicodeConnectorPunctuation | <ZWNJ>
  // | <ZWJ>

  if (!db_json_path_is_valid_identifier_start_char (static_cast<unsigned char> (path[token_begin])))
    {
      return false;
    }

  // consume the remaining identifier characters
  std::size_t i = token_begin + 1;
  while (i < path.length () && db_json_path_is_valid_identifier_char (static_cast<unsigned char> (path[i])))
    {
      ++i;
    }

  token_begin = i;

  return true;
}

/*
 * db_json_path_is_token_valid_array_index () - verify if token is a valid array index. token can be a substring of
 *                                              first argument (by default the entire argument).
 *
 * return          : NO_ERROR if str is exactly "-", if the token is a lone '*' and wildcards are allowed, or if the
 *                   token is a decimal number not greater than the json_max_array_idx system parameter;
 *                   ER_JSON_INVALID_PATH if the token is empty or contains a non-digit;
 *                   ER_JSON_ARRAY_INDEX_TOO_LARGE if the number is out of range
 * str (in)        : token or the string that token belongs to
 * allow_wildcards : whether json_path wildcards are allowed
 * index (out)     : converted index; written only after the token is known to consist of digits
 * start (in)      : start of token; default is start of string
 * end (in)        : end of token (exclusive); default is end of string; 0 is considered default value
 *
 * note: spaces between the last digit and end are tolerated
 */
static int
db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards,
    unsigned long &index, std::size_t start, std::size_t end)
{
  // json pointer will correspond the symbol '-' to JSON_ARRAY length
  // so if we have the json {"A":[1,2,3]} and the path /A/-
  // this will point to the 4th element of the array (zero indexed)
  if (str == "-")
    {
      return NO_ERROR;
    }

  if (end == 0)
    {
      // default is end of string
      end = str.length ();
    }

  if (start == end)
    {
      // empty token
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
      return ER_JSON_INVALID_PATH;
    }

  // ignore spaces trailing the token
  std::size_t last_non_space = end - 1;
  for (; last_non_space > start && str[last_non_space] == ' '; --last_non_space);
  if (allow_wildcards && start == last_non_space && str[start] == '*')
    {
      return NO_ERROR;
    }

  // Remaining invalid cases are: 1. Non-digits are present
  //                              2. Index overflows Rapidjson's index representation type

  // we need to check for non-digits since strtoul simply returns 0 in case conversion
  // can not be made
  for (auto it = str.cbegin () + start; it < str.cbegin () + last_non_space + 1; ++it)
    {
      if (!std::isdigit (static_cast<unsigned char> (*it)))
        {
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
    }

  // strtoul reports overflow only through errno and never clears it on success; reset it first so that a stale
  // ERANGE left by an earlier, unrelated call is not mistaken for an overflow of this token
  errno = 0;
  index = std::strtoul (str.c_str () + start, nullptr, 10);
  if (errno == ERANGE)
    {
      errno = 0;
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_ARRAY_INDEX_TOO_LARGE, 0);
      return ER_JSON_ARRAY_INDEX_TOO_LARGE;
    }

  if (index > (unsigned long) prm_get_integer_value (PRM_ID_JSON_MAX_ARRAY_IDX))
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_ARRAY_INDEX_TOO_LARGE, 0);
      return ER_JSON_ARRAY_INDEX_TOO_LARGE;
    }

  // this is a valid array index
  return NO_ERROR;
}

/*
 * skip_whitespaces () - Advance an offset past consecutive ' ' characters
 *
 * return    : offset of the first character at or after pos that is not ' ', or path.length () if only spaces
 *             remain; pos itself when pos is already at or beyond the end of path
 * path (in) : path
 * pos (in)  : starting position offset
 *
 * note: only the space character is skipped; tabs and newlines are not
 */
static std::size_t
skip_whitespaces (const std::string &path, std::size_t pos)
{
  const std::size_t path_length = path.length ();
  while (pos < path_length && path[pos] == ' ')
    {
      ++pos;
    }
  return pos;
}

/*
 * db_json_isspace () - bool-returning wrapper around std::isspace, usable as an algorithm predicate
 *
 * return  : true if std::isspace accepts ch, false otherwise
 * ch (in) : character to test
 */
static bool
db_json_isspace (const unsigned char &ch)
{
  if (std::isspace (ch))
    {
      return true;
    }
  return false;
}

/*
 * db_json_trim_leading_spaces () - remove leading whitespace from a path string, in place
 *
 * path_string (in/out) : string to trim; every leading character accepted by db_json_isspace () is erased
 */
static void
db_json_trim_leading_spaces (std::string &path_string)
{
  // locate the first character that is not whitespace
  std::string::iterator first_kept = path_string.begin ();
  while (first_kept != path_string.end () && db_json_isspace (*first_kept))
    {
      ++first_kept;
    }

  // drop everything before it
  path_string.erase (path_string.begin (), first_kept);
}

/*
 * db_json_get_path_type () - classify a path string as SQL json_path or json_pointer
 *
 * return               : JSON_PATH_SQL_JSON if the trimmed string starts with '$', JSON_PATH_POINTER otherwise
 *                        (an empty string is classified as a pointer)
 * path_string (in/out) : path; leading whitespace is removed as a side effect
 */
static JSON_PATH_TYPE
db_json_get_path_type (std::string &path_string)
{
  db_json_trim_leading_spaces (path_string);

  const bool starts_with_dollar = !path_string.empty () && path_string[0] == '$';
  return starts_with_dollar ? JSON_PATH_TYPE::JSON_PATH_SQL_JSON : JSON_PATH_TYPE::JSON_PATH_POINTER;
}

/*
 * validate_and_create_from_json_path () - Check if a given path is a valid SQL json_path and append one token to
 *                                         this JSON_PATH for every path component that is parsed
 *
 * return                  : NO_ERROR on success, ER_JSON_INVALID_PATH if path is invalid, or the error returned by
 *                           db_json_path_is_token_valid_array_index () for a bad array index
 * sql_path (in/out)       : path to be checked; leading whitespace is trimmed and unquoted object keys are quoted
 *                           in place
 *
 * note: tokens pushed before an error is detected are not removed from this object.
 */
int
JSON_PATH::validate_and_create_from_json_path (std::string &sql_path)
{
  // skip leading white spaces
  db_json_trim_leading_spaces (sql_path);
  if (sql_path.empty ())
    {
      // empty
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
      return ER_JSON_INVALID_PATH;
    }

  if (sql_path[0] != '$')
    {
      // first character should always be '$'
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
      return ER_JSON_INVALID_PATH;
    }
  // start parsing path string by skipping dollar character
  std::size_t i = skip_whitespaces (sql_path, 1);

  // i always points at the first character of the next token; each case leaves it past the token it consumed
  while (i < sql_path.length ())
    {
      // to begin a next token we have only 3 possibilities:
      // with dot we start an object name
      // with bracket we start an index
      // with * we have the beginning of a '**' wildcard
      switch (sql_path[i])
    {
    case '[':
    {
      std::size_t end_bracket_offset;
      i = skip_whitespaces (sql_path, i + 1);

      // the index token spans from i up to (not including) the first ']'
      end_bracket_offset = sql_path.find_first_of (']', i);
      if (end_bracket_offset == std::string::npos)
        {
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
      unsigned long index;
      int error_code = db_json_path_is_token_valid_array_index (sql_path, true, index, i, end_bracket_offset);
      if (error_code != NO_ERROR)
        {
          ASSERT_ERROR ();
          return error_code;
        }

      // todo check if it is array_index or array_index_wildcard
      // index is not written by the validator for the wildcard, so it is read only in the else branch
      if (sql_path[i] == '*')
        {
          push_array_index_wildcard ();
        }
      else
        {
          // note that db_json_path_is_token_valid_array_index () checks the index to not overflow
          // a rapidjson::SizeType (unsigned int).
          push_array_index (index);
        }
      i = skip_whitespaces (sql_path, end_bracket_offset + 1);
      break;
    }
    case '.':
      i = skip_whitespaces (sql_path, i + 1);
      if (i == sql_path.length ())
        {
          // a dot must be followed by an object key or '*'
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
      switch (sql_path[i])
        {
        case '"':
        {
          size_t old_idx = i;
          if (!db_json_path_is_token_valid_quoted_object_key (sql_path, i))
        {
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
          // the key is stored together with its quotation marks
          // NOTE(review): i was already advanced past spaces following the closing quote, so the stored text
          // may include those trailing spaces - confirm this is intended
          push_object_key (sql_path.substr (old_idx, i - old_idx));
          break;
        }
        case '*':
          push_object_key_wildcard ();
          i = skip_whitespaces (sql_path, i + 1);
          break;
        default:
        {
          size_t old_idx = i;
          // unquoted object_keys
          // on success the helper has inserted quotation marks around the key inside sql_path
          if (!db_json_path_quote_and_validate_unquoted_object_key (sql_path, i))
        {
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
          // NOTE(review): same as the quoted case, spaces following the key may be part of the stored text
          push_object_key (sql_path.substr (old_idx, i - old_idx));
          break;
        }
        }
      break;

    case '*':
      // only ** wildcard is allowed in this case
      if (++i >= sql_path.length () || sql_path[i] != '*')
        {
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
      push_double_wildcard ();
      i = skip_whitespaces (sql_path, i + 1);
      if (i == sql_path.length ())
        {
          // ** wildcard requires suffix
          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
          return ER_JSON_INVALID_PATH;
        }
      break;

    default:
      // any other character cannot start a token
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
      return ER_JSON_INVALID_PATH;
    }
    }
  return NO_ERROR;
}

/*
 * db_json_split_path_by_delimiters () - split a path into tokens at any of the given delimiter characters
 *
 * return           : NO_ERROR, or ER_JSON_INVALID_PATH if a '"' delimiter has no matching closing '"'
 * path (in)        : path to split
 * delim (in)       : set of delimiter characters
 * allow_empty (in) : whether empty tokens between delimiters are kept
 * split_path (out) : tokens are appended here; cleared if an error is returned
 *
 * note: when '"' is one of the delimiters, the text between a pair of quotation marks becomes one token (without
 *       the quotation marks) and text preceding the opening quotation mark since the last delimiter is dropped.
 *       After splitting, every token that is a valid array index gets its leading zeros removed.
 */
int
db_json_split_path_by_delimiters (const std::string &path, const std::string &delim, bool allow_empty,
                                  std::vector<std::string> &split_path)
{
  std::size_t token_start = 0;

  for (std::size_t hit = path.find_first_of (delim, token_start); hit != std::string::npos;
       hit = path.find_first_of (delim, hit + 1))
    {
      if (path[hit] == '"')
        {
          const std::size_t closing_quote = path.find_first_of ('"', hit + 1);
          if (closing_quote == std::string::npos)
            {
              /* this should have been caught earlier */
              assert (false);
              split_path.clear ();
              er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
              return ER_JSON_INVALID_PATH;
            }

          // the quoted text, without its quotation marks, is one token
          split_path.push_back (path.substr (hit + 1, closing_quote - hit - 1));
          // resume both the search and the next token after the closing quotation mark
          hit = closing_quote;
          token_start = hit + 1;
          continue;
        }

      // any delimiter other than '"' closes the token that started at token_start
      const std::string piece = path.substr (token_start, hit - token_start);
      if (allow_empty || !piece.empty ())
        {
          split_path.push_back (piece);
        }
      token_start = hit + 1;
    }

  // whatever follows the last delimiter is the final token
  const std::string tail = path.substr (token_start);
  if (allow_empty || !tail.empty ())
    {
      split_path.push_back (tail);
    }

  for (std::string &token : split_path)
    {
      unsigned long unused_index;
      if (db_json_path_is_token_valid_array_index (token, false, unused_index) != NO_ERROR)
        {
          // ignore error. We only need to decide whether to skip it in case it is not array_idx
          er_clear ();
          continue;
        }

      db_json_remove_leading_zeros_index (token);
    }

  return NO_ERROR;
}

/*
 * match_pattern () - recursively match the pattern tokens starting at it1 against the path tokens starting at it2
 *
 * return       : FULL_MATCH if both token sequences are exhausted together, PREFIX_MATCH if the pattern is
 *                exhausted while path tokens remain, NO_MATCH otherwise
 * pattern (in) : pattern; may contain wildcards
 * it1 (in)     : current position in the pattern's tokens
 * path (in)    : path that is matched against the pattern
 * it2 (in)     : current position in the path's tokens
 */
JSON_PATH::MATCH_RESULT
JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH::token_containter_type::const_iterator &it1,
                          const JSON_PATH &path, const JSON_PATH::token_containter_type::const_iterator &it2)
{
  const bool pattern_exhausted = (it1 == pattern.m_path_tokens.end ());
  const bool path_exhausted = (it2 == path.m_path_tokens.end ());

  if (pattern_exhausted)
    {
      return path_exhausted ? FULL_MATCH : PREFIX_MATCH;
    }

  if (path_exhausted)
    {
      // note that in case of double wildcard we have guaranteed a token after it
      return NO_MATCH;
    }

  if (it1->m_type != PATH_TOKEN::double_wildcard)
    {
      // ordinary token: it has to match the current path token, then both sequences advance
      if (!PATH_TOKEN::match_pattern (*it1, *it2))
        {
          return NO_MATCH;
        }
      return match_pattern (pattern, it1 + 1, path, it2 + 1);
    }

  // for "**" wildcard we try to match the remaining pattern against each suffix of the path
  const MATCH_RESULT wildcard_skipped = match_pattern (pattern, it1 + 1, path, it2);
  if (wildcard_skipped == FULL_MATCH)
    {
      // return early if we have a full result
      return FULL_MATCH;
    }

  const MATCH_RESULT path_token_consumed = match_pattern (pattern, it1, path, it2 + 1);
  if (path_token_consumed == FULL_MATCH)
    {
      return FULL_MATCH;
    }

  if (wildcard_skipped == PREFIX_MATCH || path_token_consumed == PREFIX_MATCH)
    {
      return PREFIX_MATCH;
    }
  return NO_MATCH;
}

/*
 * match_pattern () - match a whole path against a whole pattern
 *
 * return       : result of matching both token sequences from their beginnings
 * pattern (in) : pattern; may contain wildcards
 * path (in)    : path to be matched; asserted to contain no wildcard
 */
JSON_PATH::MATCH_RESULT
JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH &path)
{
  assert (!path.contains_wildcard ());

  const JSON_PATH::token_containter_type::const_iterator pattern_first = pattern.m_path_tokens.begin ();
  const JSON_PATH::token_containter_type::const_iterator path_first = path.m_path_tokens.begin ();

  return match_pattern (pattern, pattern_first, path, path_first);
}

/*
 * db_json_path_unquote_object_keys () - Unquote, when possible, object_keys of the json_path
 *
 * return                  : NO_ERROR, or the error returned by db_json_split_path_by_delimiters ()
 * sql_path (in/out)       : path; replaced with the rewritten path on success
 *
 * note: the path is split at '.' and '['. A token starting with '"' is emitted after a '.', without its first and
 *       last character if what remains is a valid unquoted object_key, unchanged otherwise. Any other token is
 *       emitted after a '['.
 */
int
db_json_path_unquote_object_keys (std::string &sql_path)
{
  // todo: rewrite as json_path.dump () + unquoting the object_keys
  std::vector<std::string> tokens;
  const int error_code = db_json_split_path_by_delimiters (sql_path, ".[", false, tokens);
  if (error_code != NO_ERROR)
    {
      ASSERT_ERROR ();
      return error_code;
    }

  assert (!tokens.empty () && tokens[0] == "$");

  std::string rebuilt = "$";
  for (std::size_t idx = 1; idx < tokens.size (); ++idx)
    {
      const std::string &token = tokens[idx];

      if (token[0] != '"')
        {
          // not an object key: the token came after a '['
          rebuilt += "[";
          rebuilt += token;
          continue;
        }

      rebuilt += ".";

      // candidate key: the token without its first and last character
      const std::string unquoted = token.substr (1, token.length () - 2);
      std::size_t key_end = 0;
      const bool can_unquote = db_json_path_is_token_valid_unquoted_object_key (unquoted, key_end)
                               && key_end >= unquoted.length ();

      // keep the quotes unless the whole content is a valid unquoted key
      rebuilt += can_unquote ? unquoted : token;
    }

  sql_path = std::move (rebuilt);
  return NO_ERROR;
}

/*
 * db_json_remove_leading_zeros_index () - Erase leading zeros from sql path index
 *
 * index (in/out)            : index text; leading '0' characters are removed in place and a string that
 *                             becomes empty is replaced with "0"
 * example: $[000123] -> $[123]
 */
static void
db_json_remove_leading_zeros_index (std::string &index)
{
  // find the first character that is not '0'
  std::string::iterator first_kept = index.begin ();
  while (first_kept != index.end () && db_json_iszero (*first_kept))
    {
      ++first_kept;
    }
  index.erase (index.begin (), first_kept);

  // the string held nothing but zeros (or was empty to begin with)
  if (index.empty ())
    {
      index = "0";
    }
}

// Default constructor: the token type is array_index; no other member is initialized here.
PATH_TOKEN::PATH_TOKEN ()
  : m_type (array_index)
{

}

// Constructs a token of the given type that carries an array index.
// type (in)      : token type
// array_idx (in) : value stored in m_array_idx
PATH_TOKEN::PATH_TOKEN (token_type type, unsigned long array_idx)
  : m_type (type)
  , m_array_idx (array_idx)
{

}

// Constructs a token of the given type that carries a string.
// type (in) : token type
// s (in)    : text moved into m_object_key
PATH_TOKEN::PATH_TOKEN (token_type type, std::string &&s)
  : m_type (type)
  , m_object_key (std::move (s))
{

}

// Returns the stored object key. Must only be called on object_key tokens (asserted).
const std::string &
PATH_TOKEN::get_object_key () const
{
  assert (m_type == object_key);

  return m_object_key;
}

// Returns the stored array index. Must only be called on array_index tokens (asserted).
unsigned long
PATH_TOKEN::get_array_index () const
{
  assert (m_type == array_index);

  return m_array_idx;
}

// Tells whether this token is one of the three wildcard kinds:
// object_key_wildcard, array_index_wildcard or double_wildcard.
bool
PATH_TOKEN::is_wildcard () const
{
  switch (m_type)
    {
    case object_key_wildcard:
    case array_index_wildcard:
    case double_wildcard:
      return true;
    default:
      return false;
    }
}

/*
 * match_pattern () - check whether a single pattern token accepts a single path token
 *
 * return       : true if matchee is accepted by matcher
 * matcher (in) : pattern token; may be a wildcard
 * matchee (in) : path token; asserted not to be a wildcard
 */
bool
PATH_TOKEN::match_pattern (const PATH_TOKEN &matcher, const PATH_TOKEN &matchee)
{
  assert (!matchee.is_wildcard ());

  const bool matchee_is_key = (matchee.m_type == object_key);
  const bool matchee_is_index = (matchee.m_type == array_index);

  if (matcher.m_type == double_wildcard)
    {
      // accepts both object keys and array indexes
      return matchee_is_key || matchee_is_index;
    }
  if (matcher.m_type == object_key_wildcard)
    {
      return matchee_is_key;
    }
  if (matcher.m_type == array_index_wildcard)
    {
      return matchee_is_index;
    }
  if (matcher.m_type == object_key)
    {
      // values are compared only when the types agree
      return matchee_is_key && matcher.get_object_key () == matchee.get_object_key ();
    }
  if (matcher.m_type == array_index)
    {
      return matchee_is_index && matcher.get_array_index () == matchee.get_array_index ();
    }

  // any other matcher type accepts nothing
  return false;
}

// Appends an array_index token holding idx to the end of the path.
void
JSON_PATH::push_array_index (unsigned long idx)
{
  m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index, idx);
}

// Appends an array_index_wildcard token (constructed with the text "*") to the end of the path.
void
JSON_PATH::push_array_index_wildcard ()
{
  m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index_wildcard, std::string ("*"));
}

// Appends an object_key token to the end of the path; the key text is moved into the new token.
void
JSON_PATH::push_object_key (std::string &&object_key)
{
  m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key, std::move (object_key));
}

// Appends an object_key_wildcard token (constructed with the text "*") to the end of the path.
void
JSON_PATH::push_object_key_wildcard ()
{
  m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key_wildcard, std::string ("*"));
}

// Appends a double_wildcard token (constructed with the text "**") to the end of the path.
void
JSON_PATH::push_double_wildcard ()
{
  m_path_tokens.emplace_back (PATH_TOKEN::token_type::double_wildcard, std::string ("**"));
}

// Removes the last token of the path. There is no emptiness check here, so the path must hold at least one token.
void
JSON_PATH::pop ()
{
  m_path_tokens.pop_back ();
}

bool
JSON_PATH::contains_wildcard () const
{
  for (const PATH_TOKEN &tkn : m_path_tokens)
    {
      if (tkn.is_wildcard ())
    {
      return true;
    }
    }
  return false;
}

std::string
JSON_PATH::dump_json_path () const
{
  std::string res = "$";

  for (const auto &tkn : m_path_tokens)
    {
      switch (tkn.m_type)
    {
    case PATH_TOKEN::array_index:
      res += '[';
      res += std::to_string (tkn.get_array_index ());
      res += ']';
      break;
    case PATH_TOKEN::array_index_wildcard:
      res += "[*]";
      break;
    case PATH_TOKEN::object_key:
      res += '.';
      res += tkn.get_object_key ();
      break;
    case PATH_TOKEN::object_key_wildcard:
      res += ".*";
      break;
    case PATH_TOKEN::double_wildcard:
      res += "**";
      break;
    case PATH_TOKEN::array_end_index:
      // this case is valid and possible in case of ER_JSON_PATH_DOES_NOT_EXIST
      // we don't have the JSON in this context and cannot replace '-' with last index
      // for json_pointer -> json_path conversion so we leave empty suffix
      break;
    default:
      assert (false);
      break;
    }
    }

  return res;
}

// Convenience overload: creates or replaces the value at this path inside document jd with a copy of jv,
// using the document's own allocator.
void
JSON_PATH::set (JSON_DOC &jd, const JSON_VALUE &jv) const
{
  set (db_json_doc_to_value (jd), jv, jd.GetAllocator ());
}

/*
 * set () - Create or replace a value at path in the document
 *
 * jd (in/out)    : value we insert in; it is modified in place
 * jv (in)        : value to be inserted; a copy of it is stored
 * allocator (in) : allocator used for every node that is created and for the final copy
 *
 * Every token of the path is resolved in order. A node whose kind does not fit the token (array for index
 * tokens, object for key tokens) is converted with SetArray ()/SetObject (); missing array cells and missing
 * object members are created as null values on the way.
 * If the path contains a wildcard token the function asserts and returns; nodes created for earlier tokens are
 * left in place.
 *
 * Our implementation does not follow the JSON Pointer https://tools.ietf.org/html/rfc6901#section-4 standard fully
 * We normalize json_pointers to json_paths and resolve token types independently of the document that gets operated
 * by the normalized path.
 * Therefore, we cannot traverse the doc contextually as described in the rfc e.g. both '{"0":10}' and '[10]' to
 * provide same results for '/1' json_pointer.
 */
void
JSON_PATH::set (JSON_VALUE &jd, const JSON_VALUE &jv, JSON_PRIVATE_MEMPOOL &allocator) const
{
  // val walks down the document, one level per token
  JSON_VALUE *val = &jd;
  for (const PATH_TOKEN &tkn : m_path_tokens)
    {
      // step 1: make sure the current node has the container kind required by the token
      switch (tkn.m_type)
    {
    case PATH_TOKEN::token_type::array_index:
    case PATH_TOKEN::token_type::array_end_index:
      if (!val->IsArray ())
        {
          val->SetArray ();
        }
      break;
    case PATH_TOKEN::token_type::object_key:
      if (!val->IsObject ())
        {
          val->SetObject ();
        }
      break;
    case PATH_TOKEN::token_type::array_index_wildcard:
    case PATH_TOKEN::token_type::object_key_wildcard:
    case PATH_TOKEN::token_type::double_wildcard:
      // error? unexpected set - wildcards not allowed for set
      assert (false);
      return;
    }

      // step 2: descend into the child selected by the token, creating it if it does not exist
      if (val->IsArray ())
    {
      JSON_VALUE::Array arr = val->GetArray ();
      if (tkn.m_type == PATH_TOKEN::token_type::array_end_index)
        {
          // insert dummy
          // '-' token: a new null cell is appended and becomes the current node
          arr.PushBack (JSON_VALUE ().SetNull (), allocator);
          val = &val->GetArray ()[val->GetArray ().Size () - 1];
        }
      else
        {
          rapidjson::SizeType idx = (rapidjson::SizeType) tkn.get_array_index ();
          // pad the array with nulls until cell idx exists
          while (idx >= arr.Size ())
        {
          arr.PushBack (JSON_VALUE ().SetNull (), allocator);
        }
          val = &val->GetArray ()[idx];
        }
    }
      else if (val->IsObject ())
    {
      // the stored key text is converted with db_json_json_string_as_utf8 () before the member lookup
      std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
      JSON_VALUE::MemberIterator m = val->FindMember (encoded_key.c_str ());
      if (m == val->MemberEnd ())
        {
          // insert dummy
          unsigned int len = (rapidjson::SizeType) encoded_key.length ();
          val->AddMember (JSON_VALUE (encoded_key.c_str (), len, allocator), JSON_VALUE ().SetNull (), allocator);

          val = & (--val->MemberEnd ())->value; // Assume AddMember() appends at the end
        }
      else
        {
          val = &m->value;
        }
    }
    }

  // val now points at the target node (jd itself for an empty path); overwrite it with a copy of jv
  val->CopyFrom (jv, allocator);
}

// Non-const overload: forwards to the const version of get () and casts constness away from the result.
// Returns NULL when the const version does.
JSON_VALUE *
JSON_PATH::get (JSON_DOC &jd) const
{
  return const_cast<JSON_VALUE *> (get (const_cast<const JSON_DOC &> (jd)));
}

/*
 * get () - Walk a doc following a path and retrieve the value pointed at
 *
 * jd (in) : document to walk
 * return  : found value at path; the document's root value for an empty path; NULL if a token does not fit the
 *           kind of the node it is applied to, if an index is out of bounds, if a key is missing or if a scalar
 *           is reached while tokens remain
 */
const JSON_VALUE *
JSON_PATH::get (const JSON_DOC &jd) const
{
  const JSON_VALUE *current = &db_json_doc_to_value (jd);

  for (const PATH_TOKEN &tkn : m_path_tokens)
    {
      if (current->IsArray ())
        {
          // only plain indexes can address an array cell
          if (tkn.m_type != PATH_TOKEN::token_type::array_index)
            {
              return NULL;
            }

          const unsigned idx = tkn.get_array_index ();
          if (idx >= current->GetArray ().Size ())
            {
              return NULL;
            }

          current = &current->GetArray ()[idx];
          continue;
        }

      if (!current->IsObject ())
        {
          // a scalar has no children
          return NULL;
        }

      if (tkn.m_type != PATH_TOKEN::token_type::object_key)
        {
          return NULL;
        }

      const std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
      JSON_VALUE::ConstMemberIterator member = current->FindMember (encoded_key.c_str ());
      if (member == current->MemberEnd ())
        {
          return NULL;
        }
      current = &member->value;
    }

  return current;
}

/*
 * extract_from_subtree () - recursively collect the values of a subtree that are matched by the remaining tokens
 *                           of a path (wildcards allowed)
 *
 * path (in)              : path that is being matched
 * tkn_array_offset (in)  : index of the first token of path that is not yet consumed
 * jv (in)                : root of the subtree that is matched against the remaining tokens
 * vals_hash_set (in/out) : addresses of the values collected so far; used to avoid collecting a value twice
 * vals (in/out)          : collected values, in the order in which they were first encountered
 */
void
JSON_PATH::extract_from_subtree (const JSON_PATH &path, size_t tkn_array_offset, const JSON_VALUE &jv,
                 std::unordered_set<const JSON_VALUE *> &vals_hash_set,
                 std::vector<const JSON_VALUE *> &vals)
{
  if (tkn_array_offset == path.get_token_count ())
    {
      // No suffix remaining -> collect match
      // Note: some nodes of the tree are encountered multiple times (only during double wildcards)
      // therefore the use of unordered_set
      if (vals_hash_set.find (&jv) == vals_hash_set.end ())
    {
      vals_hash_set.insert (&jv);
      vals.push_back (&jv);
    }
      return;
    }

  const PATH_TOKEN &crt_tkn = path.m_path_tokens[tkn_array_offset];
  if (jv.IsArray ())
    {
      // on an array only index tokens, index wildcards and double wildcards can match
      switch (crt_tkn.m_type)
    {
    case PATH_TOKEN::token_type::array_index:
    {
      unsigned idx = crt_tkn.get_array_index ();
      if (idx >= jv.GetArray ().Size ())
        {
          return;
        }
      extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[idx], vals_hash_set, vals);
      return;
    }
    case PATH_TOKEN::token_type::array_index_wildcard:
      // every cell is matched against the rest of the path
      for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
        {
          extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[i], vals_hash_set, vals);
        }
      return;
    case PATH_TOKEN::token_type::double_wildcard:
      // Advance token_array_offset
      extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
      for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
        {
          // Advance in tree, keep current token_array_offset
          extract_from_subtree (path, tkn_array_offset, jv.GetArray ()[i], vals_hash_set, vals);
        }
      return;
    default:
      return;
    }
    }
  else if (jv.IsObject ())
    {
      // on an object only key tokens, key wildcards and double wildcards can match
      switch (crt_tkn.m_type)
    {
    case PATH_TOKEN::token_type::object_key:
    {
      std::string encoded_key = db_json_json_string_as_utf8 (crt_tkn.get_object_key ());
      JSON_VALUE::ConstMemberIterator m = jv.FindMember (encoded_key.c_str ());
      if (m == jv.MemberEnd ())
        {
          return;
        }
      extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
      return;
    }
    case PATH_TOKEN::token_type::object_key_wildcard:
      // every member value is matched against the rest of the path
      for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
        {
          extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
        }
      return;
    case PATH_TOKEN::token_type::double_wildcard:
      // Advance token_array_offset
      extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
      for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
        {
          // Advance in tree, keep current token_array_offset
          extract_from_subtree (path, tkn_array_offset, m->value, vals_hash_set, vals);
        }
      return;
    default:
      return;
    }
    }
  // Json scalars are ignored if there is a remaining suffix
}

std::vector<const JSON_VALUE *>
JSON_PATH::extract (const JSON_DOC &jd) const
{
  std::unordered_set<const JSON_VALUE *> vals_hash_set;
  std::vector<const JSON_VALUE *> res;

  extract_from_subtree (*this, 0, db_json_doc_to_value (jd), vals_hash_set, res);

  return res;
}

/*
 * erase () - remove the value this path points at from a document
 *
 * jd (in/out) : document to modify
 * return      : true if an array cell was removed; for an object parent, the result of EraseMember ();
 *               false if the path is empty, the parent value cannot be found, the last token does not fit the
 *               kind of the parent, or the array index is out of bounds
 */
bool
JSON_PATH::erase (JSON_DOC &jd) const
{
  if (get_token_count () == 0)
    {
      // the root cannot be erased
      return false;
    }

  JSON_VALUE *parent_value = get_parent ().get (jd);
  if (parent_value == nullptr)
    {
      return false;
    }

  const PATH_TOKEN &last_tkn = m_path_tokens.back ();

  if (parent_value->IsArray ())
    {
      // the last token has to be an index inside the bounds of the array
      if (is_last_array_index_less_than (parent_value->GetArray ().Size ()))
        {
          parent_value->Erase (parent_value->Begin () + last_tkn.get_array_index ());
          return true;
        }
      return false;
    }

  if (parent_value->IsObject () && last_tkn.m_type == PATH_TOKEN::object_key)
    {
      const std::string encoded_key = db_json_json_string_as_utf8 (last_tkn.get_object_key ());
      return parent_value->EraseMember (encoded_key.c_str ());
    }

  return false;
}

// Returns a pointer to the last token of the path, or NULL if the path has no tokens.
const PATH_TOKEN *
JSON_PATH::get_last_token () const
{
  const size_t token_count = get_token_count ();
  if (token_count == 0)
    {
      return NULL;
    }
  return &m_path_tokens[token_count - 1];
}

// Returns the number of tokens in the path.
size_t
JSON_PATH::get_token_count () const
{
  return m_path_tokens.size ();
}

// Tells whether the path has no tokens, i.e. it designates the document root.
bool
JSON_PATH::is_root_path () const
{
  return get_token_count () == 0;
}

/*
 * get_parent () - build the path of the parent: a copy of this path without its last token
 *
 * return : parent path; for a path without tokens (not expected, asserted) a default-constructed path
 */
JSON_PATH
JSON_PATH::get_parent () const
{
  if (get_token_count () > 0)
    {
      // todo: improve getting a slice of the m_path_tokens vector
      JSON_PATH parent (*this);
      parent.pop ();
      return parent;
    }

  // this should not happen
  assert (false);
  JSON_PATH empty_path;
  return empty_path;
}

/*
 * is_last_array_index_less_than () - check that the last token is an array index smaller than a given size
 *
 * size (in) : exclusive upper bound for the index
 * return    : true if the last token is of type array_index and its index is less than size
 *
 * note: the path must have at least one token (asserted)
 */
bool
JSON_PATH::is_last_array_index_less_than (size_t size) const
{
  const PATH_TOKEN *last_token = get_last_token ();
  assert (last_token != NULL);

  if (last_token->m_type != PATH_TOKEN::array_index)
    {
      return false;
    }
  return last_token->get_array_index () < size;
}

// Tells whether the last token is an array index equal to 0 (the only index less than 1).
bool
JSON_PATH::is_last_token_array_index_zero () const
{
  return is_last_array_index_less_than (1);
}

// Tells whether the last token addresses an array cell, i.e. it is of type array_index or array_end_index.
// Returns false for a path without tokens.
bool
JSON_PATH::points_to_array_cell () const
{
  const PATH_TOKEN *last_token = get_last_token ();
  if (last_token == NULL)
    {
      return false;
    }

  return last_token->m_type == PATH_TOKEN::array_index || last_token->m_type == PATH_TOKEN::array_end_index;
}

/*
 * parent_exists () - check whether the parent of this path can be found in a document
 *
 * jd (in) : document to look into
 * return  : true if get () finds a value for the parent path; false otherwise or if this path has no tokens
 */
bool
JSON_PATH::parent_exists (JSON_DOC &jd) const
{
  if (get_token_count () == 0)
    {
      // the root has no parent
      return false;
    }

  return get_parent ().get (jd) != NULL;
}

/*
 * init ()
 *
 * path (in)
 * An sql_path is normalized to rapidjson standard path
 * Example: $[0]."name1".name2[2] -> /0/name1/name2/2
 */
int
JSON_PATH::parse (const char *path)
{
  std::string sql_path_string (path);
  JSON_PATH_TYPE json_path_type = db_json_get_path_type (sql_path_string);

  if (json_path_type == JSON_PATH_TYPE::JSON_PATH_POINTER)
    {
      // path is not SQL path format; consider it JSON pointer.
      int error_code = from_json_pointer (sql_path_string);
      if (error_code != NO_ERROR)
    {
      ASSERT_ERROR ();
    }
      return error_code;
    }

  int error_code = validate_and_create_from_json_path (sql_path_string);
  if (error_code != NO_ERROR)
    {
      ASSERT_ERROR ();
    }
  return error_code;
}

int
JSON_PATH::from_json_pointer (const std::string &pointer_path)
{
  typedef rapidjson::GenericPointer<JSON_VALUE>::Token TOKEN;
  static const rapidjson::SizeType kPointerInvalidIndex = rapidjson::kPointerInvalidIndex;

  typedef rapidjson::GenericPointer<JSON_VALUE> JSON_POINTER;

  JSON_POINTER jp (pointer_path.c_str ());
  if (!jp.IsValid ())
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
      return ER_JSON_INVALID_PATH;
    }

  size_t tkn_cnt = jp.GetTokenCount ();
  const TOKEN *tokens = jp.GetTokens ();

  // convert rapidjson's tokens to our tokens:
  for (size_t i = 0; i < tkn_cnt; ++i)
    {
      const TOKEN &rapid_token = tokens[i];

      if (rapid_token.index != kPointerInvalidIndex)
    {
      // array_index
      push_array_index (rapid_token.index);
    }
      else if (rapid_token.length == 1 && rapid_token.name[0] == '-' )
    {
      // '-' special idx token
      m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_end_index, "-");
    }
      else
    {
      // object_key
      char *escaped;
      size_t escaped_size;
      db_string_escape_str (rapid_token.name, rapid_token.length, &escaped, &escaped_size);

      push_object_key (escaped);
      db_private_free (NULL, escaped);
    }
    }

  return NO_ERROR;
}