cubrid-doxygen/db__json__path_8cpp_source.html

 /*
  * Copyright 2008 Search Solution Corporation
  * Copyright 2016 CUBRID Corporation
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  *
  */

 #include "db_json_path.hpp"

 #include "db_json.hpp"
 #include "db_rapidjson.hpp"
 #include "memory_alloc.h"
 #include "string_opfunc.h"
 #include "system_parameter.h"

 #include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <cstdlib>
 #include <limits>
 #include <string>
 #include <unordered_set>
 #include <vector>

 /* Syntax flavor of a path string, as decided by db_json_get_path_type (). */
 enum class JSON_PATH_TYPE
 {
   // path starts with '$' once leading spaces are trimmed
   JSON_PATH_SQL_JSON,
   // anything else (including an empty string) is handled as a JSON pointer
   JSON_PATH_POINTER
 };

 // Forward declarations of the file-local helpers defined below.
 // The default values of 'start' and 'end' for db_json_path_is_token_valid_array_index () are declared here only.
 static void db_json_trim_leading_spaces (std::string &path_string);
 static JSON_PATH_TYPE db_json_get_path_type (std::string &path_string);
 static bool db_json_isspace (const unsigned char &ch);
 static std::size_t skip_whitespaces (const std::string &path, std::size_t token_begin);
 static int db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards, unsigned long &index,
     std::size_t start = 0, std::size_t end = 0);
 static bool db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin);
 static bool db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin);
 static bool db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin);
 static bool db_json_path_is_valid_identifier_start_char (unsigned char ch);
 static bool db_json_path_is_valid_identifier_char (unsigned char ch);
 static void db_json_remove_leading_zeros_index (std::string &index);
 static bool db_json_iszero (const unsigned char &ch);

 /*
  * db_json_iszero () - Predicate telling whether a character is the digit zero
  *
  * return  : true only for '0'
  * ch (in) : character to test
  */
 static bool
 db_json_iszero (const unsigned char &ch)
 {
   const unsigned char zero_digit = '0';
   if (ch != zero_digit)
     {
       return false;
     }
   return true;
 }

 /*
  * db_json_path_is_token_valid_quoted_object_key () - Check that a quoted object_key has a closing quote
  *
  * return               : true if an unescaped closing '"' is found, false otherwise
  * path (in)            : path to be checked
  * token_begin (in/out) : offset of the opening '"'; on success it is replaced with the offset of the next token
  *                        (after the closing quote and any ' ' that follow it) or path.length ();
  *                        left unchanged on failure
  */
 static bool
 db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin)
 {
   // a '"' is escaped only when an odd number of consecutive backslashes precedes it
   bool escape_pending = false;
   std::size_t pos = token_begin + 1;

   while (pos < path.length ())
     {
       const char crt = path[pos];
       if (crt == '"' && !escape_pending)
         {
           // closing quote: the next token starts after it
           token_begin = skip_whitespaces (path, pos + 1);
           return true;
         }

       escape_pending = (crt == '\\') ? !escape_pending : false;
       ++pos;
     }

   // reached the end of the path without finding the closing quote
   return false;
 }

 /*
  * db_json_path_quote_and_validate_unquoted_object_key () - Validate an unquoted object_key and quote it in place
  *
  * return               : validation result; when false, neither argument is modified by this function
  * path (in/out)        : path to be checked; on success the key is wrapped in double quotes
  * token_begin (in/out) : offset of the key's first character; on success it is replaced with the beginning of the
  *                        next token or path.length ()
  */
 static bool
 db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin)
 {
   std::size_t key_end = token_begin;
   if (!db_json_path_is_token_valid_unquoted_object_key (path, key_end))
     {
       return false;
     }

   // we normalize object_keys by quoting them - e.g. $.objectkey we represent as $."objectkey"
   path.insert (token_begin, 1, '"');
   // the opening quote shifted the key by one position
   path.insert (key_end + 1, 1, '"');

   // two quotation marks were inserted, so the text following the key now starts at key_end + 2
   token_begin = skip_whitespaces (path, key_end + 2);
   return true;
 }

 /*
  * db_json_path_is_valid_identifier_start_char () - Check whether a character may start an unquoted object_key
  *
  * return  : true for '_' and for characters accepted by std::isalpha
  * ch (in) : character to test
  */
 static bool
 db_json_path_is_valid_identifier_start_char (unsigned char ch)
 {
   // todo: As per SQL Standard accept Ecmascript Identifier start:
   // \UnicodeEscapedSequence
   // Any char in Unicode categories: Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo), Letter number (Nl)

   if (ch == '_')
     {
       return true;
     }
   return std::isalpha (ch) != 0;
 }

 /*
  * db_json_path_is_valid_identifier_char () - Check whether a character may continue an unquoted object_key
  *
  * return  : true for '_' and for characters accepted by std::isalnum
  * ch (in) : character to test
  */
 static bool
 db_json_path_is_valid_identifier_char (unsigned char ch)
 {
   // todo: As per SQL Standard accept Ecmascript Identifier:
   // \UnicodeEscapedSequence
   // Any char in Unicode categories: Connector punctuation (Pc), Non-spacing mark (Mn),
   // Combining spacing mark (Mc), Decimal number (Nd), Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo)
   // Letter number (Nl)

   if (ch == '_')
     {
       return true;
     }
   return std::isalnum (ch) != 0;
 }

 /*
  * db_json_path_is_token_valid_unquoted_object_key () - Check if an unquoted object_key is valid
  *
  * return                  : true if a non-empty key made of one identifier-start character followed by zero or more
  *                           identifier characters begins at token_begin; false otherwise
  * path (in)               : path to be checked
  * token_begin (in/out)    : beginning offset of the token; on success it is replaced with first char's position
  *                           outside of the current valid token (at most path.length ()); unchanged on failure
  */
 static bool
 db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin)
 {
   if (token_begin >= path.length ())
     {
       // Nothing left to read (this also covers the empty path). An empty key is never valid, and accepting it
       // would hand back an offset past the end of the string.
       return false;
     }

   // todo: this needs change. SQL standard specifies that object key format must obey
   // JavaScript rules of an Identifier (6.10.1).
   // Besides alphanumerics, object keys can be valid ECMAScript identifiers as defined in
   // http://www.ecma-international.org/ecma-262/5.1/#sec-7.6

   // Defined syntax (approx.):
   // IdentifierName -> IdentifierStart | (IdentifierName IdentifierPart)
   // IdentifierStart -> $ ( note: this is the ONLY specified forbidden by SQL Standard) | _ | \UnicodeEscapeSequence
   // IdentifierPart -> IdentifierStart | UnicodeCombiningMark | UnicodeDigit | UnicodeConnectorPunctuation | <ZWNJ>
   // | <ZWJ>

   std::size_t i = token_begin;
   if (!db_json_path_is_valid_identifier_start_char (static_cast<unsigned char> (path[i])))
     {
       return false;
     }

   // consume the rest of the identifier
   for (++i; i < path.length () && db_json_path_is_valid_identifier_char (static_cast<unsigned char> (path[i])); ++i)
     {
     }

   token_begin = i;

   return true;
 }

 /*
  * db_json_path_is_token_valid_array_index () - verify if token is a valid array index. token can be a substring of
  *                                              first argument (by default the entire argument).
  *
  * return          : NO_ERROR if the token is "-", an allowed '*' wildcard, or a decimal number not greater than the
  *                   PRM_ID_JSON_MAX_ARRAY_IDX system parameter;
  *                   ER_JSON_INVALID_PATH if the token is empty or contains a non-digit;
  *                   ER_JSON_ARRAY_INDEX_TOO_LARGE if the number is out of range
  * str (in)        : token or the string that token belong to
  * allow_wildcards : whether json_path wildcards are allowed
  * index (out)     : converted index; it is written only when digits are parsed (not for "-" or '*', and not
  *                   when ER_JSON_INVALID_PATH is returned)
  * start (in)      : start of token; default is start of string
  * end (in)        : end of token (exclusive); default is end of string; 0 is considered default value
  *
  * Trailing ' ' characters inside [start, end) are ignored.
  */
 static int
 db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards,
     unsigned long &index, std::size_t start, std::size_t end)
 {
   // json pointer will corespond the symbol '-' to JSON_ARRAY length
   // so if we have the json {"A":[1,2,3]} and the path /A/-
   // this will point to the 4th element of the array (zero indexed)
   if (str == "-")
     {
       return NO_ERROR;
     }

   if (end == 0)
     {
       // default is end of string
       end = str.length ();
     }

   if (start == end)
     {
       // empty token
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
       return ER_JSON_INVALID_PATH;
     }

   // ignore trailing spaces of the token
   std::size_t last_non_space = end - 1;
   while (last_non_space > start && str[last_non_space] == ' ')
     {
       --last_non_space;
     }

   if (allow_wildcards && start == last_non_space && str[start] == '*')
     {
       return NO_ERROR;
     }

   // Remaining invalid cases are: 1. Non-digits are present
   //                              2. Index overflows Rapidjson's index representation type

   // we need to check for non-digits since strtoul simply returns 0 in case conversion
   // can not be made
   for (auto it = str.cbegin () + start; it < str.cbegin () + last_non_space + 1; ++it)
     {
       if (!std::isdigit (static_cast<unsigned char> (*it)))
         {
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
     }

   // strtoul reports overflow only through errno and never clears it, so reset it first; otherwise an ERANGE
   // left behind by an unrelated earlier call would be mistaken for an overflow of this index.
   errno = 0;
   index = std::strtoul (str.c_str () + start, nullptr, 10);
   if (errno == ERANGE)
     {
       errno = 0;
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_ARRAY_INDEX_TOO_LARGE, 0);
       return ER_JSON_ARRAY_INDEX_TOO_LARGE;
     }

   if (index > static_cast<unsigned long> (prm_get_integer_value (PRM_ID_JSON_MAX_ARRAY_IDX)))
     {
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_ARRAY_INDEX_TOO_LARGE, 0);
       return ER_JSON_ARRAY_INDEX_TOO_LARGE;
     }

   // this is a valid array index
   return NO_ERROR;
 }

 /*
  * skip_whitespaces  () - Advance offset to first non_space
  *
  * return              : offset of the first character at or after pos that is not ' ', or path.length () if only
  *                       spaces remain; pos itself when pos is already at or beyond the end of path
  * path (in)           : path
  * pos (in)            : starting position offset
  *
  * Only the ' ' character is skipped; tabs and newlines are not.
  */
 static std::size_t
 skip_whitespaces (const std::string &path, std::size_t pos)
 {
   if (pos >= path.length ())
     {
       // nothing to skip; hand the offset back untouched
       return pos;
     }

   const std::size_t first_non_space = path.find_first_not_of (' ', pos);
   if (first_non_space == std::string::npos)
     {
       return path.length ();
     }
   return first_non_space;
 }

 /*
  * db_json_isspace () - bool-returning wrapper over std::isspace, usable as an algorithm predicate
  *
  * return  : true if std::isspace accepts the character
  * ch (in) : character to test
  */
 static bool
 db_json_isspace (const unsigned char &ch)
 {
   if (std::isspace (ch))
     {
       return true;
     }
   return false;
 }

 /*
  * db_json_trim_leading_spaces () - Erase, in place, the leading characters accepted by db_json_isspace ()
  *
  * path_string (in/out) : string to be trimmed
  */
 static void
 db_json_trim_leading_spaces (std::string &path_string)
 {
   // count the leading space characters, then drop them with a single erase
   std::size_t leading_count = 0;
   while (leading_count < path_string.length () && db_json_isspace (path_string[leading_count]))
     {
       ++leading_count;
     }

   path_string.erase (0, leading_count);
 }

 /*
  * db_json_get_path_type () - Decide whether a path string is an SQL JSON path or a JSON pointer
  *
  * return               : JSON_PATH_SQL_JSON if the trimmed string starts with '$', JSON_PATH_POINTER otherwise
  *                        (an empty string is reported as a pointer)
  * path_string (in/out) : path; its leading spaces are erased
  */
 static JSON_PATH_TYPE
 db_json_get_path_type (std::string &path_string)
 {
   db_json_trim_leading_spaces (path_string);

   const bool starts_with_dollar = !path_string.empty () && path_string[0] == '$';
   return starts_with_dollar ? JSON_PATH_TYPE::JSON_PATH_SQL_JSON : JSON_PATH_TYPE::JSON_PATH_POINTER;
 }

 /*
  * validate_and_create_from_json_path () - Check if a given path is a SQL valid path and append its tokens to
  *                                         this JSON_PATH
  *
  * return                  : NO_ERROR on success;
  *                           ER_JSON_INVALID_PATH if path is invalid;
  *                           the error returned by db_json_path_is_token_valid_array_index () for a bad index
  * sql_path (in/out)       : path to be checked; leading spaces are erased and unquoted object keys are wrapped in
  *                           double quotes in place
  *
  * Tokens pushed before an error is detected are not removed from this JSON_PATH.
  */
 int
 JSON_PATH::validate_and_create_from_json_path (std::string &sql_path)
 {
   // skip leading white spaces
   db_json_trim_leading_spaces (sql_path);
   if (sql_path.empty ())
     {
       // empty
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
       return ER_JSON_INVALID_PATH;
     }

   if (sql_path[0] != '$')
     {
       // first character should always be '$'
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
       return ER_JSON_INVALID_PATH;
     }
   // start parsing path string by skipping dollar character
   std::size_t i = skip_whitespaces (sql_path, 1);

   // invariant: at the top of each iteration i is the offset of the first character of the next token
   while (i < sql_path.length ())
     {
       // to begin a next token we have only 3 possibilities:
       // with dot we start an object name
       // with bracket we start an index
       // with * we have the beginning of a '**' wildcard
       switch (sql_path[i])
     {
     case '[':
     {
       std::size_t end_bracket_offset;
       i = skip_whitespaces (sql_path, i + 1);

       // the index token is everything between i and the first ']' that follows
       end_bracket_offset = sql_path.find_first_of (']', i);
       if (end_bracket_offset == std::string::npos)
         {
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
       // index is left unset by the callee when the token is the '*' wildcard; it is not read in that case
       unsigned long index;
       int error_code = db_json_path_is_token_valid_array_index (sql_path, true, index, i, end_bracket_offset);
       if (error_code != NO_ERROR)
         {
           ASSERT_ERROR ();
           return error_code;
         }

       // todo check if it is array_index or array_index_wildcard
       if (sql_path[i] == '*')
         {
           push_array_index_wildcard ();
         }
       else
         {
           // note that db_json_path_is_token_valid_array_index () has already rejected any index greater than
           // the PRM_ID_JSON_MAX_ARRAY_IDX system parameter.
           push_array_index (index);
         }
       i = skip_whitespaces (sql_path, end_bracket_offset + 1);
       break;
     }
     case '.':
       i = skip_whitespaces (sql_path, i + 1);
       if (i == sql_path.length ())
         {
           // a '.' must be followed by an object key or by '*'
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
       switch (sql_path[i])
         {
         case '"':
         {
           size_t old_idx = i;
           if (!db_json_path_is_token_valid_quoted_object_key (sql_path, i))
         {
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
           // the key is stored with its surrounding quotes.
           // NOTE(review): i was already advanced past the spaces that follow the closing quote, so those
           // spaces end up in the stored key too - confirm this is intended.
           push_object_key (sql_path.substr (old_idx, i - old_idx));
           break;
         }
         case '*':
           push_object_key_wildcard ();
           i = skip_whitespaces (sql_path, i + 1);
           break;
         default:
         {
           size_t old_idx = i;
           // unquoted object_keys
           if (!db_json_path_quote_and_validate_unquoted_object_key (sql_path, i))
         {
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
           // sql_path was modified above: old_idx now points at the inserted opening quote.
           // NOTE(review): as in the quoted case, spaces following the key are part of the stored text.
           push_object_key (sql_path.substr (old_idx, i - old_idx));
           break;
         }
         }
       break;

     case '*':
       // only ** wildcard is allowed in this case
       if (++i >= sql_path.length () || sql_path[i] != '*')
         {
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
       push_double_wildcard ();
       i = skip_whitespaces (sql_path, i + 1);
       if (i == sql_path.length ())
         {
           // ** wildcard requires suffix
           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
           return ER_JSON_INVALID_PATH;
         }
       break;

     default:
       // any other character cannot start a token
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
       return ER_JSON_INVALID_PATH;
     }
     }
   return NO_ERROR;
 }

 /*
  * db_json_split_path_by_delimiters () - Split a path into tokens at any of the given delimiter characters
  *
  * return           : NO_ERROR, or ER_JSON_INVALID_PATH when a '"' delimiter has no matching closing '"'
  *                    (split_path is cleared in that case)
  * path (in)        : path to be split
  * delim (in)       : set of delimiter characters
  * allow_empty (in) : whether empty tokens found between delimiters are kept
  * split_path (out) : tokens are appended here; tokens that are valid array indexes get their leading zeros removed
  *
  * When '"' is one of the delimiters, the text up to the next '"' is appended as one token, without the quotes and
  * regardless of allow_empty.
  * NOTE(review): the closing quote is looked up without considering backslash escapes - confirm callers never
  * pass keys that contain escaped quotes.
  */
 int
 db_json_split_path_by_delimiters (const std::string &path, const std::string &delim, bool allow_empty,
                                   std::vector<std::string> &split_path)
 {
   std::size_t token_start = 0;

   for (std::size_t delim_pos = path.find_first_of (delim, token_start); delim_pos != std::string::npos;
        delim_pos = path.find_first_of (delim, delim_pos + 1))
     {
       if (path[delim_pos] == '"')
         {
           const std::size_t closing_quote = path.find_first_of ('"', delim_pos + 1);
           if (closing_quote == std::string::npos)
             {
               /* this should have been catched earlier */
               assert (false);
               split_path.clear ();
               er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
               return ER_JSON_INVALID_PATH;
             }

           // keep the quoted text as a single token and resume scanning after the closing quote
           split_path.push_back (path.substr (delim_pos + 1, closing_quote - delim_pos - 1));
           delim_pos = closing_quote;
           token_start = delim_pos + 1;
         }
       else
         {
           // regular delimiter: emit the text accumulated since the previous delimiter
           std::string token = path.substr (token_start, delim_pos - token_start);
           if (allow_empty || !token.empty ())
             {
               split_path.push_back (std::move (token));
             }
           token_start = delim_pos + 1;
         }
     }

   // whatever follows the last delimiter is the final token
   std::string last_token = path.substr (token_start);
   if (allow_empty || !last_token.empty ())
     {
       split_path.push_back (std::move (last_token));
     }

   for (std::string &token : split_path)
     {
       unsigned long index;
       if (db_json_path_is_token_valid_array_index (token, false, index) == NO_ERROR)
         {
           db_json_remove_leading_zeros_index (token);
         }
       else
         {
           // ignore error. We only need to decide whether to skip it in case it is not array_idx
           er_clear ();
         }
     }

   return NO_ERROR;
 }

 /*
  * match_pattern () - Recursively match the pattern tokens starting at it1 against the path tokens starting at it2
  *
  * return       : FULL_MATCH when pattern and path are exhausted together;
  *                PREFIX_MATCH when the pattern is exhausted before the path;
  *                NO_MATCH otherwise
  * pattern (in) : pattern, may contain wildcards
  * it1 (in)     : current position in pattern
  * path (in)    : path being matched
  * it2 (in)     : current position in path
  */
 JSON_PATH::MATCH_RESULT
 JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH::token_containter_type::const_iterator &it1,
                           const JSON_PATH &path, const JSON_PATH::token_containter_type::const_iterator &it2)
 {
   const bool pattern_exhausted = (it1 == pattern.m_path_tokens.end ());
   const bool path_exhausted = (it2 == path.m_path_tokens.end ());

   if (pattern_exhausted)
     {
       return path_exhausted ? FULL_MATCH : PREFIX_MATCH;
     }

   if (path_exhausted)
     {
       // note that in case of double wildcard we have guaranteed a token after it
       return NO_MATCH;
     }

   if (it1->m_type != PATH_TOKEN::double_wildcard)
     {
       // plain token or single-level wildcard: both sides advance together
       if (!PATH_TOKEN::match_pattern (*it1, *it2))
         {
           return NO_MATCH;
         }
       return match_pattern (pattern, it1 + 1, path, it2 + 1);
     }

   // for "**" wildcard we try to match the remaining pattern against each suffix of the path
   const MATCH_RESULT wildcard_skipped = match_pattern (pattern, it1 + 1, path, it2);
   if (wildcard_skipped == FULL_MATCH)
     {
       // return early if we have a full result
       return FULL_MATCH;
     }

   const MATCH_RESULT token_consumed = match_pattern (pattern, it1, path, it2 + 1);
   if (token_consumed == FULL_MATCH)
     {
       return FULL_MATCH;
     }

   if (wildcard_skipped == PREFIX_MATCH || token_consumed == PREFIX_MATCH)
     {
       return PREFIX_MATCH;
     }
   return NO_MATCH;
 }

 /*
  * match_pattern () - Match a whole path against a whole pattern
  *
  * return       : FULL_MATCH, PREFIX_MATCH or NO_MATCH, as computed by the iterator-based overload
  * pattern (in) : pattern, may contain wildcards
  * path (in)    : path to match; it is asserted not to contain wildcards
  */
 JSON_PATH::MATCH_RESULT
 JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH &path)
 {
   assert (!path.contains_wildcard ());

   // start both walks from the first token
   const auto pattern_first = pattern.m_path_tokens.begin ();
   const auto path_first = path.m_path_tokens.begin ();

   return match_pattern (pattern, pattern_first, path, path_first);
 }

 /*
  * db_json_path_unquote_object_keys () - Unquote, when possible, object_keys of the json_path
  *
  * return                  : NO_ERROR, or the error returned by db_json_split_path_by_delimiters ()
  * sql_path (in/out)       : path; replaced with the rewritten path on success
  *
  * The path is split at '.' and '['. A token starting with '"' is emitted after a '.', without its quotes when the
  * unquoted text is a valid unquoted object_key; every other token is emitted after a '['.
  * NOTE(review): the first token is asserted to be "$", and non-quoted tokens are all treated as array indexes -
  * presumably callers only pass normalized, wildcard-free paths; confirm.
  */
 int
 db_json_path_unquote_object_keys (std::string &sql_path)
 {
   // todo: rewrite as json_path.dump () + unquoting the object_keys
   std::vector<std::string> tokens;
   const int error_code = db_json_split_path_by_delimiters (sql_path, ".[", false, tokens);
   if (error_code != NO_ERROR)
     {
       ASSERT_ERROR ();
       return error_code;
     }

   assert (!tokens.empty () && tokens[0] == "$");

   std::string rebuilt = "$";
   for (std::size_t i = 1; i < tokens.size (); ++i)
     {
       const std::string &token = tokens[i];

       if (token[0] != '"')
         {
           // not an object key; the token still carries its closing ']'
           rebuilt += "[";
           rebuilt += token;
           continue;
         }

       rebuilt += ".";

       // strip the surrounding quotes and check that the whole remainder is a plain identifier
       const std::string inner = token.substr (1, token.length () - 2);
       std::size_t consumed = 0;
       const bool can_unquote = db_json_path_is_token_valid_unquoted_object_key (inner, consumed)
                                && consumed >= inner.length ();

       rebuilt += can_unquote ? inner : token;
     }

   sql_path = std::move (rebuilt);
   return NO_ERROR;
 }

 /*
  * db_json_remove_leading_zeros_index () - Erase leading zeros from sql path index
  *
  * index (in/out)            : index text, modified in place; a text made only of zeros (or empty) becomes "0"
  * example: $[000123] -> $[123]
  */
 static void
 db_json_remove_leading_zeros_index (std::string &index)
 {
   // count the leading zeros
   std::size_t zero_count = 0;
   while (zero_count < index.length () && db_json_iszero (index[zero_count]))
     {
       ++zero_count;
     }
   index.erase (0, zero_count);

   if (index.empty ())
     {
       // everything was a zero; keep one
       index = "0";
     }
 }

 /*
  * PATH_TOKEN () - Default constructor; the token type is set to array_index.
  *                 No other member is initialized in this initializer list.
  */
 PATH_TOKEN::PATH_TOKEN ()
   : m_type (array_index)
 {

 }

 /*
  * PATH_TOKEN () - Build a token that carries an array index
  *
  * type (in)      : token type
  * array_idx (in) : array index stored in the token
  */
 PATH_TOKEN::PATH_TOKEN (token_type type, unsigned long array_idx)
   : m_type (type)
   , m_array_idx (array_idx)
 {

 }

 /*
  * PATH_TOKEN () - Build a token that carries a string (object key, or the text of a wildcard / '-' token)
  *
  * type (in) : token type
  * s (in)    : string moved into the token
  */
 PATH_TOKEN::PATH_TOKEN (token_type type, std::string &&s)
   : m_type (type)
   , m_object_key (std::move (s))
 {

 }

 /*
  * get_object_key () - Return the stored object key; asserts that the token type is object_key
  */
 const std::string &
 PATH_TOKEN::get_object_key () const
 {
   assert (m_type == object_key);

   return m_object_key;
 }

 /*
  * get_array_index () - Return the stored array index; asserts that the token type is array_index
  */
 unsigned long
 PATH_TOKEN::get_array_index () const
 {
   assert (m_type == array_index);

   return m_array_idx;
 }

 /*
  * is_wildcard () - Tell whether the token is one of the three wildcard kinds
  *
  * return : true for object_key_wildcard, array_index_wildcard and double_wildcard tokens
  */
 bool
 PATH_TOKEN::is_wildcard () const
 {
   switch (m_type)
     {
     case object_key_wildcard:
     case array_index_wildcard:
     case double_wildcard:
       return true;
     default:
       return false;
     }
 }

 /*
  * match_pattern () - Check whether a single pattern token accepts a single path token
  *
  * return       : true if matchee is accepted by matcher
  * matcher (in) : pattern token, may be a wildcard
  * matchee (in) : path token; it is asserted not to be a wildcard
  */
 bool
 PATH_TOKEN::match_pattern (const PATH_TOKEN &matcher, const PATH_TOKEN &matchee)
 {
   assert (!matchee.is_wildcard ());

   const bool matchee_is_key = (matchee.m_type == object_key);
   const bool matchee_is_index = (matchee.m_type == array_index);

   if (matcher.m_type == double_wildcard)
     {
       return matchee_is_key || matchee_is_index;
     }
   if (matcher.m_type == object_key_wildcard)
     {
       return matchee_is_key;
     }
   if (matcher.m_type == array_index_wildcard)
     {
       return matchee_is_index;
     }
   if (matcher.m_type == object_key)
     {
       // keys are compared only after the kinds are known to agree
       return matchee_is_key && matcher.get_object_key () == matchee.get_object_key ();
     }
   if (matcher.m_type == array_index)
     {
       return matchee_is_index && matcher.get_array_index () == matchee.get_array_index ();
     }

   // any other matcher type matches nothing
   return false;
 }

 /*
  * push_array_index () - Append an array_index token
  *
  * idx (in) : array index
  */
 void
 JSON_PATH::push_array_index (unsigned long idx)
 {
   m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index, idx);
 }

 /*
  * push_array_index_wildcard () - Append an array_index_wildcard token; its stored text is "*"
  */
 void
 JSON_PATH::push_array_index_wildcard ()
 {
   m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index_wildcard, std::string ("*"));
 }

 /*
  * push_object_key () - Append an object_key token
  *
  * object_key (in) : key text, moved into the new token
  */
 void
 JSON_PATH::push_object_key (std::string &&object_key)
 {
   m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key, std::move (object_key));
 }

 /*
  * push_object_key_wildcard () - Append an object_key_wildcard token; its stored text is "*"
  */
 void
 JSON_PATH::push_object_key_wildcard ()
 {
   m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key_wildcard, std::string ("*"));
 }

 /*
  * push_double_wildcard () - Append a double_wildcard token; its stored text is "**"
  */
 void
 JSON_PATH::push_double_wildcard ()
 {
   m_path_tokens.emplace_back (PATH_TOKEN::token_type::double_wildcard, std::string ("**"));
 }

 /*
  * pop () - Remove the last token of the path.
  *          No emptiness check is made here; the path must have at least one token.
  */
 void
 JSON_PATH::pop ()
 {
   m_path_tokens.pop_back ();
 }

 bool
 JSON_PATH::contains_wildcard () const
 {
   for (const PATH_TOKEN &tkn : m_path_tokens)
     {
       if (tkn.is_wildcard ())
     {
       return true;
     }
     }
   return false;
 }

 std::string
 JSON_PATH::dump_json_path () const
 {
   std::string res = "$";

   for (const auto &tkn : m_path_tokens)
     {
       switch (tkn.m_type)
     {
     case PATH_TOKEN::array_index:
       res += '[';
       res += std::to_string (tkn.get_array_index ());
       res += ']';
       break;
     case PATH_TOKEN::array_index_wildcard:
       res += "[*]";
       break;
     case PATH_TOKEN::object_key:
       res += '.';
       res += tkn.get_object_key ();
       break;
     case PATH_TOKEN::object_key_wildcard:
       res += ".*";
       break;
     case PATH_TOKEN::double_wildcard:
       res += "**";
       break;
     case PATH_TOKEN::array_end_index:
       // this case is valid and possible in case of ER_JSON_PATH_DOES_NOT_EXIST
       // we don't have the JSON in this context and cannot replace '-' with last index
       // for json_pointer -> json_path conversion so we leave empty suffix
       break;
     default:
       assert (false);
       break;
     }
     }

   return res;
 }

 /*
  * set () - Create or replace a value at path in the document
  *
  * jd (in/out) : document we insert in; its allocator is used for the new values
  * jv (in)     : value to be inserted
  */
 void
 JSON_PATH::set (JSON_DOC &jd, const JSON_VALUE &jv) const
 {
   set (db_json_doc_to_value (jd), jv, jd.GetAllocator ());
 }

 /*
  * set () - Create or replace a value at path in the document
  *
  * jd (in/out)    : value we insert in
  * jv (in)        : value to be inserted (copied)
  * allocator (in) : allocator used for every value created on the way and for the final copy
  *
  * Nothing is returned. For each token, the current value is first converted to the container kind required by the
  * token (array for array_index / array_end_index, object for object_key) if it is not already of that kind; missing
  * array cells and object members are then created as nulls. If a wildcard token is met, the function asserts and
  * returns without copying jv; changes made for earlier tokens are kept.
  *
  * Our implementation does not follow the JSON Pointer https://tools.ietf.org/html/rfc6901#section-4 standard fully
  * We normalize json_pointers to json_paths and resolve token types independently of the document that gets operated
  * by the normalized path.
  * Therefore, we cannot traverse the doc contextually as described in the rfc e.g. both '{"0":10}' an '[10]' to provide
  * same results for '/1' json_pointer.
  */
 void
 JSON_PATH::set (JSON_VALUE &jd, const JSON_VALUE &jv, JSON_PRIVATE_MEMPOOL &allocator) const
 {
   // val walks down the document, one level per token
   JSON_VALUE *val = &jd;
   for (const PATH_TOKEN &tkn : m_path_tokens)
     {
       // step 1: make sure the current value is the kind of container the token needs
       switch (tkn.m_type)
     {
     case PATH_TOKEN::token_type::array_index:
     case PATH_TOKEN::token_type::array_end_index:
       if (!val->IsArray ())
         {
           val->SetArray ();
         }
       break;
     case PATH_TOKEN::token_type::object_key:
       if (!val->IsObject ())
         {
           val->SetObject ();
         }
       break;
     case PATH_TOKEN::token_type::array_index_wildcard:
     case PATH_TOKEN::token_type::object_key_wildcard:
     case PATH_TOKEN::token_type::double_wildcard:
       // error? unexpected set - wildcards not allowed for set
       assert (false);
       return;
     }

       // step 2: descend into the child designated by the token, creating it if it is missing
       if (val->IsArray ())
     {
       JSON_VALUE::Array arr = val->GetArray ();
       if (tkn.m_type == PATH_TOKEN::token_type::array_end_index)
         {
           // insert dummy
           arr.PushBack (JSON_VALUE ().SetNull (), allocator);
           val = &val->GetArray ()[val->GetArray ().Size () - 1];
         }
       else
         {
           rapidjson::SizeType idx = (rapidjson::SizeType) tkn.get_array_index ();
           // pad the array with nulls until idx is a valid position
           while (idx >= arr.Size ())
         {
           arr.PushBack (JSON_VALUE ().SetNull (), allocator);
         }
           val = &val->GetArray ()[idx];
         }
     }
       else if (val->IsObject ())
     {
       std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
       JSON_VALUE::MemberIterator m = val->FindMember (encoded_key.c_str ());
       if (m == val->MemberEnd ())
         {
           // insert dummy
           unsigned int len = (rapidjson::SizeType) encoded_key.length ();
           val->AddMember (JSON_VALUE (encoded_key.c_str (), len, allocator), JSON_VALUE ().SetNull (), allocator);

           val = & (--val->MemberEnd ())->value; // Assume AddMember() appends at the end
         }
       else
         {
           val = &m->value;
         }
     }
     }

   // val now designates the target; overwrite it with a copy of jv
   val->CopyFrom (jv, allocator);
 }

 /*
  * get () - Non-const overload; forwards to the const overload and casts the constness of the result away
  *
  * jd (in)
  * return : found value at path, or NULL
  */
 JSON_VALUE *
 JSON_PATH::get (JSON_DOC &jd) const
 {
   return const_cast<JSON_VALUE *> (get (const_cast<const JSON_DOC &> (jd)));
 }

 /*
  * get () - Walk a doc following a path and retrieve the value pointed at
  *
  * jd (in) : document to walk
  * return  : found value at path; NULL if a token does not fit the value it is applied to (kind mismatch,
  *           index out of range, missing key, or a scalar met before the path ends).
  *           Only array_index and object_key tokens can be resolved; any other token type yields NULL.
  */
 const JSON_VALUE *
 JSON_PATH::get (const JSON_DOC &jd) const
 {
   const JSON_VALUE *val = &db_json_doc_to_value (jd);
   for (const PATH_TOKEN &tkn : m_path_tokens)
     {
       if (val->IsArray ())
         {
           if (tkn.m_type != PATH_TOKEN::token_type::array_index)
             {
               return NULL;
             }

           // compare in the index's own type; narrowing before the bounds check could wrap a too-large index
           // into a valid position
           const unsigned long idx = tkn.get_array_index ();
           if (idx >= val->GetArray ().Size ())
             {
               return NULL;
             }

           // idx is smaller than the array size, so it fits in rapidjson::SizeType
           val = &val->GetArray ()[static_cast<rapidjson::SizeType> (idx)];
         }
       else if (val->IsObject ())
         {
           if (tkn.m_type != PATH_TOKEN::token_type::object_key)
             {
               return NULL;
             }
           std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
           JSON_VALUE::ConstMemberIterator m = val->FindMember (encoded_key.c_str ());
           if (m == val->MemberEnd ())
             {
               return NULL;
             }
           val = &m->value;
         }
       else
         {
           // scalars have no children
           return NULL;
         }
     }
   return val;
 }

 /*
  * extract_from_subtree () - Collect every value of a subtree that is matched by the remaining tokens of a path
  *
  * path (in)              : path, may contain wildcards
  * tkn_array_offset (in)  : offset of the first token of path that is still to be matched
  * jv (in)                : root of the subtree being explored
  * vals_hash_set (in/out) : values collected so far, used to avoid collecting a value twice
  * vals (in/out)          : values collected so far, in the order they were first met
  */
 void
 JSON_PATH::extract_from_subtree (const JSON_PATH &path, size_t tkn_array_offset, const JSON_VALUE &jv,
                                  std::unordered_set<const JSON_VALUE *> &vals_hash_set,
                                  std::vector<const JSON_VALUE *> &vals)
 {
   if (tkn_array_offset == path.get_token_count ())
     {
       // No suffix remaining -> collect match
       // Note: some nodes of the tree are encountered multiple times (only during double wildcards)
       // therefore the use of unordered_set
       if (vals_hash_set.insert (&jv).second)
         {
           vals.push_back (&jv);
         }
       return;
     }

   const PATH_TOKEN &crt_tkn = path.m_path_tokens[tkn_array_offset];
   if (jv.IsArray ())
     {
       switch (crt_tkn.m_type)
         {
         case PATH_TOKEN::token_type::array_index:
         {
           // compare in the index's own type; narrowing before the bounds check could wrap a too-large index
           // into a valid position
           const unsigned long idx = crt_tkn.get_array_index ();
           if (idx >= jv.GetArray ().Size ())
             {
               return;
             }
           extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[static_cast<rapidjson::SizeType> (idx)],
                                 vals_hash_set, vals);
           return;
         }
         case PATH_TOKEN::token_type::array_index_wildcard:
           for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
             {
               extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[i], vals_hash_set, vals);
             }
           return;
         case PATH_TOKEN::token_type::double_wildcard:
           // Advance token_array_offset
           extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
           for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
             {
               // Advance in tree, keep current token_array_offset
               extract_from_subtree (path, tkn_array_offset, jv.GetArray ()[i], vals_hash_set, vals);
             }
           return;
         default:
           // object tokens never match inside an array
           return;
         }
     }
   else if (jv.IsObject ())
     {
       switch (crt_tkn.m_type)
         {
         case PATH_TOKEN::token_type::object_key:
         {
           std::string encoded_key = db_json_json_string_as_utf8 (crt_tkn.get_object_key ());
           JSON_VALUE::ConstMemberIterator m = jv.FindMember (encoded_key.c_str ());
           if (m == jv.MemberEnd ())
             {
               return;
             }
           extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
           return;
         }
         case PATH_TOKEN::token_type::object_key_wildcard:
           for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
             {
               extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
             }
           return;
         case PATH_TOKEN::token_type::double_wildcard:
           // Advance token_array_offset
           extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
           for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
             {
               // Advance in tree, keep current token_array_offset
               extract_from_subtree (path, tkn_array_offset, m->value, vals_hash_set, vals);
             }
           return;
         default:
           // array tokens never match inside an object
           return;
         }
     }
   // Json scalars are ignored if there is a remaining suffix
 }

 std::vector<const JSON_VALUE *>
 JSON_PATH::extract (const JSON_DOC &jd) const
 {
   std::unordered_set<const JSON_VALUE *> vals_hash_set;
   std::vector<const JSON_VALUE *> res;

   extract_from_subtree (*this, 0, db_json_doc_to_value (jd), vals_hash_set, res);

   return res;
 }

 /*
  * erase () - Remove from a document the value designated by this path
  *
  * return      : true if an array cell was removed, or the result of EraseMember () for an object member;
  *               false if the path is empty, the parent value does not exist, or the last token does not fit
  *               the parent (kind mismatch or index out of range)
  * jd (in/out) : document to remove from
  */
 bool
 JSON_PATH::erase (JSON_DOC &jd) const
 {
   if (get_token_count () == 0)
     {
       return false;
     }

   JSON_VALUE *parent_value = get_parent ().get (jd);
   if (parent_value == nullptr)
     {
       return false;
     }

   const PATH_TOKEN &last_token = m_path_tokens.back ();

   if (parent_value->IsArray ())
     {
       if (is_last_array_index_less_than (parent_value->GetArray ().Size ()))
         {
           parent_value->Erase (parent_value->Begin () + last_token.get_array_index ());
           return true;
         }
       return false;
     }

   if (parent_value->IsObject () && last_token.m_type == PATH_TOKEN::object_key)
     {
       const std::string encoded_key = db_json_json_string_as_utf8 (last_token.get_object_key ());
       return parent_value->EraseMember (encoded_key.c_str ());
     }

   // the parent is a scalar, or the token does not address an object member
   return false;
 }

 /*
  * get_last_token () - Give access to the last token of the path
  *
  * return : pointer to the last token, or NULL if the path has no tokens
  */
 const PATH_TOKEN *
 JSON_PATH::get_last_token () const
 {
   if (m_path_tokens.empty ())
     {
       return NULL;
     }
   return &m_path_tokens.back ();
 }

 /*
  * get_token_count () - Return the number of tokens of the path
  */
 size_t
 JSON_PATH::get_token_count () const
 {
   return m_path_tokens.size ();
 }

 bool
 JSON_PATH::is_root_path () const
 {
   return get_token_count () == 0;
 }

 /*
  * get_parent () - Build the path that designates the parent of the value designated by this path
  *
  * return : copy of this path without its last token; for a path without tokens (not expected, asserted) a
  *          default-constructed path
  */
 JSON_PATH
 JSON_PATH::get_parent () const
 {
   if (get_token_count () > 0)
     {
       // todo: improve getting a slice of the m_path_tokens vector
       JSON_PATH parent (*this);
       parent.pop ();
       return parent;
     }

   // this should not happen
   assert (false);
   JSON_PATH no_parent;
   return no_parent;
 }

 /*
  * is_last_array_index_less_than () - Check that the last token is an array index below a given bound
  *
  * return    : true if the last token is an array_index token whose index is smaller than size
  * size (in) : exclusive upper bound
  *
  * The path is asserted to have at least one token.
  */
 bool
 JSON_PATH::is_last_array_index_less_than (size_t size) const
 {
   const PATH_TOKEN *last_token = get_last_token ();
   assert (last_token != NULL);

   if (last_token->m_type != PATH_TOKEN::array_index)
     {
       return false;
     }
   return last_token->get_array_index () < size;
 }

 /*
  * is_last_token_array_index_zero () - Tell whether the last token is the array index 0
  *                                     (an array_index token whose index is smaller than 1)
  */
 bool
 JSON_PATH::is_last_token_array_index_zero () const
 {
   return is_last_array_index_less_than (1);
 }

 /*
  * points_to_array_cell () - Tell whether the path ends with an array_index or array_end_index token
  *
  * return : false for a path without tokens or when the last token is of any other type
  */
 bool
 JSON_PATH::points_to_array_cell () const
 {
   const PATH_TOKEN *last_token = get_last_token ();
   if (last_token == NULL)
     {
       return false;
     }

   switch (last_token->m_type)
     {
     case PATH_TOKEN::array_index:
     case PATH_TOKEN::array_end_index:
       return true;
     default:
       return false;
     }
 }

 /*
  * parent_exists () - Tell whether the parent of the value designated by this path exists in a document
  *
  * return  : false for a path without tokens; otherwise whether get () finds the parent path in jd
  * jd (in) : document to look in
  */
 bool
 JSON_PATH::parent_exists (JSON_DOC &jd) const
 {
   // the token count is tested first so that get_parent () is never called on a path without tokens
   return get_token_count () != 0 && get_parent ().get (jd) != NULL;
 }

 /*
  * init ()
  *
  * path (in)
  * An sql_path is normalized to rapidjson standard path
  * Example: $[0]."name1".name2[2] -> /0/name1/name2/2
  */
 int
 JSON_PATH::parse (const char *path)
 {
   std::string sql_path_string (path);
   JSON_PATH_TYPE json_path_type = db_json_get_path_type (sql_path_string);

   if (json_path_type == JSON_PATH_TYPE::JSON_PATH_POINTER)
     {
       // path is not SQL path format; consider it JSON pointer.
       int error_code = from_json_pointer (sql_path_string);
       if (error_code != NO_ERROR)
     {
       ASSERT_ERROR ();
     }
       return error_code;
     }

   int error_code = validate_and_create_from_json_path (sql_path_string);
   if (error_code != NO_ERROR)
     {
       ASSERT_ERROR ();
     }
   return error_code;
 }

 int
 JSON_PATH::from_json_pointer (const std::string &pointer_path)
 {
   typedef rapidjson::GenericPointer<JSON_VALUE>::Token TOKEN;
   static const rapidjson::SizeType kPointerInvalidIndex = rapidjson::kPointerInvalidIndex;

   typedef rapidjson::GenericPointer<JSON_VALUE> JSON_POINTER;

   JSON_POINTER jp (pointer_path.c_str ());
   if (!jp.IsValid ())
     {
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_JSON_INVALID_PATH, 0);
       return ER_JSON_INVALID_PATH;
     }

   size_t tkn_cnt = jp.GetTokenCount ();
   const TOKEN *tokens = jp.GetTokens ();

   // convert rapidjson's tokens to our tokens:
   for (size_t i = 0; i < tkn_cnt; ++i)
     {
       const TOKEN &rapid_token = tokens[i];

       if (rapid_token.index != kPointerInvalidIndex)
     {
       // array_index
       push_array_index (rapid_token.index);
     }
       else if (rapid_token.length == 1 && rapid_token.name[0] == '-' )
     {
       // '-' special idx token
       m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_end_index, "-");
     }
       else
     {
       // object_key
       char *escaped;
       size_t escaped_size;
       db_string_escape_str (rapid_token.name, rapid_token.length, &escaped, &escaped_size);

       push_object_key (escaped);
       db_private_free (NULL, escaped);
     }
     }

   return NO_ERROR;
 }
db_json_doc_to_value
JSON_VALUE & db_json_doc_to_value(JSON_DOC &doc)
Definition: db_json_types_internal.cpp:37

JSON_PATH::m_path_tokens
token_containter_type m_path_tokens
Definition: db_json_path.hpp:116

PATH_TOKEN::get_object_key
const std::string & get_object_key() const
Definition: db_json_path.cpp:635

JSON_PATH_TYPE::JSON_PATH_SQL_JSON

NO_ERROR
#define NO_ERROR
Definition: error_code.h:46

ER_ERROR_SEVERITY
Definition: error_manager.h:117

PATH_TOKEN
Definition: db_json_path.hpp:34

JSON_DOC
Definition: db_json_types_internal.hpp:33

PATH_TOKEN::match_pattern
static bool match_pattern(const PATH_TOKEN &matcher, const PATH_TOKEN &matchee)
Definition: db_json_path.cpp:657

ASSERT_ERROR
#define ASSERT_ERROR()
Definition: error_manager.h:156

JSON_PATH::parent_exists
bool parent_exists(JSON_DOC &jd) const
Definition: db_json_path.cpp:1112

JSON_PATH::points_to_array_cell
bool points_to_array_cell() const
Definition: db_json_path.cpp:1104

PATH_TOKEN::array_index
Definition: db_json_path.hpp:42

db_json_path_is_valid_identifier_start_char
static bool db_json_path_is_valid_identifier_start_char(unsigned char ch)
Definition: db_json_path.cpp:120

JSON_PATH::parse
int parse(const char *path)
Definition: db_json_path.cpp:1135

db_json_path_is_token_valid_quoted_object_key
static bool db_json_path_is_token_valid_quoted_object_key(const std::string &path, std::size_t &token_begin)
Definition: db_json_path.cpp:70

string_opfunc.h

JSON_PATH::is_last_array_index_less_than
bool is_last_array_index_less_than(size_t size) const
Definition: db_json_path.cpp:1089

JSON_PATH::MATCH_RESULT
MATCH_RESULT
Definition: db_json_path.hpp:65

PATH_TOKEN::PATH_TOKEN
PATH_TOKEN()
Definition: db_json_path.cpp:614

JSON_PATH::contains_wildcard
bool contains_wildcard() const
Definition: db_json_path.cpp:715

JSON_PATH::is_root_path
bool is_root_path() const
Definition: db_json_path.cpp:1064

JSON_PATH::get_last_token
const PATH_TOKEN * get_last_token() const
Definition: db_json_path.cpp:1052

db_json.hpp

PATH_TOKEN::array_end_index
Definition: db_json_path.hpp:44

JSON_PATH::push_array_index
void push_array_index(unsigned long idx)
Definition: db_json_path.cpp:679

PATH_TOKEN::m_type
token_type m_type
Definition: db_json_path.hpp:47

db_json_path.hpp

JSON_PATH_TYPE::JSON_PATH_POINTER

JSON_PATH::pop
void pop()
Definition: db_json_path.cpp:709

db_json_iszero
static bool db_json_iszero(const unsigned char &ch)
Definition: db_json_path.cpp:57

PATH_TOKEN::is_wildcard
bool is_wildcard() const
Definition: db_json_path.cpp:651

er_set
void er_set(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
Definition: error_manager.c:1228

JSON_PRIVATE_MEMPOOL
rapidjson::MemoryPoolAllocator< JSON_PRIVATE_ALLOCATOR > JSON_PRIVATE_MEMPOOL
Definition: db_json_allocator.hpp:33

assert
#define assert(x)
Definition: malloc_2_8_3.c:1204

prm_get_integer_value
int prm_get_integer_value(PARAM_ID prm_id)
Definition: system_parameter.c:10964

JSON_PATH::extract
std::vector< const JSON_VALUE * > extract(const JSON_DOC &) const
Definition: db_json_path.cpp:1003

JSON_PATH_TYPE
JSON_PATH_TYPE
Definition: db_json_path.cpp:36

PATH_TOKEN::object_key
Definition: db_json_path.hpp:41

lockfree::tran::index
size_t index
Definition: lockfree_transaction_def.hpp:30

JSON_PATH::push_object_key
void push_object_key(std::string &&object_key)
Definition: db_json_path.cpp:691

db_json_path_is_token_valid_array_index
static int db_json_path_is_token_valid_array_index(const std::string &str, bool allow_wildcards, unsigned long &index, std::size_t start=0, std::size_t end=0)
Definition: db_json_path.cpp:195

JSON_PATH::get_parent
JSON_PATH get_parent() const
Definition: db_json_path.cpp:1070

JSON_PATH::push_object_key_wildcard
void push_object_key_wildcard()
Definition: db_json_path.cpp:697

NULL
#define NULL
Definition: freelistheap.h:34

db_json_path_quote_and_validate_unquoted_object_key
static bool db_json_path_quote_and_validate_unquoted_object_key(std::string &path, std::size_t &token_begin)
Definition: db_json_path.cpp:104

JSON_PATH::dump_json_path
std::string dump_json_path() const
Definition: db_json_path.cpp:728

db_private_free
#define db_private_free(thrd, ptr)
Definition: memory_alloc.h:229

ER_JSON_ARRAY_INDEX_TOO_LARGE
#define ER_JSON_ARRAY_INDEX_TOO_LARGE
Definition: error_code.h:1557

db_json_remove_leading_zeros_index
static void db_json_remove_leading_zeros_index(std::string &index)
Definition: db_json_path.cpp:602

db_json_path_unquote_object_keys
int db_json_path_unquote_object_keys(std::string &sql_path)
Definition: db_json_path.cpp:554

skip_whitespaces
static std::size_t skip_whitespaces(const std::string &path, std::size_t token_begin)
Definition: db_json_path.cpp:266

JSON_PATH
Definition: db_json_path.hpp:62

db_json_trim_leading_spaces
static void db_json_trim_leading_spaces(std::string &path_string)
Definition: db_json_path.cpp:279

JSON_PATH::extract_from_subtree
static void extract_from_subtree(const JSON_PATH &path, size_t tkn_array_offset, const JSON_VALUE &jv, std::unordered_set< const JSON_VALUE * > &unique_elements, std::vector< const JSON_VALUE * > &vals)
Definition: db_json_path.cpp:914

ER_JSON_INVALID_PATH
#define ER_JSON_INVALID_PATH
Definition: error_code.h:1549

PATH_TOKEN::get_array_index
unsigned long get_array_index() const
Definition: db_json_path.cpp:643

ARG_FILE_LINE
#define ARG_FILE_LINE
Definition: error_manager.h:44

JSON_PATH::set
void set(JSON_DOC &jd, const JSON_VALUE &jv) const
Definition: db_json_path.cpp:769

db_string_escape_str
int db_string_escape_str(const char *src_str, size_t src_size, char **res_string, size_t *dest_size)
Definition: string_opfunc.c:1933

db_rapidjson.hpp

JSON_PATH::is_last_token_array_index_zero
bool is_last_token_array_index_zero() const
Definition: db_json_path.cpp:1098

JSON_PATH::push_double_wildcard
void push_double_wildcard()
Definition: db_json_path.cpp:703

JSON_PATH::get
JSON_VALUE * get(JSON_DOC &jd) const
Definition: db_json_path.cpp:859

db_json_json_string_as_utf8
std::string db_json_json_string_as_utf8(std::string raw_json_string)
Definition: db_json.cpp:2893

db_json_path_is_valid_identifier_char
static bool db_json_path_is_valid_identifier_char(unsigned char ch)
Definition: db_json_path.cpp:130

memory_alloc.h

PATH_TOKEN::double_wildcard
Definition: db_json_path.hpp:40

er_clear
void er_clear(void)
Definition: error_manager.c:1201

PATH_TOKEN::token_type
token_type
Definition: db_json_path.hpp:36

PATH_TOKEN::array_index_wildcard
Definition: db_json_path.hpp:39

db_json_path_is_token_valid_unquoted_object_key
static bool db_json_path_is_token_valid_unquoted_object_key(const std::string &path, std::size_t &token_begin)
Definition: db_json_path.cpp:150

db_json_split_path_by_delimiters
int db_json_split_path_by_delimiters(const std::string &path, const std::string &delim, bool allow_empty, std::vector< std::string > &split_path)
Definition: db_json_path.cpp:434

JSON_VALUE
rapidjson::GenericValue< JSON_ENCODING, JSON_PRIVATE_MEMPOOL > JSON_VALUE
Definition: db_json_types_internal.hpp:31

i
int i
Definition: dynamic_load.c:954

PATH_TOKEN::m_object_key
std::string m_object_key
Definition: db_json_path.hpp:48

system_parameter.h

PRM_ID_JSON_MAX_ARRAY_IDX
Definition: system_parameter.h:404

JSON_PATH::match_pattern
static MATCH_RESULT match_pattern(const JSON_PATH &pattern, const JSON_PATH &path)
Definition: db_json_path.cpp:540

PATH_TOKEN::object_key_wildcard
Definition: db_json_path.hpp:38

JSON_PATH::validate_and_create_from_json_path
int validate_and_create_from_json_path(std::string &sql_path)
Definition: db_json_path.cpp:308

JSON_PATH::get_token_count
size_t get_token_count() const
Definition: db_json_path.cpp:1058

PATH_TOKEN::m_array_idx
unsigned long m_array_idx
Definition: db_json_path.hpp:49

JSON_PATH::push_array_index_wildcard
void push_array_index_wildcard()
Definition: db_json_path.cpp:685

JSON_PATH::erase
bool erase(JSON_DOC &jd) const
Definition: db_json_path.cpp:1014

db_json_get_path_type
static JSON_PATH_TYPE db_json_get_path_type(std::string &path_string)
Definition: db_json_path.cpp:287

JSON_PATH::from_json_pointer
int from_json_pointer(const std::string &pointer_path)
Definition: db_json_path.cpp:1160

db_json_isspace
static bool db_json_isspace(const unsigned char &ch)
Definition: db_json_path.cpp:273