CUBRID Engine  latest
db_json_path.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include "db_json_path.hpp"
20 
21 #include "db_json.hpp"
22 #include "db_rapidjson.hpp"
23 #include "memory_alloc.h"
24 #include "string_opfunc.h"
25 #include "system_parameter.h"
26 
27 #include <algorithm>
28 #include <cctype>
29 #include <cerrno>
30 #include <cstdlib>
31 #include <limits>
32 #include <string>
33 #include <unordered_set>
34 #include <vector>
35 
36 enum class JSON_PATH_TYPE
37 {
40 };
41 
42 static void db_json_trim_leading_spaces (std::string &path_string);
43 static JSON_PATH_TYPE db_json_get_path_type (std::string &path_string);
44 static bool db_json_isspace (const unsigned char &ch);
45 static std::size_t skip_whitespaces (const std::string &path, std::size_t token_begin);
46 static int db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards, unsigned long &index,
47  std::size_t start = 0, std::size_t end = 0);
48 static bool db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin);
49 static bool db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin);
50 static bool db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin);
51 static bool db_json_path_is_valid_identifier_start_char (unsigned char ch);
52 static bool db_json_path_is_valid_identifier_char (unsigned char ch);
53 static void db_json_remove_leading_zeros_index (std::string &index);
54 static bool db_json_iszero (const unsigned char &ch);
55 
/*
 * db_json_iszero () - Predicate telling whether a character is the digit zero
 *
 * return  : true if ch is '0', false otherwise
 * ch (in) : character to test
 */
static bool
db_json_iszero (const unsigned char &ch)
{
  const unsigned char zero_digit = '0';
  return zero_digit == ch;
}
61 
62 /*
63  * db_json_path_is_token_valid_quoted_object_key () - Check if a quoted object_key is valid
64  *
65  * return : true/false
66  * path (in) : path to be checked
67  * token_begin (in/out) : beginning offset of the token, is replaced with beginning of the next token or path.length ()
68  */
69 static bool
70 db_json_path_is_token_valid_quoted_object_key (const std::string &path, std::size_t &token_begin)
71 {
72  std::size_t i = token_begin + 1;
73  bool unescaped_backslash = false;
74  // stop at unescaped '"'; note that there should be an odd nr of backslashes before '"' for it to be escaped
75  for (; i < path.length () && (path[i] != '"' || unescaped_backslash); ++i)
76  {
77  if (path[i] == '\\')
78  {
79  unescaped_backslash = !unescaped_backslash;
80  }
81  else
82  {
83  unescaped_backslash = false;
84  }
85  }
86 
87  if (i == path.length ())
88  {
89  return false;
90  }
91 
92  token_begin = skip_whitespaces (path, i + 1);
93  return true;
94 }
95 
96 /*
97  * db_json_path_is_token_valid_unquoted_object_key () - Validate and quote an object_key
98  *
99  * return : validation result
100  * path (in/out) : path to be checked
101  * token_begin (in/out) : is replaced with beginning of the next token or path.length ()
102  */
103 static bool
104 db_json_path_quote_and_validate_unquoted_object_key (std::string &path, std::size_t &token_begin)
105 {
106  std::size_t i = token_begin;
107  bool validation_result = db_json_path_is_token_valid_unquoted_object_key (path, i);
108  if (validation_result)
109  {
110  // we normalize object_keys by quoting them - e.g. $.objectkey we represent as $."objectkey"
111  path.insert (token_begin, "\"");
112  path.insert (i + 1, "\"");
113 
114  token_begin = skip_whitespaces (path, i + 2 /* we inserted 2 quotation marks */);
115  }
116  return validation_result;
117 }
118 
119 static bool
121 {
122  // todo: As per SQL Standard accept Ecmascript Identifier start:
123  // \UnicodeEscapedSequence
124  // Any char in Unicode categories: Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo), Letter number (Nl)
125 
126  return ch == '_' || std::isalpha (ch);
127 }
128 
129 static bool
131 {
132  // todo: As per SQL Standard accept Ecmascript Identifier:
133  // \UnicodeEscapedSequence
134  // Any char in Unicode categories: Connector punctuation (Pc), Non-spacing mark (Mn),
135  // Combining spacing mark (Mc), Decimal number (Nd), Titlecase letter (Lt), Modifier letter (Lm), Other letter (Lo)
136  // Letter number (Nl)
137 
138  return ch == '_' || std::isalnum (ch);
139 }
140 
141 /*
142  * db_json_path_is_token_valid_unquoted_object_key () - Check if an unquoted object_key is valid
143  *
144  * return : true/false
145  * path (in) : path to be checked
146  * token_begin (in/out) : beginning offset of the token, is replaced with first char's position
147  * outside of the current valid token
148  */
149 static bool
150 db_json_path_is_token_valid_unquoted_object_key (const std::string &path, std::size_t &token_begin)
151 {
152  if (path == "")
153  {
154  return false;
155  }
156  std::size_t i = token_begin;
157 
158  // todo: this needs change. SQL standard specifies that object key format must obey
159  // JavaScript rules of an Identifier (6.10.1).
160  // Besides alphanumerics, object keys can be valid ECMAScript identifiers as defined in
161  // http://www.ecma-international.org/ecma-262/5.1/#sec-7.6
162 
163  // Defined syntax (approx.):
164  // IdentifierName -> IdentifierStart | (IdentifierName IdentifierPart)
165  // IdentifierStart -> $ ( note: this is the ONLY specified forbidden by SQL Standard) | _ | \UnicodeEscapeSequence
166  // IdentifierPart -> IdentifierStart | InicodeCombinigMark | UnicodeDigit | UnicodeConnectorPunctuation | <ZWNJ>
167  // | <ZWJ>
168 
169  if (i < path.length () && !db_json_path_is_valid_identifier_start_char (static_cast<unsigned char> (path[i])))
170  {
171  return false;
172  }
173 
174  ++i;
175  for (; i < path.length () && db_json_path_is_valid_identifier_char (static_cast<unsigned char> (path[i])); ++i);
176 
177  token_begin = i;
178 
179  return true;
180 }
181 
182 /*
183  * db_json_path_is_token_valid_array_index () - verify if token is a valid array index. token can be a substring of
184  * first argument (by default the entire argument).
185  *
186  * return : no error if token can be converted successfully to an integer smaller than json_max_array_idx
187  * variable
188  * str (in) : token or the string that token belong to
189  * allow_wildcards : whether json_path wildcards are allowed
190  * index (out) : created index token
191  * start (in) : start of token; default is start of string
192  * end (in) : end of token; default is end of string; 0 is considered default value
193  */
194 static int
195 db_json_path_is_token_valid_array_index (const std::string &str, bool allow_wildcards,
196  unsigned long &index, std::size_t start, std::size_t end)
197 {
198  // a json pointer maps the symbol '-' to the length of the JSON_ARRAY
199  // so if we have the json {"A":[1,2,3]} and the path /A/-
200  // this will point to index 3 (zero-based), i.e. the position right after the last element of the array
201  if (str == "-")
202  {
203  return NO_ERROR;
204  }
205 
206  if (end == 0)
207  {
208  // default is end of string
209  end = str.length ();
210  }
211 
212  if (start == end)
213  {
215  return ER_JSON_INVALID_PATH;
216  }
217 
218  std::size_t last_non_space = end - 1;
219  for (; last_non_space > start && str[last_non_space] == ' '; --last_non_space);
220  if (allow_wildcards && start == last_non_space && str[start] == '*')
221  {
222  return NO_ERROR;
223  }
224 
225  // Remaining invalid cases are: 1. Non-digits are present
226  // 2. Index overflows Rapidjson's index representation type
227 
228  // we need to check for non-digits since strtoul simply returns 0 in case conversion
229  // can not be made
230  for (auto it = str.cbegin () + start; it < str.cbegin () + last_non_space + 1; ++it)
231  {
232  if (!std::isdigit (static_cast<unsigned char> (*it)))
233  {
235  return ER_JSON_INVALID_PATH;
236  }
237  }
238 
239  char *end_str;
240  index = std::strtoul (str.c_str () + start, &end_str, 10);
241  if (errno == ERANGE)
242  {
243  errno = 0;
246  }
247 
248  if (index > (unsigned long) prm_get_integer_value (PRM_ID_JSON_MAX_ARRAY_IDX))
249  {
252  }
253 
254  // this is a valid array index
255  return NO_ERROR;
256 }
257 
/*
 * skip_whitespaces () - Advance offset to first non_space
 *
 * return    : offset of the first character at or after pos that is not ' ', or path.length () when only spaces
 *             follow; pos itself when it is already at or beyond the end of path
 * path (in) : path
 * pos (in)  : starting position offset
 *
 * Note: only the space character ' ' is skipped; other white space characters stop the scan.
 */
static std::size_t
skip_whitespaces (const std::string &path, std::size_t pos)
{
  const std::size_t path_len = path.length ();
  while (pos < path_len && path[pos] == ' ')
    {
      ++pos;
    }
  return pos;
}
271 
/*
 * db_json_isspace () - Predicate wrapping std::isspace so that it can be handed to standard algorithms
 *
 * return  : true if std::isspace classifies ch as white space, false otherwise
 * ch (in) : character to test
 */
static bool
db_json_isspace (const unsigned char &ch)
{
  const int classification = std::isspace (static_cast<int> (ch));
  return classification != 0;
}
277 
278 static void
279 db_json_trim_leading_spaces (std::string &path_string)
280 {
281  // trim leading spaces
282  auto first_non_space = std::find_if_not (path_string.begin (), path_string.end (), db_json_isspace);
283  path_string.erase (path_string.begin (), first_non_space);
284 }
285 
286 static JSON_PATH_TYPE
287 db_json_get_path_type (std::string &path_string)
288 {
289  db_json_trim_leading_spaces (path_string);
290 
291  if (path_string.empty () || path_string[0] != '$')
292  {
294  }
295  else
296  {
298  }
299 }
300 
301 /*
302  * validate_and_create_from_json_path () - Check if a given path is a SQL valid path
303  *
304  * return : ER_JSON_INVALID_PATH if path is invalid
305  * sql_path (in/out) : path to be checked
306  */
307 int
309 {
310  // skip leading white spaces
311  db_json_trim_leading_spaces (sql_path);
312  if (sql_path.empty ())
313  {
314  // empty
316  return ER_JSON_INVALID_PATH;
317  }
318 
319  if (sql_path[0] != '$')
320  {
321  // first character should always be '$'
323  return ER_JSON_INVALID_PATH;
324  }
325  // start parsing path string by skipping dollar character
326  std::size_t i = skip_whitespaces (sql_path, 1);
327 
328  while (i < sql_path.length ())
329  {
330  // to begin a next token we have only 3 possibilities:
331  // with dot we start an object name
332  // with bracket we start an index
333  // with * we have the beginning of a '**' wildcard
334  switch (sql_path[i])
335  {
336  case '[':
337  {
338  std::size_t end_bracket_offset;
339  i = skip_whitespaces (sql_path, i + 1);
340 
341  end_bracket_offset = sql_path.find_first_of (']', i);
342  if (end_bracket_offset == std::string::npos)
343  {
345  return ER_JSON_INVALID_PATH;
346  }
347  unsigned long index;
348  int error_code = db_json_path_is_token_valid_array_index (sql_path, true, index, i, end_bracket_offset);
349  if (error_code != NO_ERROR)
350  {
351  ASSERT_ERROR ();
352  return error_code;
353  }
354 
355  // todo check if it is array_index or array_index_wildcard
356  if (sql_path[i] == '*')
357  {
358  push_array_index_wildcard ();
359  }
360  else
361  {
362  // note that db_json_path_is_token_valid_array_index () checks the index to not overflow
363  // a rapidjson::SizeType (unsigned int).
364  push_array_index (index);
365  }
366  i = skip_whitespaces (sql_path, end_bracket_offset + 1);
367  break;
368  }
369  case '.':
370  i = skip_whitespaces (sql_path, i + 1);
371  if (i == sql_path.length ())
372  {
374  return ER_JSON_INVALID_PATH;
375  }
376  switch (sql_path[i])
377  {
378  case '"':
379  {
380  size_t old_idx = i;
382  {
384  return ER_JSON_INVALID_PATH;
385  }
386  push_object_key (sql_path.substr (old_idx, i - old_idx));
387  break;
388  }
389  case '*':
390  push_object_key_wildcard ();
391  i = skip_whitespaces (sql_path, i + 1);
392  break;
393  default:
394  {
395  size_t old_idx = i;
396  // unquoted object_keys
398  {
400  return ER_JSON_INVALID_PATH;
401  }
402  push_object_key (sql_path.substr (old_idx, i - old_idx));
403  break;
404  }
405  }
406  break;
407 
408  case '*':
409  // only ** wildcard is allowed in this case
410  if (++i >= sql_path.length () || sql_path[i] != '*')
411  {
413  return ER_JSON_INVALID_PATH;
414  }
415  push_double_wildcard ();
416  i = skip_whitespaces (sql_path, i + 1);
417  if (i == sql_path.length ())
418  {
419  // ** wildcard requires suffix
421  return ER_JSON_INVALID_PATH;
422  }
423  break;
424 
425  default:
427  return ER_JSON_INVALID_PATH;
428  }
429  }
430  return NO_ERROR;
431 }
432 
433 int
434 db_json_split_path_by_delimiters (const std::string &path, const std::string &delim, bool allow_empty,
435  std::vector<std::string> &split_path)
436 {
437  std::size_t start = 0;
438  std::size_t end = path.find_first_of (delim, start);
439 
440  while (end != std::string::npos)
441  {
442  if (path[end] == '"')
443  {
444  std::size_t index_of_closing_quote = path.find_first_of ('"', end + 1);
445  if (index_of_closing_quote == std::string::npos)
446  {
447  assert (false);
448  split_path.clear ();
450  return ER_JSON_INVALID_PATH;
451  /* this should have been caught earlier */
452  }
453  else
454  {
455  split_path.push_back (path.substr (end + 1, index_of_closing_quote - end - 1));
456  end = index_of_closing_quote;
457  start = end + 1;
458  }
459  }
460  // do not tokenize on escaped quotes
461  else if (path[end] != '"' || ((end >= 1) && path[end - 1] != '\\'))
462  {
463  const std::string &substring = path.substr (start, end - start);
464  if (!substring.empty () || allow_empty)
465  {
466  split_path.push_back (substring);
467  }
468  start = end + 1;
469  }
470 
471  end = path.find_first_of (delim, end + 1);
472  }
473 
474  const std::string &substring = path.substr (start, end);
475  if (!substring.empty () || allow_empty)
476  {
477  split_path.push_back (substring);
478  }
479 
480  std::size_t tokens_size = split_path.size ();
481  for (std::size_t i = 0; i < tokens_size; i++)
482  {
483  unsigned long index;
484  int error_code = db_json_path_is_token_valid_array_index (split_path[i], false, index);
485  if (error_code != NO_ERROR)
486  {
487  // ignore error. We only need to decide whether to skip it in case it is not array_idx
488  er_clear ();
489  continue;
490  }
491 
492  db_json_remove_leading_zeros_index (split_path[i]);
493  }
494 
495  return NO_ERROR;
496 }
497 
499 JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH::token_containter_type::const_iterator &it1,
500  const JSON_PATH &path, const JSON_PATH::token_containter_type::const_iterator &it2)
501 {
502  if (it1 == pattern.m_path_tokens.end () && it2 == path.m_path_tokens.end ())
503  {
504  return FULL_MATCH;
505  }
506 
507  if (it1 == pattern.m_path_tokens.end ())
508  {
509  return PREFIX_MATCH;
510  }
511 
512  if (it2 == path.m_path_tokens.end ())
513  {
514  // note that in case of double wildcard we have guaranteed a token after it
515  return NO_MATCH;
516  }
517 
518  if (it1->m_type == PATH_TOKEN::double_wildcard)
519  {
520  // for "**" wildcard we try to match the remaining pattern against each suffix of the path
521  MATCH_RESULT advance_pattern = match_pattern (pattern, it1 + 1, path, it2);
522  if (advance_pattern == FULL_MATCH)
523  {
524  // return early if we have a full result
525  return advance_pattern;
526  }
527 
528  MATCH_RESULT advance_path = match_pattern (pattern, it1, path, it2 + 1);
529  if (advance_path == FULL_MATCH)
530  {
531  return advance_path;
532  }
533  return (advance_pattern == PREFIX_MATCH || advance_path == PREFIX_MATCH) ? PREFIX_MATCH : NO_MATCH;
534  }
535 
536  return !PATH_TOKEN::match_pattern (*it1, *it2) ? NO_MATCH : match_pattern (pattern, it1 + 1, path, it2 + 1);
537 }
538 
540 JSON_PATH::match_pattern (const JSON_PATH &pattern, const JSON_PATH &path)
541 {
542  assert (!path.contains_wildcard ());
543 
544  return match_pattern (pattern, pattern.m_path_tokens.begin (), path, path.m_path_tokens.begin ());
545 }
546 
547 /*
548  * db_json_path_unquote_object_keys () - Unquote, when possible, object_keys of the json_path
549  *
550  * return : ER_JSON_INVALID_PATH if a validation error occured
551  * sql_path (in/out) : path
552  */
553 int
554 db_json_path_unquote_object_keys (std::string &sql_path)
555 {
556  // todo: rewrite as json_path.dump () + unquoting the object_keys
557  std::vector<std::string> tokens;
558  int error_code = db_json_split_path_by_delimiters (sql_path, ".[", false, tokens);
559  if (error_code != NO_ERROR)
560  {
561  ASSERT_ERROR ();
562  return error_code;
563  }
564  std::string res = "$";
565 
566  assert (!tokens.empty () && tokens[0] == "$");
567  for (std::size_t i = 1; i < tokens.size(); ++i)
568  {
569  if (tokens[i][0] == '"')
570  {
571  res += ".";
572  std::string unquoted = tokens[i].substr (1, tokens[i].length () - 2);
573  std::size_t start = 0;
574 
575  if (db_json_path_is_token_valid_unquoted_object_key (unquoted, start) && start >= unquoted.length ())
576  {
577  res.append (unquoted);
578  }
579  else
580  {
581  res += tokens[i];
582  }
583  }
584  else
585  {
586  res += "[";
587  res += tokens[i];
588  }
589  }
590 
591  sql_path = std::move (res);
592  return NO_ERROR;
593 }
594 
595 /*
596  * db_json_remove_leading_zeros_index () - Erase leading zeros from sql path index
597  *
598  * index (in) : current object
599  * example: $[000123] -> $[123]
600  */
601 static void
603 {
604  // trim leading zeros
605  auto first_non_zero = std::find_if_not (index.begin (), index.end (), db_json_iszero);
606  index.erase (index.begin (), first_non_zero);
607 
608  if (index.empty ())
609  {
610  index = "0";
611  }
612 }
613 
615  : m_type (array_index)
616 {
617 
618 }
619 
620 PATH_TOKEN::PATH_TOKEN (token_type type, unsigned long array_idx)
621  : m_type (type)
622  , m_array_idx (array_idx)
623 {
624 
625 }
626 
627 PATH_TOKEN::PATH_TOKEN (token_type type, std::string &&s)
628  : m_type (type)
629  , m_object_key (std::move (s))
630 {
631 
632 }
633 
634 const std::string &
636 {
637  assert (m_type == object_key);
638 
639  return m_object_key;
640 }
641 
642 unsigned long
644 {
646 
647  return m_array_idx;
648 }
649 
650 bool
652 {
654 }
655 
656 bool
657 PATH_TOKEN::match_pattern (const PATH_TOKEN &matcher, const PATH_TOKEN &matchee)
658 {
659  assert (!matchee.is_wildcard ());
660 
661  switch (matcher.m_type)
662  {
663  case double_wildcard:
664  return matchee.m_type == object_key || matchee.m_type == array_index;
665  case object_key_wildcard:
666  return matchee.m_type == object_key;
668  return matchee.m_type == array_index;
669  case object_key:
670  return matchee.m_type == object_key && matcher.get_object_key () == matchee.get_object_key ();
671  case array_index:
672  return matchee.m_type == array_index && matcher.get_array_index () == matchee.get_array_index ();
673  default:
674  return false;
675  }
676 }
677 
678 void
679 JSON_PATH::push_array_index (unsigned long idx)
680 {
681  m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index, idx);
682 }
683 
684 void
686 {
687  m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_index_wildcard, std::string ("*"));
688 }
689 
690 void
692 {
693  m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key, std::move (object_key));
694 }
695 
696 void
698 {
699  m_path_tokens.emplace_back (PATH_TOKEN::token_type::object_key_wildcard, std::string ("*"));
700 }
701 
702 void
704 {
705  m_path_tokens.emplace_back (PATH_TOKEN::token_type::double_wildcard, std::string ("**"));
706 }
707 
708 void
710 {
711  m_path_tokens.pop_back ();
712 }
713 
714 bool
716 {
717  for (const PATH_TOKEN &tkn : m_path_tokens)
718  {
719  if (tkn.is_wildcard ())
720  {
721  return true;
722  }
723  }
724  return false;
725 }
726 
727 std::string
729 {
730  std::string res = "$";
731 
732  for (const auto &tkn : m_path_tokens)
733  {
734  switch (tkn.m_type)
735  {
737  res += '[';
738  res += std::to_string (tkn.get_array_index ());
739  res += ']';
740  break;
742  res += "[*]";
743  break;
745  res += '.';
746  res += tkn.get_object_key ();
747  break;
749  res += ".*";
750  break;
752  res += "**";
753  break;
755  // this case is valid and possible in case of ER_JSON_PATH_DOES_NOT_EXIST
756  // we don't have the JSON in this context and cannot replace '-' with last index
757  // for json_pointer -> json_path conversion so we leave empty suffix
758  break;
759  default:
760  assert (false);
761  break;
762  }
763  }
764 
765  return res;
766 }
767 
768 void
769 JSON_PATH::set (JSON_DOC &jd, const JSON_VALUE &jv) const
770 {
771  set (db_json_doc_to_value (jd), jv, jd.GetAllocator ());
772 }
773 
774 /*
775  * set () - Create or replace a value at path in the document
776  *
777  * jd (in) - document we insert in
778  * jv (in) - value to be inserted
779  * allocator
780  * return : found value at path
781  *
782  * Our implementation does not follow the JSON Pointer https://tools.ietf.org/html/rfc6901#section-4 standard fully
783  * We normalize json_pointers to json_paths and resolve token types independently of the document that gets operated
784  * by the normalized path.
785  * Therefore, we cannot traverse the doc contextually as described in the rfc e.g. both '{"0":10}' an '[10]' to provide
786  * same results for '/1' json_pointer.
787  */
788 void
789 JSON_PATH::set (JSON_VALUE &jd, const JSON_VALUE &jv, JSON_PRIVATE_MEMPOOL &allocator) const
790 {
791  JSON_VALUE *val = &jd;
792  for (const PATH_TOKEN &tkn : m_path_tokens)
793  {
794  switch (tkn.m_type)
795  {
796  case PATH_TOKEN::token_type::array_index:
797  case PATH_TOKEN::token_type::array_end_index:
798  if (!val->IsArray ())
799  {
800  val->SetArray ();
801  }
802  break;
803  case PATH_TOKEN::token_type::object_key:
804  if (!val->IsObject ())
805  {
806  val->SetObject ();
807  }
808  break;
809  case PATH_TOKEN::token_type::array_index_wildcard:
810  case PATH_TOKEN::token_type::object_key_wildcard:
811  case PATH_TOKEN::token_type::double_wildcard:
812  // error? unexpected set - wildcards not allowed for set
813  assert (false);
814  return;
815  }
816 
817  if (val->IsArray ())
818  {
819  JSON_VALUE::Array arr = val->GetArray ();
820  if (tkn.m_type == PATH_TOKEN::token_type::array_end_index)
821  {
822  // insert dummy
823  arr.PushBack (JSON_VALUE ().SetNull (), allocator);
824  val = &val->GetArray ()[val->GetArray ().Size () - 1];
825  }
826  else
827  {
828  rapidjson::SizeType idx = (rapidjson::SizeType) tkn.get_array_index ();
829  while (idx >= arr.Size ())
830  {
831  arr.PushBack (JSON_VALUE ().SetNull (), allocator);
832  }
833  val = &val->GetArray ()[idx];
834  }
835  }
836  else if (val->IsObject ())
837  {
838  std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
839  JSON_VALUE::MemberIterator m = val->FindMember (encoded_key.c_str ());
840  if (m == val->MemberEnd ())
841  {
842  // insert dummy
843  unsigned int len = (rapidjson::SizeType) encoded_key.length ();
844  val->AddMember (JSON_VALUE (encoded_key.c_str (), len, allocator), JSON_VALUE ().SetNull (), allocator);
845 
846  val = & (--val->MemberEnd ())->value; // Assume AddMember() appends at the end
847  }
848  else
849  {
850  val = &m->value;
851  }
852  }
853  }
854 
855  val->CopyFrom (jv, allocator);
856 }
857 
858 JSON_VALUE *
860 {
861  return const_cast<JSON_VALUE *> (get (const_cast<const JSON_DOC &> (jd)));
862 }
863 
864 /*
865  * get () - Walk a doc following a path and retrive the value pointed at
866  *
867  * jd (in)
868  * return : found value at path
869  */
870 const JSON_VALUE *
871 JSON_PATH::get (const JSON_DOC &jd) const
872 {
873  const JSON_VALUE *val = &db_json_doc_to_value (jd);
874  for (const PATH_TOKEN &tkn : m_path_tokens)
875  {
876  if (val->IsArray ())
877  {
878  if (tkn.m_type != PATH_TOKEN::token_type::array_index)
879  {
880  return NULL;
881  }
882 
883  unsigned idx = tkn.get_array_index ();
884  if (idx >= val->GetArray ().Size ())
885  {
886  return NULL;
887  }
888 
889  val = &val->GetArray ()[idx];
890  }
891  else if (val->IsObject ())
892  {
893  if (tkn.m_type != PATH_TOKEN::token_type::object_key)
894  {
895  return NULL;
896  }
897  std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
898  JSON_VALUE::ConstMemberIterator m = val->FindMember (encoded_key.c_str ());
899  if (m == val->MemberEnd ())
900  {
901  return NULL;
902  }
903  val = &m->value;
904  }
905  else
906  {
907  return NULL;
908  }
909  }
910  return val;
911 }
912 
913 void
914 JSON_PATH::extract_from_subtree (const JSON_PATH &path, size_t tkn_array_offset, const JSON_VALUE &jv,
915  std::unordered_set<const JSON_VALUE *> &vals_hash_set,
916  std::vector<const JSON_VALUE *> &vals)
917 {
918  if (tkn_array_offset == path.get_token_count ())
919  {
920  // No suffix remaining -> collect match
921  // Note: some nodes of the tree are encountered multiple times (only during double wildcards)
922  // therefore the use of unordered_set
923  if (vals_hash_set.find (&jv) == vals_hash_set.end ())
924  {
925  vals_hash_set.insert (&jv);
926  vals.push_back (&jv);
927  }
928  return;
929  }
930 
931  const PATH_TOKEN &crt_tkn = path.m_path_tokens[tkn_array_offset];
932  if (jv.IsArray ())
933  {
934  switch (crt_tkn.m_type)
935  {
936  case PATH_TOKEN::token_type::array_index:
937  {
938  unsigned idx = crt_tkn.get_array_index ();
939  if (idx >= jv.GetArray ().Size ())
940  {
941  return;
942  }
943  extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[idx], vals_hash_set, vals);
944  return;
945  }
946  case PATH_TOKEN::token_type::array_index_wildcard:
947  for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
948  {
949  extract_from_subtree (path, tkn_array_offset + 1, jv.GetArray ()[i], vals_hash_set, vals);
950  }
951  return;
952  case PATH_TOKEN::token_type::double_wildcard:
953  // Advance token_array_offset
954  extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
955  for (rapidjson::SizeType i = 0; i < jv.GetArray ().Size (); ++i)
956  {
957  // Advance in tree, keep current token_array_offset
958  extract_from_subtree (path, tkn_array_offset, jv.GetArray ()[i], vals_hash_set, vals);
959  }
960  return;
961  default:
962  return;
963  }
964  }
965  else if (jv.IsObject ())
966  {
967  switch (crt_tkn.m_type)
968  {
969  case PATH_TOKEN::token_type::object_key:
970  {
971  std::string encoded_key = db_json_json_string_as_utf8 (crt_tkn.get_object_key ());
972  JSON_VALUE::ConstMemberIterator m = jv.FindMember (encoded_key.c_str ());
973  if (m == jv.MemberEnd ())
974  {
975  return;
976  }
977  extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
978  return;
979  }
980  case PATH_TOKEN::token_type::object_key_wildcard:
981  for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
982  {
983  extract_from_subtree (path, tkn_array_offset + 1, m->value, vals_hash_set, vals);
984  }
985  return;
986  case PATH_TOKEN::token_type::double_wildcard:
987  // Advance token_array_offset
988  extract_from_subtree (path, tkn_array_offset + 1, jv, vals_hash_set, vals);
989  for (JSON_VALUE::ConstMemberIterator m = jv.MemberBegin (); m != jv.MemberEnd (); ++m)
990  {
991  // Advance in tree, keep current token_array_offset
992  extract_from_subtree (path, tkn_array_offset, m->value, vals_hash_set, vals);
993  }
994  return;
995  default:
996  return;
997  }
998  }
999  // Json scalars are ignored if there is a remaining suffix
1000 }
1001 
1002 std::vector<const JSON_VALUE *>
1003 JSON_PATH::extract (const JSON_DOC &jd) const
1004 {
1005  std::unordered_set<const JSON_VALUE *> vals_hash_set;
1006  std::vector<const JSON_VALUE *> res;
1007 
1008  extract_from_subtree (*this, 0, db_json_doc_to_value (jd), vals_hash_set, res);
1009 
1010  return res;
1011 }
1012 
1013 bool
1015 {
1016  if (get_token_count () == 0)
1017  {
1018  return false;
1019  }
1020 
1021  JSON_VALUE *value = get_parent ().get (jd);
1022  if (value == nullptr)
1023  {
1024  return false;
1025  }
1026 
1027  const PATH_TOKEN &tkn = m_path_tokens.back ();
1028 
1029  if (value->IsArray ())
1030  {
1031  if (!is_last_array_index_less_than (value->GetArray ().Size ()))
1032  {
1033  return false;
1034  }
1035  value->Erase (value->Begin () + tkn.get_array_index ());
1036  return true;
1037  }
1038  else if (value->IsObject ())
1039  {
1040  if (tkn.m_type != PATH_TOKEN::object_key)
1041  {
1042  return false;
1043  }
1044  std::string encoded_key = db_json_json_string_as_utf8 (tkn.get_object_key ());
1045  return value->EraseMember (encoded_key.c_str ());
1046  }
1047 
1048  return false;
1049 }
1050 
1051 const PATH_TOKEN *
1053 {
1054  return get_token_count () > 0 ? &m_path_tokens[get_token_count () - 1] : NULL;
1055 }
1056 
1057 size_t
1059 {
1060  return m_path_tokens.size ();
1061 }
1062 
1063 bool
1065 {
1066  return get_token_count () == 0;
1067 }
1068 
1069 JSON_PATH
1071 {
1072  if (get_token_count () == 0)
1073  {
1074  // this should not happen
1075  assert (false);
1076  JSON_PATH parent;
1077  return parent;
1078  }
1079  else
1080  {
1081  // todo: improve getting a slice of the m_path_tokens vector
1082  JSON_PATH parent (*this);
1083  parent.pop ();
1084  return parent;
1085  }
1086 }
1087 
1088 bool
1090 {
1091  const PATH_TOKEN *last_token = get_last_token ();
1092  assert (last_token != NULL);
1093 
1094  return last_token->m_type == PATH_TOKEN::array_index && last_token->get_array_index () < size;
1095 }
1096 
1097 bool
1099 {
1100  return is_last_array_index_less_than (1);
1101 }
1102 
1103 bool
1105 {
1106  const PATH_TOKEN *last_token = get_last_token ();
1107  return (last_token != NULL && (last_token->m_type == PATH_TOKEN::array_index
1108  || (last_token->m_type == PATH_TOKEN::array_end_index)));
1109 }
1110 
1111 bool
1113 {
1114  if (get_token_count () == 0)
1115  {
1116  return false;
1117  }
1118 
1119  if (get_parent ().get (jd) != NULL)
1120  {
1121  return true;
1122  }
1123 
1124  return false;
1125 }
1126 
1127 /*
1128  * init ()
1129  *
1130  * path (in)
1131  * An sql_path is normalized to rapidjson standard path
1132  * Example: $[0]."name1".name2[2] -> /0/name1/name2/2
1133  */
1134 int
1135 JSON_PATH::parse (const char *path)
1136 {
1137  std::string sql_path_string (path);
1138  JSON_PATH_TYPE json_path_type = db_json_get_path_type (sql_path_string);
1139 
1140  if (json_path_type == JSON_PATH_TYPE::JSON_PATH_POINTER)
1141  {
1142  // path is not SQL path format; consider it JSON pointer.
1143  int error_code = from_json_pointer (sql_path_string);
1144  if (error_code != NO_ERROR)
1145  {
1146  ASSERT_ERROR ();
1147  }
1148  return error_code;
1149  }
1150 
1151  int error_code = validate_and_create_from_json_path (sql_path_string);
1152  if (error_code != NO_ERROR)
1153  {
1154  ASSERT_ERROR ();
1155  }
1156  return error_code;
1157 }
1158 
1159 int
1160 JSON_PATH::from_json_pointer (const std::string &pointer_path)
1161 {
1162  typedef rapidjson::GenericPointer<JSON_VALUE>::Token TOKEN;
1163  static const rapidjson::SizeType kPointerInvalidIndex = rapidjson::kPointerInvalidIndex;
1164 
1165  typedef rapidjson::GenericPointer<JSON_VALUE> JSON_POINTER;
1166 
1167  JSON_POINTER jp (pointer_path.c_str ());
1168  if (!jp.IsValid ())
1169  {
1171  return ER_JSON_INVALID_PATH;
1172  }
1173 
1174  size_t tkn_cnt = jp.GetTokenCount ();
1175  const TOKEN *tokens = jp.GetTokens ();
1176 
1177  // convert rapidjson's tokens to our tokens:
1178  for (size_t i = 0; i < tkn_cnt; ++i)
1179  {
1180  const TOKEN &rapid_token = tokens[i];
1181 
1182  if (rapid_token.index != kPointerInvalidIndex)
1183  {
1184  // array_index
1185  push_array_index (rapid_token.index);
1186  }
1187  else if (rapid_token.length == 1 && rapid_token.name[0] == '-' )
1188  {
1189  // '-' special idx token
1190  m_path_tokens.emplace_back (PATH_TOKEN::token_type::array_end_index, "-");
1191  }
1192  else
1193  {
1194  // object_key
1195  char *escaped;
1196  size_t escaped_size;
1197  db_string_escape_str (rapid_token.name, rapid_token.length, &escaped, &escaped_size);
1198 
1199  push_object_key (escaped);
1200  db_private_free (NULL, escaped);
1201  }
1202  }
1203 
1204  return NO_ERROR;
1205 }
JSON_VALUE & db_json_doc_to_value(JSON_DOC &doc)
token_containter_type m_path_tokens
const std::string & get_object_key() const
#define NO_ERROR
Definition: error_code.h:46
static bool match_pattern(const PATH_TOKEN &matcher, const PATH_TOKEN &matchee)
#define ASSERT_ERROR()
bool parent_exists(JSON_DOC &jd) const
bool points_to_array_cell() const
static bool db_json_path_is_valid_identifier_start_char(unsigned char ch)
int parse(const char *path)
static bool db_json_path_is_token_valid_quoted_object_key(const std::string &path, std::size_t &token_begin)
bool is_last_array_index_less_than(size_t size) const
bool contains_wildcard() const
bool is_root_path() const
const PATH_TOKEN * get_last_token() const
void push_array_index(unsigned long idx)
token_type m_type
static bool db_json_iszero(const unsigned char &ch)
bool is_wildcard() const
void er_set(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
rapidjson::MemoryPoolAllocator< JSON_PRIVATE_ALLOCATOR > JSON_PRIVATE_MEMPOOL
#define assert(x)
int prm_get_integer_value(PARAM_ID prm_id)
std::vector< const JSON_VALUE * > extract(const JSON_DOC &) const
JSON_PATH_TYPE
void push_object_key(std::string &&object_key)
static int db_json_path_is_token_valid_array_index(const std::string &str, bool allow_wildcards, unsigned long &index, std::size_t start=0, std::size_t end=0)
JSON_PATH get_parent() const
void push_object_key_wildcard()
#define NULL
Definition: freelistheap.h:34
static bool db_json_path_quote_and_validate_unquoted_object_key(std::string &path, std::size_t &token_begin)
std::string dump_json_path() const
#define db_private_free(thrd, ptr)
Definition: memory_alloc.h:229
#define ER_JSON_ARRAY_INDEX_TOO_LARGE
Definition: error_code.h:1557
static void db_json_remove_leading_zeros_index(std::string &index)
int db_json_path_unquote_object_keys(std::string &sql_path)
static std::size_t skip_whitespaces(const std::string &path, std::size_t token_begin)
static void db_json_trim_leading_spaces(std::string &path_string)
static void extract_from_subtree(const JSON_PATH &path, size_t tkn_array_offset, const JSON_VALUE &jv, std::unordered_set< const JSON_VALUE * > &unique_elements, std::vector< const JSON_VALUE * > &vals)
#define ER_JSON_INVALID_PATH
Definition: error_code.h:1549
unsigned long get_array_index() const
#define ARG_FILE_LINE
Definition: error_manager.h:44
void set(JSON_DOC &jd, const JSON_VALUE &jv) const
int db_string_escape_str(const char *src_str, size_t src_size, char **res_string, size_t *dest_size)
bool is_last_token_array_index_zero() const
void push_double_wildcard()
JSON_VALUE * get(JSON_DOC &jd) const
std::string db_json_json_string_as_utf8(std::string raw_json_string)
Definition: db_json.cpp:2893
static bool db_json_path_is_valid_identifier_char(unsigned char ch)
void er_clear(void)
static bool db_json_path_is_token_valid_unquoted_object_key(const std::string &path, std::size_t &token_begin)
int db_json_split_path_by_delimiters(const std::string &path, const std::string &delim, bool allow_empty, std::vector< std::string > &split_path)
rapidjson::GenericValue< JSON_ENCODING, JSON_PRIVATE_MEMPOOL > JSON_VALUE
int i
Definition: dynamic_load.c:954
std::string m_object_key
static MATCH_RESULT match_pattern(const JSON_PATH &pattern, const JSON_PATH &path)
int validate_and_create_from_json_path(std::string &sql_path)
size_t get_token_count() const
unsigned long m_array_idx
void push_array_index_wildcard()
bool erase(JSON_DOC &jd) const
static JSON_PATH_TYPE db_json_get_path_type(std::string &path_string)
int from_json_pointer(const std::string &pointer_path)
static bool db_json_isspace(const unsigned char &ch)