CUBRID Engine  latest
scan_json_table.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 //
20 // scan_json_table.hpp - interface json table scanning
21 //
22 // JSON Table Scanner explained
23 //
24 // Behavior - todo - add manual reference here
25 //
26 // The syntax of JSON table is something like this:
27 // ... JSON_TABLE (expression,
28 // '$.[*]' COLUMNS(rownum FOR ORDINALITY,
29 // a STRING PATH '$.a',
30 // b INT EXISTS '$.b',
31 // NESTED PATH '$.arr[*]' COLUMNS (c JSON PATH '$.c'))) as jt
32 // WHERE b != 0 and a > d...
33 //
34 // Expression is the input JSON for table. Each element found in the array of COLUMNS path is expanded into a row.
35 // (above example expands root of JSON and then the array at $.arr).
36 //
37 // For each row found in root ('$'), column 'a' is value of $.a converted to string, column 'b' is 1 if $.b exists
38 // and 0 otherwise; these values are repeated for each element found in $.arr, extracting the value of $.arr[*].c
39 //
40 // Rows that do not pass the WHERE check are filtered.
41 //
42 // NOTE: if there are multiple nested paths to the same node, they are not cross-joined. While one nested path is
43 // expanded, the values for sibling nested paths will all be null.
44 //
45 //
46 // Implementation
47 //
48 // A root scan node is always used based on the input JSON (result of expression) and the first COLUMNS path. For
49 // each NESTED PATH, a child scan node is generated (a node may have no, one or multiple children scan nodes).
50 //
51 // Each scanner::next_scan call generates one row, or none if it was consumed entirely. It starts by generating a
52 // small row for root node. If it has (nested) children, for each child one by one, it computes the input node by
53 // extracting nested node path from its root input and repeats same process until a leaf node is reached.
54 //
55 // When a leaf-level node row is generated, the scan row is considered complete and next_scan returns success.
56 //
57 // A "breadcrumb"-like cursor is used to remember where the last row was generated. It generates a new row on the
58 // same leaf node if possible, or clears all values for this node and returns to its parent (non-leaf node).
59 //
60 // The parent will then try to advance to another child, or if all children have been processed, it will generate
61 // a new row.
62 //
63 // The process is repeated recursively until all nodes have been consumed and other rows can no longer be generated.
64 //
65 //
66 // Future
67 //
68 // Rows are filtered after a complete row is generated. We could partition the scan predicate on scan nodes and
69 // filter invalid rows at node level, cutting off an entire branch of rows that would all be invalid.
70 //
71 
72 #ifndef _SCAN_JSON_TABLE_HPP_
73 #define _SCAN_JSON_TABLE_HPP_
74 
75 #include "query_evaluator.h"
76 #include "storage_common.h"
77 
78 #include <vector>
79 
80 // forward declarations - only names are needed here because this header uses these types through pointers/references
81 // from access_json_table.hpp - XASL-side description of the json table (spec node, scan nodes, columns)
82 namespace cubxasl
83 {
84  namespace json_table
85  {
86  struct spec_node;
87  struct node;
88  struct column;
89  }
90 }
91 // from db_json.hpp - JSON document and iterator types
92 class JSON_DOC;
93 class JSON_ITERATOR;
94 // from scan_manager.h (as noted by the original author)
95 struct scan_id_struct;
96 struct val_descr;
97 struct xasl_node;
98 
99 // from thread_entry.hpp - thread context passed to open/end/next_scan
100 namespace cubthread
101 {
102  class entry;
103 }
104 
105 namespace cubscan
106 {
107  namespace json_table
108  {
109  class scanner // JSON_TABLE row generator: one root scan node plus a child scan node for each NESTED PATH
110  {
111  public:
112 
113  // initialize scanner for the given json table spec node
114  void init (cubxasl::json_table::spec_node &spec);
115  // clear scanner; NOTE(review): the difference between is_final and is_final_clear is not visible in this header
116  void clear (xasl_node *xasl_p, bool is_final, bool is_final_clear);
117 
118  // open a new scan; returns int (presumably error code or NO_ERROR like next_scan - confirm in cpp file)
119  int open (cubthread::entry *thread_p);
120  // end a scan
121  void end (cubthread::entry *thread_p);
122 
123  // next_scan - generate a new row, or none if the scan was consumed entirely
124  //
125  // returns error code or NO_ERROR
126  //
127  // sid (in/out) : status and position are updated based on the success of scan; sc : presumably scan result - confirm
128  int next_scan (cubthread::entry *thread_p, scan_id_struct &sid, SCAN_CODE &sc);
129  // accessors; get_predicate presumably exposes m_scan_predicate (rows are filtered after being generated) - confirm
130  SCAN_PRED &get_predicate ();
131  void set_value_descriptor (val_descr *vd); // NOTE(review): no val_descr member is visible in this listing - confirm storage
132  // defaulted constructor: data members below have no default member initializers; presumably init () sets them - confirm
133  scanner () = default;
134 
135  private:
136  // cursor used to track scanner progress and resume scan on each next_scan call; only declared here, defined in cpp file
137  struct cursor;
138 
139  // clear the values fetched for the columns of a node
140  void clear_node_columns (cubxasl::json_table::node &node);
141  // reset node ordinality (row number)
142  void reset_ordinality (cubxasl::json_table::node &node);
143 
144  // init iterators considering the expansion type
145  void init_iterators (cubxasl::json_table::node &node);
146 
147  // cursor functions; both return int (presumably error code or NO_ERROR - confirm in cpp file)
148  int init_cursor (const JSON_DOC &doc, cubxasl::json_table::node &node, cursor &cursor_out);
149  int set_next_cursor (const cursor &current_cursor, size_t next_depth);
150 
151  // to start scanning a node, an input document is set on its cursor
152  int set_input_document (cursor &cursor, const cubxasl::json_table::node &node, const JSON_DOC &document);
153 
154  // compute scan tree height; recursive function
155  size_t get_tree_height (const cubxasl::json_table::node &node);
156 
157  // recursive scan next called on json table node / cursor at the given depth; found_row_output reports if a row was found
158  int scan_next_internal (cubthread::entry *thread_p, size_t depth, bool &found_row_output);
159 
160  cubxasl::json_table::spec_node *m_specp; // pointer to json table spec node in XASL
161  cursor *m_scan_cursor; // cursor to keep track of progress in each scan node; raw pointer, ownership not visible here
162  size_t m_scan_cursor_depth; // the current level where the cursor was left
163  size_t m_tree_height; // scan tree height; will be used to initialize the cursor storage (see get_tree_height)
164  scan_pred m_scan_predicate; // scan predicate to filter generated rows
166  };
167  } // namespace json_table
168 } // namespace cubscan
169 
170 // naming convention of SCAN_ID's
172 
173 #endif // _SCAN_JSON_TABLE_HPP_
SCAN_CODE
cubxasl::json_table::spec_node * m_specp
void clear(cub_regex_object *&regex, char *&pattern)