xref: /aosp_15_r20/external/icu/icu4c/source/common/rbbinode.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 *   Copyright (C) 2002-2016 International Business Machines Corporation   *
6 *   and others. All rights reserved.                                      *
7 ***************************************************************************
8 */
9 
10 //
11 //  File:  rbbinode.cpp
12 //
13 //         Implementation of class RBBINode, which represents a node in the
14 //         tree generated when parsing the Rules Based Break Iterator rules.
15 //
16 //         This "Class" is actually closer to a struct.
17 //         Code using it is expected to directly access fields much of the time.
18 //
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_BREAK_ITERATION
23 
24 #include "unicode/unistr.h"
25 #include "unicode/uniset.h"
26 #include "unicode/uchar.h"
27 #include "unicode/parsepos.h"
28 
29 #include "cstr.h"
30 #include "uvector.h"
31 
32 #include "rbbirb.h"
33 #include "rbbinode.h"
34 
35 #include "uassert.h"
36 
37 
38 U_NAMESPACE_BEGIN
39 
40 #ifdef RBBI_DEBUG
41 static int  gLastSerial = 0;
42 #endif
43 
44 
45 //-------------------------------------------------------------------------
46 //
47 //    Constructor.   Just set the fields to reasonable default values.
48 //
49 //-------------------------------------------------------------------------
RBBINode(NodeType t)50 RBBINode::RBBINode(NodeType t) : UMemory() {
51 #ifdef RBBI_DEBUG
52     fSerialNum    = ++gLastSerial;
53 #endif
54     fType         = t;
55     fParent       = nullptr;
56     fLeftChild    = nullptr;
57     fRightChild   = nullptr;
58     fInputSet     = nullptr;
59     fFirstPos     = 0;
60     fLastPos      = 0;
61     fNullable     = false;
62     fLookAheadEnd = false;
63     fRuleRoot     = false;
64     fChainIn      = false;
65     fVal          = 0;
66     fPrecedence   = precZero;
67 
68     UErrorCode     status = U_ZERO_ERROR;
69     fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
70     fLastPosSet   = new UVector(status);
71     fFollowPos    = new UVector(status);
72     if      (t==opCat)    {fPrecedence = precOpCat;}
73     else if (t==opOr)     {fPrecedence = precOpOr;}
74     else if (t==opStart)  {fPrecedence = precStart;}
75     else if (t==opLParen) {fPrecedence = precLParen;}
76 
77 }
78 
79 
RBBINode(const RBBINode & other)80 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
81 #ifdef RBBI_DEBUG
82     fSerialNum   = ++gLastSerial;
83 #endif
84     fType        = other.fType;
85     fParent      = nullptr;
86     fLeftChild   = nullptr;
87     fRightChild  = nullptr;
88     fInputSet    = other.fInputSet;
89     fPrecedence  = other.fPrecedence;
90     fText        = other.fText;
91     fFirstPos    = other.fFirstPos;
92     fLastPos     = other.fLastPos;
93     fNullable    = other.fNullable;
94     fVal         = other.fVal;
95     fRuleRoot    = false;
96     fChainIn     = other.fChainIn;
97     UErrorCode     status = U_ZERO_ERROR;
98     fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
99     fLastPosSet  = new UVector(status);
100     fFollowPos   = new UVector(status);
101 }
102 
103 
104 //-------------------------------------------------------------------------
105 //
106 //    Destructor.   Deletes both this node AND any child nodes,
107 //                  except in the case of variable reference nodes.  For
108 //                  these, the l. child points back to the definition, which
109 //                  is common for all references to the variable, meaning
110 //                  it can't be deleted here.
111 //
112 //-------------------------------------------------------------------------
~RBBINode()113 RBBINode::~RBBINode() {
114     // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
115     delete fInputSet;
116     fInputSet = nullptr;
117 
118     switch (this->fType) {
119     case varRef:
120     case setRef:
121         // for these node types, multiple instances point to the same "children"
122         // Storage ownership of children handled elsewhere.  Don't delete here.
123         break;
124 
125     default:
126         // Avoid using a recursive implementation because of stack overflow problems.
127         // See bug ICU-22584.
128         // delete        fLeftChild;
129         NRDeleteNode(fLeftChild);
130         fLeftChild =   nullptr;
131         // delete        fRightChild;
132         NRDeleteNode(fRightChild);
133         fRightChild = nullptr;
134     }
135 
136     delete fFirstPosSet;
137     delete fLastPosSet;
138     delete fFollowPos;
139 }
140 
141 /**
142  * Non-recursive delete of a node + its children. Used from the node destructor
143  * instead of the more obvious recursive implementation to avoid problems with
144  * stack overflow with some perverse test rule data (from fuzzing).
145  */
NRDeleteNode(RBBINode * node)146 void RBBINode::NRDeleteNode(RBBINode *node) {
147     if (node == nullptr) {
148         return;
149     }
150 
151     RBBINode *stopNode = node->fParent;
152     RBBINode *nextNode = node;
153     while (nextNode != stopNode && nextNode != nullptr) {
154         RBBINode *currentNode = nextNode;
155 
156         if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
157                 currentNode->fType == varRef ||      // varRef and setRef nodes do not
158                 currentNode->fType == setRef) {      // own their children nodes.
159             // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
160             nextNode = currentNode->fParent;
161             if (nextNode) {
162                 if (nextNode->fLeftChild == currentNode) {
163                     nextNode->fLeftChild = nullptr;
164                 } else if (nextNode->fRightChild == currentNode) {
165                     nextNode->fRightChild = nullptr;
166                 }
167             }
168             delete currentNode;
169         } else if (currentNode->fLeftChild) {
170             nextNode = currentNode->fLeftChild;
171             if (nextNode->fParent == nullptr) {
172                 nextNode->fParent = currentNode;
173                 // fParent isn't always set; do it now if not.
174             }
175             U_ASSERT(nextNode->fParent == currentNode);
176         } else if (currentNode->fRightChild) {
177             nextNode = currentNode->fRightChild;
178             if (nextNode->fParent == nullptr) {
179                 nextNode->fParent = currentNode;
180                 // fParent isn't always set; do it now if not.
181             }
182             U_ASSERT(nextNode->fParent == currentNode);
183         }
184     }
185 }
186 
187 //-------------------------------------------------------------------------
188 //
189 //    cloneTree     Make a copy of the subtree rooted at this node.
190 //                  Discard any variable references encountered along the way,
191 //                  and replace with copies of the variable's definitions.
192 //                  Used to replicate the expression underneath variable
193 //                  references in preparation for generating the DFA tables.
194 //
195 //-------------------------------------------------------------------------
cloneTree()196 RBBINode *RBBINode::cloneTree() {
197     RBBINode    *n;
198 
199     if (fType == RBBINode::varRef) {
200         // If the current node is a variable reference, skip over it
201         //   and clone the definition of the variable instead.
202         n = fLeftChild->cloneTree();
203     } else if (fType == RBBINode::uset) {
204         n = this;
205     } else {
206         n = new RBBINode(*this);
207         // Check for null pointer.
208         if (n != nullptr) {
209             if (fLeftChild != nullptr) {
210                 n->fLeftChild          = fLeftChild->cloneTree();
211                 n->fLeftChild->fParent = n;
212             }
213             if (fRightChild != nullptr) {
214                 n->fRightChild          = fRightChild->cloneTree();
215                 n->fRightChild->fParent = n;
216             }
217         }
218     }
219     return n;
220 }
221 
222 
223 
224 //-------------------------------------------------------------------------
225 //
226 //   flattenVariables   Walk a parse tree, replacing any variable
227 //                      references with a copy of the variable's definition.
228 //                      Aside from variables, the tree is not changed.
229 //
230 //                      Return the root of the tree.  If the root was not a variable
231 //                      reference, it remains unchanged - the root we started with
232 //                      is the root we return.  If, however, the root was a variable
233 //                      reference, the root of the newly cloned replacement tree will
234 //                      be returned, and the original tree deleted.
235 //
236 //                      This function works by recursively walking the tree
237 //                      without doing anything until a variable reference is
238 //                      found, then calling cloneTree() at that point.  Any
239 //                      nested references are handled by cloneTree(), not here.
240 //
241 //-------------------------------------------------------------------------
242 constexpr int kRecursiveDepthLimit = 3500;
flattenVariables(UErrorCode & status,int depth)243 RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
244     if (U_FAILURE(status)) {
245         return this;
246     }
247     // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
248     // to avoid stack overflow crash.
249     if (depth > kRecursiveDepthLimit) {
250         status = U_INPUT_TOO_LONG_ERROR;
251         return this;
252     }
253     if (fType == varRef) {
254         RBBINode *retNode  = fLeftChild->cloneTree();
255         if (retNode != nullptr) {
256             retNode->fRuleRoot = this->fRuleRoot;
257             retNode->fChainIn  = this->fChainIn;
258         }
259         delete this;   // TODO: undefined behavior. Fix.
260         return retNode;
261     }
262 
263     if (fLeftChild != nullptr) {
264         fLeftChild = fLeftChild->flattenVariables(status, depth+1);
265         fLeftChild->fParent  = this;
266     }
267     if (fRightChild != nullptr) {
268         fRightChild = fRightChild->flattenVariables(status, depth+1);
269         fRightChild->fParent = this;
270     }
271     return this;
272 }
273 
274 
275 //-------------------------------------------------------------------------
276 //
277 //  flattenSets    Walk the parse tree, replacing any nodes of type setRef
278 //                 with a copy of the expression tree for the set.  A set's
279 //                 equivalent expression tree is precomputed and saved as
280 //                 the left child of the uset node.
281 //
282 //-------------------------------------------------------------------------
flattenSets()283 void RBBINode::flattenSets() {
284     U_ASSERT(fType != setRef);
285 
286     if (fLeftChild != nullptr) {
287         if (fLeftChild->fType==setRef) {
288             RBBINode *setRefNode = fLeftChild;
289             RBBINode *usetNode   = setRefNode->fLeftChild;
290             RBBINode *replTree   = usetNode->fLeftChild;
291             fLeftChild           = replTree->cloneTree();
292             fLeftChild->fParent  = this;
293             delete setRefNode;
294         } else {
295             fLeftChild->flattenSets();
296         }
297     }
298 
299     if (fRightChild != nullptr) {
300         if (fRightChild->fType==setRef) {
301             RBBINode *setRefNode = fRightChild;
302             RBBINode *usetNode   = setRefNode->fLeftChild;
303             RBBINode *replTree   = usetNode->fLeftChild;
304             fRightChild           = replTree->cloneTree();
305             fRightChild->fParent  = this;
306             delete setRefNode;
307         } else {
308             fRightChild->flattenSets();
309         }
310     }
311 }
312 
313 
314 
315 //-------------------------------------------------------------------------
316 //
317 //   findNodes()     Locate all the nodes of the specified type, starting
318 //                   at the specified root.
319 //
320 //-------------------------------------------------------------------------
findNodes(UVector * dest,RBBINode::NodeType kind,UErrorCode & status)321 void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
322     /* test for buffer overflows */
323     if (U_FAILURE(status)) {
324         return;
325     }
326     U_ASSERT(!dest->hasDeleter());
327     if (fType == kind) {
328         dest->addElement(this, status);
329     }
330     if (fLeftChild != nullptr) {
331         fLeftChild->findNodes(dest, kind, status);
332     }
333     if (fRightChild != nullptr) {
334         fRightChild->findNodes(dest, kind, status);
335     }
336 }
337 
338 
339 //-------------------------------------------------------------------------
340 //
341 //    print.         Print out a single node, for debugging.
342 //
343 //-------------------------------------------------------------------------
344 #ifdef RBBI_DEBUG
345 
serial(const RBBINode * node)346 static int32_t serial(const RBBINode *node) {
347     return (node == nullptr? -1 : node->fSerialNum);
348 }
349 
350 
printNode(const RBBINode * node)351 void RBBINode::printNode(const RBBINode *node) {
352     static const char * const nodeTypeNames[] = {
353                 "setRef",
354                 "uset",
355                 "varRef",
356                 "leafChar",
357                 "lookAhead",
358                 "tag",
359                 "endMark",
360                 "opStart",
361                 "opCat",
362                 "opOr",
363                 "opStar",
364                 "opPlus",
365                 "opQuestion",
366                 "opBreak",
367                 "opReverse",
368                 "opLParen"
369     };
370 
371     if (node==nullptr) {
372         RBBIDebugPrintf("%10p", (void *)node);
373     } else {
374         RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
375             (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
376             node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
377             serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
378             node->fFirstPos, node->fVal);
379         if (node->fType == varRef) {
380             RBBI_DEBUG_printUnicodeString(node->fText);
381         }
382     }
383     RBBIDebugPrintf("\n");
384 }
385 #endif
386 
387 
388 #ifdef RBBI_DEBUG
RBBI_DEBUG_printUnicodeString(const UnicodeString & s,int minWidth)389 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
390     RBBIDebugPrintf("%*s", minWidth, CStr(s)());
391 }
392 #endif
393 
394 
395 //-------------------------------------------------------------------------
396 //
397 //    print.         Print out the tree of nodes rooted at "this"
398 //
399 //-------------------------------------------------------------------------
400 #ifdef RBBI_DEBUG
printNodeHeader()401 void RBBINode::printNodeHeader() {
402     RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
403 }
404 
printTree(const RBBINode * node,UBool printHeading)405 void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
406     if (printHeading) {
407         printNodeHeader();
408     }
409     printNode(node);
410     if (node != nullptr) {
411         // Only dump the definition under a variable reference if asked to.
412         // Unconditionally dump children of all other node types.
413         if (node->fType != varRef) {
414             if (node->fLeftChild != nullptr) {
415                 printTree(node->fLeftChild, false);
416             }
417 
418             if (node->fRightChild != nullptr) {
419                 printTree(node->fRightChild, false);
420             }
421         }
422     }
423 }
424 #endif
425 
426 
427 
428 U_NAMESPACE_END
429 
430 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
431