@@ -287,21 +287,22 @@ typedef struct
287
287
* arcs - outgoing arcs of this state (List of TrgmArc)
288
288
* enterKeys - enter keys reachable from this state without reading any
289
289
* predictable trigram (List of TrgmStateKey)
290
- * fin - flag indicating this state is final
291
- * init - flag indicating this state is initial
290
+ * flags - flag bits (TSTATE_INIT and/or TSTATE_FIN)
292
291
* parent - parent state, if this state has been merged into another
293
- * children - child states (states that have been merged into this one)
292
+ * tentParent - planned parent state, if considering a merge
294
293
* number - number of this state (used at the packaging stage)
295
294
*/
295
+ #define TSTATE_INIT 0x01 /* flag indicating this state is initial */
296
+ #define TSTATE_FIN 0x02 /* flag indicating this state is final */
297
+
296
298
typedef struct TrgmState
297
299
{
298
300
TrgmStateKey stateKey ; /* hashtable key: must be first field */
299
301
List * arcs ; /* outgoing arcs of this state (List of TrgmArc) */
300
302
List * enterKeys ; /* enter keys reachable without reading any predictable trigram (List of TrgmStateKey) */
301
- bool fin ;
302
- bool init ;
303
+ int flags ; /* bitwise OR of TSTATE_INIT / TSTATE_FIN; 0 for a newly created state */
303
304
struct TrgmState * parent ; /* state this one has been merged into, or NULL if not merged */
304
- List * children ;
305
+ struct TrgmState * tentParent ; /* planned parent while a merge is being considered; reset to NULL afterwards */
305
306
int number ; /* state number used at the packaging stage; -1 until assigned */
306
307
} TrgmState ;
307
308
@@ -569,7 +570,7 @@ createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
569
570
* get from the initial state to the final state without reading any
570
571
* predictable trigram.
571
572
*/
572
- if (trgmNFA .initState -> fin )
573
+ if (trgmNFA .initState -> flags & TSTATE_FIN )
573
574
return NULL ;
574
575
575
576
/*
@@ -896,7 +897,7 @@ transformGraph(TrgmNFA *trgmNFA)
896
897
initkey .nstate = pg_reg_getinitialstate (trgmNFA -> regex );
897
898
898
899
initstate = getState (trgmNFA , & initkey );
899
- initstate -> init = true ;
900
+ initstate -> flags |= TSTATE_INIT ;
900
901
trgmNFA -> initState = initstate ;
901
902
902
903
/*
@@ -914,7 +915,7 @@ transformGraph(TrgmNFA *trgmNFA)
914
915
* actual processing.
915
916
*/
916
917
if (trgmNFA -> overflowed )
917
- state -> fin = true ;
918
+ state -> flags |= TSTATE_FIN ;
918
919
else
919
920
processState (trgmNFA , state );
920
921
@@ -939,7 +940,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
939
940
* queue is empty. But we can quit if the state gets marked final.
940
941
*/
941
942
addKey (trgmNFA , state , & state -> stateKey );
942
- while (trgmNFA -> keysQueue != NIL && !state -> fin )
943
+ while (trgmNFA -> keysQueue != NIL && !( state -> flags & TSTATE_FIN ) )
943
944
{
944
945
TrgmStateKey * key = (TrgmStateKey * ) linitial (trgmNFA -> keysQueue );
945
946
@@ -951,7 +952,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
951
952
* Add outgoing arcs only if state isn't final (we have no interest in
952
953
* outgoing arcs if we already match)
953
954
*/
954
- if (!state -> fin )
955
+ if (!( state -> flags & TSTATE_FIN ) )
955
956
addArcs (trgmNFA , state );
956
957
}
957
958
@@ -960,7 +961,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
960
961
* whether this should result in any further enter keys being added.
961
962
* If so, add those keys to keysQueue so that processState will handle them.
962
963
*
963
- * If the enter key is for the NFA's final state, set state->fin = TRUE .
964
+ * If the enter key is for the NFA's final state, mark state as TSTATE_FIN .
964
965
* This situation means that we can reach the final state from this expanded
965
966
* state without reading any predictable trigram, so we must consider this
966
967
* state as an accepting one.
@@ -1030,7 +1031,7 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
1030
1031
/* If state is now known final, mark it and we're done */
1031
1032
if (key -> nstate == pg_reg_getfinalstate (trgmNFA -> regex ))
1032
1033
{
1033
- state -> fin = true ;
1034
+ state -> flags |= TSTATE_FIN ;
1034
1035
return ;
1035
1036
}
1036
1037
@@ -1356,10 +1357,9 @@ getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
1356
1357
/* New state: initialize and queue it */
1357
1358
state -> arcs = NIL ;
1358
1359
state -> enterKeys = NIL ;
1359
- state -> init = false;
1360
- state -> fin = false;
1360
+ state -> flags = 0 ;
1361
1361
state -> parent = NULL ;
1362
- state -> children = NIL ;
1362
+ state -> tentParent = NULL ;
1363
1363
state -> number = -1 ;
1364
1364
1365
1365
trgmNFA -> queue = lappend (trgmNFA -> queue , state );
@@ -1538,20 +1538,60 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1538
1538
TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1539
1539
TrgmState * source = arcInfo -> source ,
1540
1540
* target = arcInfo -> target ;
1541
+ int source_flags ,
1542
+ target_flags ;
1541
1543
1542
1544
/* examine parent states, if any merging has already happened */
1543
1545
while (source -> parent )
1544
1546
source = source -> parent ;
1545
1547
while (target -> parent )
1546
1548
target = target -> parent ;
1547
1549
1548
- if ((source -> init || target -> init ) &&
1549
- (source -> fin || target -> fin ))
1550
+ /* we must also consider merges we are planning right now */
1551
+ source_flags = source -> flags ;
1552
+ while (source -> tentParent )
1553
+ {
1554
+ source = source -> tentParent ;
1555
+ source_flags |= source -> flags ;
1556
+ }
1557
+ target_flags = target -> flags ;
1558
+ while (target -> tentParent )
1559
+ {
1560
+ target = target -> tentParent ;
1561
+ target_flags |= target -> flags ;
1562
+ }
1563
+
1564
+ /* would fully-merged state have both INIT and FIN set? */
1565
+ if (((source_flags | target_flags ) & (TSTATE_INIT | TSTATE_FIN )) ==
1566
+ (TSTATE_INIT | TSTATE_FIN ))
1550
1567
{
1551
1568
canRemove = false;
1552
1569
break ;
1553
1570
}
1571
+
1572
+ /* ok so far, so remember planned merge */
1573
+ if (source != target )
1574
+ target -> tentParent = source ;
1554
1575
}
1576
+
1577
+ /* We must clear all the tentParent fields before continuing */
1578
+ foreach (cell , trgmInfo -> arcs )
1579
+ {
1580
+ TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1581
+ TrgmState * target = arcInfo -> target ;
1582
+ TrgmState * ttarget ;
1583
+
1584
+ while (target -> parent )
1585
+ target = target -> parent ;
1586
+
1587
+ while ((ttarget = target -> tentParent ) != NULL )
1588
+ {
1589
+ target -> tentParent = NULL ;
1590
+ target = ttarget ;
1591
+ }
1592
+ }
1593
+
1594
+ /* Now, move on if we can't drop this trigram */
1555
1595
if (!canRemove )
1556
1596
continue ;
1557
1597
@@ -1567,7 +1607,12 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1567
1607
while (target -> parent )
1568
1608
target = target -> parent ;
1569
1609
if (source != target )
1610
+ {
1570
1611
mergeStates (source , target );
1612
+ /* Assert we didn't merge initial and final states */
1613
+ Assert ((source -> flags & (TSTATE_INIT | TSTATE_FIN )) !=
1614
+ (TSTATE_INIT | TSTATE_FIN ));
1615
+ }
1571
1616
}
1572
1617
1573
1618
/* Mark trigram unexpanded, and update totalTrgmCount */
@@ -1709,27 +1754,15 @@ fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
1709
1754
static void
1710
1755
mergeStates (TrgmState * state1 , TrgmState * state2 )
1711
1756
{
1712
- ListCell * cell ;
1713
-
1714
1757
Assert (state1 != state2 ); /* a self-merge would make the state its own parent */
1715
1758
Assert (!state1 -> parent ); /* state1 must not already be merged into another state */
1716
1759
Assert (!state2 -> parent ); /* state2 must not already be merged into another state */
1717
1760
1718
- /* state1 absorbs state2's init/fin flags */
1719
- state1 -> init |= state2 -> init ;
1720
- state1 -> fin |= state2 -> fin ;
1761
+ /* state1 absorbs state2's flags */
1762
+ state1 -> flags |= state2 -> flags ;
1721
1763
1722
- /* state2, and all its children, become children of state1 */
1723
- foreach (cell , state2 -> children )
1724
- {
1725
- TrgmState * state = (TrgmState * ) lfirst (cell );
1726
-
1727
- state -> parent = state1 ;
1728
- }
1764
+ /* state2, and indirectly all its children, become children of state1 */
1729
1765
state2 -> parent = state1 ; /* only this link is set; earlier-merged states reach state1 by following their parent chain */
1730
- state1 -> children = list_concat (state1 -> children , state2 -> children );
1731
- state1 -> children = lappend (state1 -> children , state2 );
1732
- state2 -> children = NIL ;
1733
1766
}
1734
1767
1735
1768
/*
@@ -1798,9 +1831,9 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
1798
1831
1799
1832
if (state -> number < 0 )
1800
1833
{
1801
- if (state -> init )
1834
+ if (state -> flags & TSTATE_INIT )
1802
1835
state -> number = 0 ;
1803
- else if (state -> fin )
1836
+ else if (state -> flags & TSTATE_FIN )
1804
1837
state -> number = 1 ;
1805
1838
else
1806
1839
{
@@ -2064,9 +2097,9 @@ printTrgmNFA(TrgmNFA *trgmNFA)
2064
2097
ListCell * cell ;
2065
2098
2066
2099
appendStringInfo (& buf , "s%p" , (void * ) state );
2067
- if (state -> fin )
2100
+ if (state -> flags & TSTATE_FIN )
2068
2101
appendStringInfo (& buf , " [shape = doublecircle]" );
2069
- if (state -> init )
2102
+ if (state -> flags & TSTATE_INIT )
2070
2103
initstate = state ;
2071
2104
appendStringInfo (& buf , " [label = \"%d\"]" , state -> stateKey .nstate );
2072
2105
appendStringInfo (& buf , ";\n" );
0 commit comments