@@ -318,21 +318,22 @@ typedef struct
318
318
* arcs - outgoing arcs of this state (List of TrgmArc)
319
319
* enterKeys - enter keys reachable from this state without reading any
320
320
* predictable trigram (List of TrgmStateKey)
321
- * fin - flag indicating this state is final
322
- * init - flag indicating this state is initial
321
+ * flags - flag bits (TSTATE_INIT and/or TSTATE_FIN, defined below)
323
322
* parent - parent state, if this state has been merged into another
324
- * children - child states (states that have been merged into this one)
323
+ * tentParent - planned parent state, if considering a merge
325
324
* number - number of this state (used at the packaging stage)
326
325
*/
326
+ #define TSTATE_INIT 0x01 /* flag indicating this state is initial */
327
+ #define TSTATE_FIN 0x02 /* flag indicating this state is final */
328
+
327
329
typedef struct TrgmState
328
330
{
329
331
TrgmStateKey stateKey ; /* hashtable key: must be first field */
330
332
List * arcs ;
331
333
List * enterKeys ;
332
- bool fin ;
333
- bool init ;
334
+ int flags ;
334
335
struct TrgmState * parent ;
335
- List * children ;
336
+ struct TrgmState * tentParent ;
336
337
int number ;
337
338
} TrgmState ;
338
339
@@ -601,7 +602,7 @@ createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
601
602
* get from the initial state to the final state without reading any
602
603
* predictable trigram.
603
604
*/
604
- if (trgmNFA .initState -> fin )
605
+ if (trgmNFA .initState -> flags & TSTATE_FIN )
605
606
return NULL ;
606
607
607
608
/*
@@ -928,7 +929,7 @@ transformGraph(TrgmNFA *trgmNFA)
928
929
initkey .nstate = pg_reg_getinitialstate (trgmNFA -> regex );
929
930
930
931
initstate = getState (trgmNFA , & initkey );
931
- initstate -> init = true ;
932
+ initstate -> flags |= TSTATE_INIT ;
932
933
trgmNFA -> initState = initstate ;
933
934
934
935
/*
@@ -946,7 +947,7 @@ transformGraph(TrgmNFA *trgmNFA)
946
947
* actual processing.
947
948
*/
948
949
if (trgmNFA -> overflowed )
949
- state -> fin = true ;
950
+ state -> flags |= TSTATE_FIN ;
950
951
else
951
952
processState (trgmNFA , state );
952
953
@@ -971,7 +972,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
971
972
* queue is empty. But we can quit if the state gets marked final.
972
973
*/
973
974
addKey (trgmNFA , state , & state -> stateKey );
974
- while (trgmNFA -> keysQueue != NIL && !state -> fin )
975
+ while (trgmNFA -> keysQueue != NIL && !( state -> flags & TSTATE_FIN ) )
975
976
{
976
977
TrgmStateKey * key = (TrgmStateKey * ) linitial (trgmNFA -> keysQueue );
977
978
@@ -983,7 +984,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
983
984
* Add outgoing arcs only if state isn't final (we have no interest in
984
985
* outgoing arcs if we already match)
985
986
*/
986
- if (!state -> fin )
987
+ if (!( state -> flags & TSTATE_FIN ) )
987
988
addArcs (trgmNFA , state );
988
989
}
989
990
@@ -992,7 +993,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
992
993
* whether this should result in any further enter keys being added.
993
994
* If so, add those keys to keysQueue so that processState will handle them.
994
995
*
995
- * If the enter key is for the NFA's final state, set state->fin = TRUE.
996
+ * If the enter key is for the NFA's final state, mark state as TSTATE_FIN.
996
997
* This situation means that we can reach the final state from this expanded
997
998
* state without reading any predictable trigram, so we must consider this
998
999
* state as an accepting one.
@@ -1062,7 +1063,7 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
1062
1063
/* If state is now known final, mark it and we're done */
1063
1064
if (key -> nstate == pg_reg_getfinalstate (trgmNFA -> regex ))
1064
1065
{
1065
- state -> fin = true ;
1066
+ state -> flags |= TSTATE_FIN ;
1066
1067
return ;
1067
1068
}
1068
1069
@@ -1388,10 +1389,9 @@ getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
1388
1389
/* New state: initialize and queue it */
1389
1390
state -> arcs = NIL ;
1390
1391
state -> enterKeys = NIL ;
1391
- state -> init = false;
1392
- state -> fin = false;
1392
+ state -> flags = 0 ;
1393
1393
state -> parent = NULL ;
1394
- state -> children = NIL ;
1394
+ state -> tentParent = NULL ;
1395
1395
state -> number = -1 ;
1396
1396
1397
1397
trgmNFA -> queue = lappend (trgmNFA -> queue , state );
@@ -1585,20 +1585,60 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1585
1585
TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1586
1586
TrgmState * source = arcInfo -> source ,
1587
1587
* target = arcInfo -> target ;
1588
+ int source_flags ,
1589
+ target_flags ;
1588
1590
1589
1591
/* examine parent states, if any merging has already happened */
1590
1592
while (source -> parent )
1591
1593
source = source -> parent ;
1592
1594
while (target -> parent )
1593
1595
target = target -> parent ;
1594
1596
1595
- if ((source -> init || target -> init ) &&
1596
- (source -> fin || target -> fin ))
1597
+ /* we must also consider merges we are planning right now */
1598
+ source_flags = source -> flags ;
1599
+ while (source -> tentParent )
1600
+ {
1601
+ source = source -> tentParent ;
1602
+ source_flags |= source -> flags ;
1603
+ }
1604
+ target_flags = target -> flags ;
1605
+ while (target -> tentParent )
1606
+ {
1607
+ target = target -> tentParent ;
1608
+ target_flags |= target -> flags ;
1609
+ }
1610
+
1611
+ /* would fully-merged state have both INIT and FIN set? */
1612
+ if (((source_flags | target_flags ) & (TSTATE_INIT | TSTATE_FIN )) ==
1613
+ (TSTATE_INIT | TSTATE_FIN ))
1597
1614
{
1598
1615
canRemove = false;
1599
1616
break ;
1600
1617
}
1618
+
1619
+ /* ok so far, so remember planned merge */
1620
+ if (source != target )
1621
+ target -> tentParent = source ;
1601
1622
}
1623
+
1624
+ /* We must clear all the tentParent fields before continuing */
1625
+ foreach (cell , trgmInfo -> arcs )
1626
+ {
1627
+ TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1628
+ TrgmState * target = arcInfo -> target ;
1629
+ TrgmState * ttarget ;
1630
+
1631
+ while (target -> parent )
1632
+ target = target -> parent ;
1633
+
1634
+ while ((ttarget = target -> tentParent ) != NULL )
1635
+ {
1636
+ target -> tentParent = NULL ;
1637
+ target = ttarget ;
1638
+ }
1639
+ }
1640
+
1641
+ /* Now, move on if we can't drop this trigram */
1602
1642
if (!canRemove )
1603
1643
continue ;
1604
1644
@@ -1614,7 +1654,12 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1614
1654
while (target -> parent )
1615
1655
target = target -> parent ;
1616
1656
if (source != target )
1657
+ {
1617
1658
mergeStates (source , target );
1659
+ /* Assert we didn't merge initial and final states */
1660
+ Assert ((source -> flags & (TSTATE_INIT | TSTATE_FIN )) !=
1661
+ (TSTATE_INIT | TSTATE_FIN ));
1662
+ }
1618
1663
}
1619
1664
1620
1665
/* Mark trigram unexpanded, and update totals */
@@ -1757,27 +1802,15 @@ fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
1757
1802
static void
1758
1803
mergeStates (TrgmState * state1 , TrgmState * state2 )
1759
1804
{
1760
- ListCell * cell ;
1761
-
1762
1805
Assert (state1 != state2 );
1763
1806
Assert (!state1 -> parent );
1764
1807
Assert (!state2 -> parent );
1765
1808
1766
- /* state1 absorbs state2's init/fin flags */
1767
- state1 -> init |= state2 -> init ;
1768
- state1 -> fin |= state2 -> fin ;
1809
+ /* state1 absorbs state2's flags */
1810
+ state1 -> flags |= state2 -> flags ;
1769
1811
1770
- /* state2, and all its children, become children of state1 */
1771
- foreach (cell , state2 -> children )
1772
- {
1773
- TrgmState * state = (TrgmState * ) lfirst (cell );
1774
-
1775
- state -> parent = state1 ;
1776
- }
1812
+ /* state2, and indirectly all its children, become children of state1 */
1777
1813
state2 -> parent = state1 ;
1778
- state1 -> children = list_concat (state1 -> children , state2 -> children );
1779
- state1 -> children = lappend (state1 -> children , state2 );
1780
- state2 -> children = NIL ;
1781
1814
}
1782
1815
1783
1816
/*
@@ -1846,9 +1879,9 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
1846
1879
1847
1880
if (state -> number < 0 )
1848
1881
{
1849
- if (state -> init )
1882
+ if (state -> flags & TSTATE_INIT )
1850
1883
state -> number = 0 ;
1851
- else if (state -> fin )
1884
+ else if (state -> flags & TSTATE_FIN )
1852
1885
state -> number = 1 ;
1853
1886
else
1854
1887
{
@@ -2112,9 +2145,9 @@ printTrgmNFA(TrgmNFA *trgmNFA)
2112
2145
ListCell * cell ;
2113
2146
2114
2147
appendStringInfo (& buf , "s%p" , (void * ) state );
2115
- if (state -> fin )
2148
+ if (state -> flags & TSTATE_FIN )
2116
2149
appendStringInfoString (& buf , " [shape = doublecircle]" );
2117
- if (state -> init )
2150
+ if (state -> flags & TSTATE_INIT )
2118
2151
initstate = state ;
2119
2152
appendStringInfo (& buf , " [label = \"%d\"]" , state -> stateKey .nstate );
2120
2153
appendStringInfoString (& buf , ";\n" );
0 commit comments