Skip to content

Commit 8e7796a

Browse files
committed
lpeg 1.0.1
1 parent 6fa2414 commit 8e7796a

12 files changed

+252
-117
lines changed

3rd/lpeg/lpcap.h

+16-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
2+
** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $
33
*/
44

55
#if !defined(lpcap_h)
@@ -11,8 +11,21 @@
1111

1212
/* kinds of captures */
1313
typedef enum CapKind {
14-
Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
15-
Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
14+
Cclose, /* not used in trees */
15+
Cposition,
16+
Cconst, /* ktable[key] is Lua constant */
17+
Cbackref, /* ktable[key] is "name" of group to get capture */
18+
Carg, /* 'key' is arg's number */
19+
Csimple, /* next node is pattern */
20+
Ctable, /* next node is pattern */
21+
Cfunction, /* ktable[key] is function; next node is pattern */
22+
Cquery, /* ktable[key] is table; next node is pattern */
23+
Cstring, /* ktable[key] is string; next node is pattern */
24+
Cnum, /* numbered capture; 'key' is number of value to return */
25+
Csubst, /* substitution capture; next node is pattern */
26+
Cfold, /* ktable[key] is function; next node is pattern */
27+
Cruntime, /* not used in trees (it uses another type for tree) */
28+
Cgroup /* ktable[key] is group's "name" */
1629
} CapKind;
1730

1831

3rd/lpeg/lpcode.c

+52-24
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
2+
** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $
33
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
44
*/
55

@@ -125,6 +125,27 @@ int tocharset (TTree *tree, Charset *cs) {
125125
}
126126

127127

128+
/*
129+
** Visit a TCall node taking care to stop recursion. If node not yet
130+
** visited, return 'f(sib2(tree))', otherwise return 'def' (default
131+
** value)
132+
*/
133+
static int callrecursive (TTree *tree, int f (TTree *t), int def) {
134+
int key = tree->key;
135+
assert(tree->tag == TCall);
136+
assert(sib2(tree)->tag == TRule);
137+
if (key == 0) /* node already visited? */
138+
return def; /* return default value */
139+
else { /* first visit */
140+
int result;
141+
tree->key = 0; /* mark call as already visited */
142+
result = f(sib2(tree)); /* go to called rule */
143+
tree->key = key; /* restore tree */
144+
return result;
145+
}
146+
}
147+
148+
128149
/*
129150
** Check whether a pattern tree has captures
130151
*/
@@ -134,14 +155,17 @@ int hascaptures (TTree *tree) {
134155
case TCapture: case TRunTime:
135156
return 1;
136157
case TCall:
137-
tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
158+
return callrecursive(tree, hascaptures, 0);
159+
case TRule: /* do not follow siblings */
160+
tree = sib1(tree); goto tailcall;
138161
case TOpenCall: assert(0);
139162
default: {
140163
switch (numsiblings[tree->tag]) {
141164
case 1: /* return hascaptures(sib1(tree)); */
142165
tree = sib1(tree); goto tailcall;
143166
case 2:
144-
if (hascaptures(sib1(tree))) return 1;
167+
if (hascaptures(sib1(tree)))
168+
return 1;
145169
/* else return hascaptures(sib2(tree)); */
146170
tree = sib2(tree); goto tailcall;
147171
default: assert(numsiblings[tree->tag] == 0); return 0;
@@ -208,9 +232,9 @@ int checkaux (TTree *tree, int pred) {
208232

209233
/*
210234
** number of characters to match a pattern (or -1 if variable)
211-
** ('count' avoids infinite loops for grammars)
212235
*/
213-
int fixedlenx (TTree *tree, int count, int len) {
236+
int fixedlen (TTree *tree) {
237+
int len = 0; /* to accumulate in tail calls */
214238
tailcall:
215239
switch (tree->tag) {
216240
case TChar: case TSet: case TAny:
@@ -220,26 +244,29 @@ int fixedlenx (TTree *tree, int count, int len) {
220244
case TRep: case TRunTime: case TOpenCall:
221245
return -1;
222246
case TCapture: case TRule: case TGrammar:
223-
/* return fixedlenx(sib1(tree), count); */
247+
/* return fixedlen(sib1(tree)); */
224248
tree = sib1(tree); goto tailcall;
225-
case TCall:
226-
if (count++ >= MAXRULES)
227-
return -1; /* may be a loop */
228-
/* else return fixedlenx(sib2(tree), count); */
229-
tree = sib2(tree); goto tailcall;
249+
case TCall: {
250+
int n1 = callrecursive(tree, fixedlen, -1);
251+
if (n1 < 0)
252+
return -1;
253+
else
254+
return len + n1;
255+
}
230256
case TSeq: {
231-
len = fixedlenx(sib1(tree), count, len);
232-
if (len < 0) return -1;
233-
/* else return fixedlenx(sib2(tree), count, len); */
234-
tree = sib2(tree); goto tailcall;
257+
int n1 = fixedlen(sib1(tree));
258+
if (n1 < 0)
259+
return -1;
260+
/* else return fixedlen(sib2(tree)) + len; */
261+
len += n1; tree = sib2(tree); goto tailcall;
235262
}
236263
case TChoice: {
237-
int n1, n2;
238-
n1 = fixedlenx(sib1(tree), count, len);
239-
if (n1 < 0) return -1;
240-
n2 = fixedlenx(sib2(tree), count, len);
241-
if (n1 == n2) return n1;
242-
else return -1;
264+
int n1 = fixedlen(sib1(tree));
265+
int n2 = fixedlen(sib2(tree));
266+
if (n1 != n2 || n1 < 0)
267+
return -1;
268+
else
269+
return len + n1;
243270
}
244271
default: assert(0); return 0;
245272
};
@@ -710,9 +737,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) {
710737

711738

712739
/*
713-
** Captures: if pattern has fixed (and not too big) length, use
714-
** a single IFullCapture instruction after the match; otherwise,
715-
** enclose the pattern with OpenCapture - CloseCapture.
740+
** Captures: if pattern has fixed (and not too big) length, and it
741+
** has no nested captures, use a single IFullCapture instruction
742+
** after the match; otherwise, enclose the pattern with OpenCapture -
743+
** CloseCapture.
716744
*/
717745
static void codecapture (CompileState *compst, TTree *tree, int tt,
718746
const Charset *fl) {

3rd/lpeg/lpcode.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
2+
** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
33
*/
44

55
#if !defined(lpcode_h)
@@ -13,7 +13,7 @@
1313

1414
int tocharset (TTree *tree, Charset *cs);
1515
int checkaux (TTree *tree, int pred);
16-
int fixedlenx (TTree *tree, int count, int len);
16+
int fixedlen (TTree *tree);
1717
int hascaptures (TTree *tree);
1818
int lp_gc (lua_State *L);
1919
Instruction *compile (lua_State *L, Pattern *p);
@@ -35,8 +35,6 @@ int sizei (const Instruction *i);
3535
*/
3636
#define nullable(t) checkaux(t, PEnullable)
3737

38-
#define fixedlen(t) fixedlenx(t, 0, 0)
39-
4038

4139

4240
#endif

3rd/lpeg/lpeg.html

+27-16
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
</head>
1111
<body>
1212

13-
<!-- $Id: lpeg.html,v 1.75 2015/09/28 17:17:41 roberto Exp $ -->
13+
<!-- $Id: lpeg.html,v 1.77 2017/01/13 13:40:05 roberto Exp $ -->
1414

1515
<div id="container">
1616

@@ -577,8 +577,9 @@ <h2><a name="grammar">Grammars</a></h2>
577577
<h2><a name="captures">Captures</a></h2>
578578

579579
<p>
580-
A <em>capture</em> is a pattern that creates values
581-
(the so called <em>semantic information</em>) when it matches.
580+
A <em>capture</em> is a pattern that produces values
581+
(the so-called <em>semantic information</em>)
582+
according to what it matches.
582583
LPeg offers several kinds of captures,
583584
which produce values based on matches and combine these values to
584585
produce new values.
@@ -632,25 +633,28 @@ <h2><a name="captures">Captures</a></h2>
632633
</tbody></table>
633634

634635
<p>
635-
A capture pattern produces its values every time it succeeds.
636-
For instance,
637-
a capture inside a loop produces as many values as matched by the loop.
638-
A capture produces a value only when it succeeds.
636+
A capture pattern produces its values only when it succeeds.
639637
For instance,
640638
the pattern <code>lpeg.C(lpeg.P"a"^-1)</code>
641639
produces the empty string when there is no <code>"a"</code>
642640
(because the pattern <code>"a"?</code> succeeds),
643641
while the pattern <code>lpeg.C("a")^-1</code>
644642
does not produce any value when there is no <code>"a"</code>
645643
(because the pattern <code>"a"</code> fails).
644+
A pattern inside a loop or inside a recursive structure
645+
produces values for each match.
646646
</p>
647647

648648
<p>
649649
Usually,
650-
LPeg evaluates all captures only after (and if) the entire match succeeds.
651-
During <em>match time</em> it only gathers enough information
652-
to produce the capture values later.
653-
As a particularly important consequence,
650+
LPeg does not specify when (and if) it evaluates its captures.
651+
(As an example,
652+
consider the pattern <code>lpeg.P"a" / func / 0</code>.
653+
Because the "division" by 0 instructs LPeg to throw away the
654+
results from the pattern,
655+
LPeg may or may not call <code>func</code>.)
656+
Therefore, captures should avoid side effects.
657+
Moreover,
654658
most captures cannot affect the way a pattern matches a subject.
655659
The only exception to this rule is the
656660
so-called <a href="#matchtime"><em>match-time capture</em></a>.
@@ -700,6 +704,12 @@ <h3><a name="cap-b"></a><code>lpeg.Cb (name)</code></h3>
700704
another complete capture.
701705
</p>
702706

707+
<p>
708+
In the same way that LPeg does not specify when it evaluates captures,
709+
it does not specify whether it reuses
710+
values previously produced by the group
711+
or re-evaluates them.
712+
</p>
703713

704714
<h3><a name="cap-cc"></a><code>lpeg.Cc ([value, ...])</code></h3>
705715
<p>
@@ -806,7 +816,7 @@ <h3><a name="cap-s"></a><code>lpeg.Cs (patt)</code></h3>
806816
<h3><a name="cap-t"></a><code>lpeg.Ct (patt)</code></h3>
807817
<p>
808818
Creates a <em>table capture</em>.
809-
This capture creates a table and puts all values from all anonymous captures
819+
This capture returns a table with all values from all anonymous captures
810820
made by <code>patt</code> inside this table in successive integer keys,
811821
starting at 1.
812822
Moreover,
@@ -872,7 +882,8 @@ <h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
872882
<p>
873883
Creates a <em>match-time capture</em>.
874884
Unlike all other captures,
875-
this one is evaluated immediately when a match occurs.
885+
this one is evaluated immediately when a match occurs
886+
(even if it is part of a larger pattern that fails later).
876887
It forces the immediate evaluation of all its nested captures
877888
and then calls <code>function</code>.
878889
</p>
@@ -1380,13 +1391,13 @@ <h3>Arithmetic expressions</h3>
13801391
<h2><a name="download"></a>Download</h2>
13811392

13821393
<p>LPeg
1383-
<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.0.tar.gz">source code</a>.</p>
1394+
<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.1.tar.gz">source code</a>.</p>
13841395

13851396

13861397
<h2><a name="license">License</a></h2>
13871398

13881399
<p>
1389-
Copyright &copy; 2007-2015 Lua.org, PUC-Rio.
1400+
Copyright &copy; 2007-2017 Lua.org, PUC-Rio.
13901401
</p>
13911402
<p>
13921403
Permission is hereby granted, free of charge,
@@ -1424,7 +1435,7 @@ <h2><a name="license">License</a></h2>
14241435

14251436
<div id="about">
14261437
<p><small>
1427-
$Id: lpeg.html,v 1.75 2015/09/28 17:17:41 roberto Exp $
1438+
$Id: lpeg.html,v 1.77 2017/01/13 13:40:05 roberto Exp $
14281439
</small></p>
14291440
</div> <!-- id="about" -->
14301441

3rd/lpeg/lpprint.c

+11-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $
2+
** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $
33
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
44
*/
55

@@ -37,13 +37,13 @@ void printcharset (const byte *st) {
3737
}
3838

3939

40-
static void printcapkind (int kind) {
40+
static const char *capkind (int kind) {
4141
const char *const modes[] = {
4242
"close", "position", "constant", "backref",
4343
"argument", "simple", "table", "function",
4444
"query", "string", "num", "substitution", "fold",
4545
"runtime", "group"};
46-
printf("%s", modes[kind]);
46+
return modes[kind];
4747
}
4848

4949

@@ -73,13 +73,12 @@ void printinst (const Instruction *op, const Instruction *p) {
7373
break;
7474
}
7575
case IFullCapture: {
76-
printcapkind(getkind(p));
77-
printf(" (size = %d) (idx = %d)", getoff(p), p->i.key);
76+
printf("%s (size = %d) (idx = %d)",
77+
capkind(getkind(p)), getoff(p), p->i.key);
7878
break;
7979
}
8080
case IOpenCapture: {
81-
printcapkind(getkind(p));
82-
printf(" (idx = %d)", p->i.key);
81+
printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
8382
break;
8483
}
8584
case ISet: {
@@ -124,8 +123,8 @@ void printpatt (Instruction *p, int n) {
124123

125124
#if defined(LPEG_DEBUG)
126125
static void printcap (Capture *cap) {
127-
printcapkind(cap->kind);
128-
printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
126+
printf("%s (idx: %d - size: %d) -> %p\n",
127+
capkind(cap->kind), cap->idx, cap->siz, cap->s);
129128
}
130129

131130

@@ -177,7 +176,8 @@ void printtree (TTree *tree, int ident) {
177176
break;
178177
}
179178
case TOpenCall: case TCall: {
180-
printf(" key: %d\n", tree->key);
179+
assert(sib2(tree)->tag == TRule);
180+
printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap);
181181
break;
182182
}
183183
case TBehind: {
@@ -186,7 +186,7 @@ void printtree (TTree *tree, int ident) {
186186
break;
187187
}
188188
case TCapture: {
189-
printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n);
189+
printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
190190
printtree(sib1(tree), ident + 2);
191191
break;
192192
}

0 commit comments

Comments
 (0)