PACKET pattern match DEFINES (* Author: P.Heyderhoff *)
(* Date: 09.06.1986 *)
-,
OR,
**,
any,
notion,
bound,
match,
matchpos,
matchend,
somefix,
UNLIKE,
LIKE :
(*------- Operation codes of the internal intermeadiate language: --------*)
LET
z = ""0"",
stopz = ""1""0"",
closez = ""2""0"",
closor = ""2""0""3""0"",
or = ""3"",
oralpha = ""3""5"",
open2 = ""4""0""4""0"",
alpha = ""5"",
alphaz = ""5""0"",
lenz = ""6""0"",
nilz = ""6""0""0""0""7""0"", (* = any (0) *)
starz = ""7""0"",
star = ""8""0""2""7""0""1""0"", (* = any ** 1 *)
powerz = ""8""0"",
powerz0 = ""8""0""1"",
notionz = ""9""0"",
fullz = ""10""0"",
boundz = ""11""0"";
(*------------------------------------------------------------------------*)
LET undefined = 0, (* fixleft value *)
forcer = 0, (* vaHue parameter *)
delimiter = " !""#$%&'()*+,-./:;<=>?§^_`"; (* for 'PROC notion' *)
TEXT OP - (TEXT CONST alphabet ):
p:= "";
INT VAR j;
FOR j FROM 0 UPTO 255
REP IF pos(alphabet,code(j)) = 0
THEN p CAT code(j)
FI
PER;
p
ENDOP -;
TEXT OP OR (TEXT CONST a, b):
open2 + notnil (a) + closor + notnil (b) + closez
ENDOP OR;
TEXT OP ** (TEXT CONST p, INT CONST x):
powerz + code (1+x) + notnil (p) + stopz
ENDOP **;
TEXT CONST any:= starz;
TEXT PROC any (INT CONST n):
TEXT VAR t:= " ";
replace (t, 1, ABSn);
lenz + t + starz
ENDPROC any;
TEXT PROC any (TEXT CONST a): alphaz + a + starz ENDPROC any;
TEXT PROC any (INT CONST n, TEXT CONST a):
TEXT VAR t:= " ";
replace (t, 1, ABSn);
lenz + t + alphaz + a + starz
ENDPROC any;
TEXT PROC notion (TEXT CONST t): notionz + notnil(t) + stopz ENDPROC notion;
TEXT PROC notnil (TEXT CONST t):
IF t = ""
THEN nilz
ELSE t
FI
ENDPROC notnil;
TEXT CONST bound := boundz;
TEXT PROC full (TEXT CONST t): fullz + t + stopz ENDPROC full;
TEXT PROC match (INT CONST x):
subtext (p, matchpos(x), matchend(x))
ENDPROC match;
INT PROC matchpos (INT CONST x): mapos (1 + x MOD 256) ENDPROC matchpos;
INT PROC matchend (INT CONST x): maend (1 + x MOD 256) - 1
ENDPROC matchend;
(*----------------- GLOBAL VARIABLES: -----------------------------------*)
ROW 256 INT VAR
(* Table of match registers. Each entry consists of two *)
(* pointers, which points to the TEXT object 't' *)
mapos, (* points to the beginning of the match *)
maend; (* points to the position after the end of match *)
INT VAR ppos, tpos, (* workpositions in pattern 'p' and text 't' *)
floatpos, (* accumulation of all pending floatlengths *)
failpos, (* result of 'PROC in alpha' *)
plen, tlen, (* length of pattern 'p' and length of text 't' *)
skipcount, (* for track forward skipping *)
multi, vari; (* for handling of nonexclusive alternatives *)
TEXT VAR p, (* the pattern to be find or some result *)
stack, (* stack of pending assignments *)
alphabet:=""; (* result of 'PROC find alpha', reset to nil *)
(* after its usage by 'find any' *)
BOOL VAR fix, (* text position is fixed and not floating *)
no vari; (* not variing the order of alternatives *)
TEXT PROC somefix (TEXT CONST pattern):
(* delivers the first text occuring unconditionally in the pattern *)
p:= pattern;
INT VAR j:= 1, n:= 0, k, len:= LENGTH p;
REP
SELECT text( subtext (p, j, j+1), 2) ISUB 1 OF
CASE 1,3,7,9,10,11: j INCR 2
CASE 2: j INCR 2; n DECR 1 (* condition closed *)
CASE 4: j INCR 2; n INCR 1 (* condition opened *)
CASE 5: j := pos (p, starz, j+2) + 2
CASE 6: j INCR 4
CASE 8: j INCR 3
OTHERWISE k:= pos(p, z, j+1) - 1;
IF k <= 0 THEN k:= 1+len FI;
IF star found
THEN change (p, starpos, starpos, star);
len:= LENGTH p;
k:= starpos
FI;
IF n = 0 CAND ( p SUB k ) <> or CAND k > j
THEN LEAVE somefix WITH subtext(p,j,k-1)
ELSE j:=k
FI
ENDSELECT
UNTIL j > len
PER;
"" .
star found:
INT VAR starpos:= pos (p, "*", j);
starpos > 0 CAND starpos <= k .
ENDPROC somefix;
PROC skip (TEXT CONST p, BOOL CONST upto or):
(* skips 'ppos' upto the end of the opened nest, n = nesting level *)
INT VAR n:= 0;
REP
SELECT text (subtext (p, ppos, ppos+1), 2) ISUB 1 OF
CASE 1,2: IF n = 0
THEN LEAVE skip
FI;
ppos INCR 2;
nDECR1
CASE 3: IF n = 0 CAND upto or
THEN LEAVE skip
FI;
ppos INCR 2
CASE 7: ppos INCR 2
CASE 4,9,10,11: ppos INCR 2;
n INCR 1
CASE 5: ppos:= pos (p, starz, ppos+2) + 2
CASE 6: ppos INCR 4
CASE 8: ppos INCR 3;
n INCR 1
OTHERWISE ppos:= pos(p, z, ppos+1) - 1;
IF ppos < 0
THEN ppos:= plen;
LEAVE skip
FI
ENDSELECT
PER
ENDPROC skip;
BOOL OP UNLIKE (TEXT CONST t, p): NOT ( t LIKE p ) ENDOP UNLIKE;
BOOL OP LIKE (TEXT CONST t, pattern):
init;
BOOL CONST found:= find (t,1,1, fixresult, floatresult);
save;
found.
init: no vari:= TRUE;
vari:= 0;
tlen:= 1 + LENGTH t;
p:= full (pattern);
IF pos (p, bound) > 0
THEN
IF subtext (p, 14, 15) = bound
THEN p:= subtext (p, 1, 8) + powerz0 + subtext (p, 16)
FI;
plen:= LENGTH p - 7;
IF subtext (p, plen, plen+1) = bound
THEN p:= subtext (p, 1, plen - 1) + stopz + stopz
FI;
FI;
plen:= LENGTH p + 1;
INT VAR fixresult, floatresult;
tpos:= 1;
floatpos:= 0;
stack:= "";
alphabet:= "";
fix:= TRUE;
skipcount:= 0;
multi:= 0.
save: p:= t
ENDOP LIKE;
(*-------- Realisation of the pattern matching algorithms 'find' --------*)
BOOL PROC find
(TEXT CONST t, INT CONST unit, from, INT VAR fixleft, floatlen):
initialize;
BOOL CONST found:= pattern unit;
SELECT next command * unit OF
CASE 0,1,2: found
CASE 3: next;
find alternative
OTHERWISE find concatenation
ENDSELECT .
find alternative:
IF found
THEN save left position;
backtrack;
IF find pattern CAND better
THEN note multiplicity
ELSE back to first one
FI
ELSE backtrack multi
FI.
better: permutation XOR more left.
permutation: vari MOD 2 = 1.
save left position: j:= fixleft.
more left: j > fixleft.
backtrack multi: multi:= 2 * backmulti + 1;
vari:= backvari DIV 2;
find pattern.
note multiplicity: multi:= 2 * multi + 1;
vari:= vari DIV 2;
TRUE.
back to first one: backtrack;
IF find first subpattern
THEN skip (p, FALSE);
note multiplicity
ELSE errorstop ("pattern");
FALSE
FI.
find concatenation:
IF found
THEN IF ppos=plen COR find pattern COR track forward
COR ( multi > backmulti CAND vari = 0 CAND find variation )
THEN TRUE
ELSE backtrack; FALSE
FI
ELSE skip (p, TRUE); FALSE
FI.
track forward: (* must be performed before variation *)
j:=0;
last multi:= multi;
last vari:= vari;
WHILE skipcount = 0
REP IF tlen = tpos
THEN LEAVE track forward WITH FALSE
FI;
backtrack;
j INCR 1;
skipcount:= j
UNTIL find first subpattern CAND find pattern
PER;
j:= skipcount;
skipcount:=0;
j=0.
find variation:
multi:= last multi;
vari:= last vari;
FOR k FROM 1 UPTO (multi+1) DIV (backmulti+1) - 1
REP backtrack with variation;
IF find first subpattern CAND find pattern
THEN vari:=0;
LEAVE find variation WITH TRUE
FI
PER;
FALSE.
backtrack with variation:
backtrack;
vari:= k.
find pattern:
find (t, 1, ppos+forcer, fixresult, floatresult) CAND keep result.
find first subpattern:
find (t, 0, from, fixresult, floatresult) CAND keep result .
initialize:
INT VAR j,
k,
fixresult,
floatresult,
last multi,
last vari;
BOOL CONST backfix:= fix;
TEXT CONST backstack:= stack;
floatlen:= 0;
INT CONST back:= tpos,
backfloat:= floatpos,
backskip:= skipcount,
backmulti:= multi,
backvari:= vari;
fixleft:= fixleft0.
fixleft0: IF fix THEN back ELSE undefined FI.
backtrack:
fix:= backfix;
tpos:= back;
fixleft:= fixleft0;
floatlen:= 0;
floatpos:= backfloat;
stack:= backstack;
skipcount:= backskip;
multi:= backmulti;
vari:= backvari.
keep result:
IF fixleft = undefined
THEN IF fixresult = undefined
THEN floatlen INCR floatresult
ELSE fixleft := fixresult - floatlen;
floatpos DECR floatlen;
floatlen:= 0
FI
FI;
TRUE.
pattern unit:
init ppos;
SELECT command OF
CASE 1,2: find end
CASE 3: find nil
CASE 4: find choice
CASE 5: find alphabet
CASE 6: find fixlength any
CASE 7: find varlength any
CASE 8: find and store match
CASE 9: find notion
CASE 10: find full
CASE 11: next; find nil
OTHERWISE find plain text END SELECT.
init ppos: ppos:= from + 2.
command: text (subtext (p, from, from+1), 2) ISUB 1.
next command: text (subtext (p, ppos, ppos+1), 2) ISUB 1.
next: ppos INCR 2.
find end: ppos DECR 2;
fixleft:= tpos;
LEAVE find WITH TRUE;
TRUE.
find nil: ppos DECR 2;
fixleft:= tpos;
TRUE.
find choice: IF find pattern
THEN next; TRUE
ELSE next; FALSE
FI.
find plain text: find text upto next command;
IF fix THEN allow fix position only
ELIF text found THEN allow variable position
ELSE allow backtrack
FI.
find text upto next command:
ppos:= pos (p, z, from + 1);
IF ppos = 0
THEN ppos:= plen
ELSE ppos DECR 1
FI;
IF star found
THEN change (p, starpos, starpos, star);
plen:= 1 + LENGTH p;
ppos:= starpos
FI;
tpos:= pos (t, subtext (p, from, ppos - 1), tpos).
star found:
INT VAR starpos:= pos (p, "*", from);
starpos > 0 CAND starpos <= ppos .
text found:
WHILE skipcount > 0 CAND tpos > 0
REP skipcount DECR 1;
tpos:= pos (t, subtext(p,from,ppos-1), tpos+1)
PER;
tpos > 0 .
allow fix position only:
IF tpos = back
THEN tpos INCR (ppos-from); TRUE
ELSE tpos:= back;
from = ppos
FI.
allow variable position:
IF alphabet = "" COR in alpha (t, back, tpos)
THEN fix it;
tpos INCR (ppos-from);
TRUE
ELSE tpos:= back;
FALSE
FI.
allow backtrack:
tpos:= back;
IF from = ppos
THEN fix it;
TRUE
ELSE FALSE
FI .
find alphabet:
j:= pos (p, starz, ppos);
alphabet:= subtext (p, ppos, j-1);
ppos := j;
TRUE.
find fixlength any:
get length value;
find alpha attribut;
IF alphabet = ""
THEN find any with fix length
ELSE find any in alphabet with fix length
FI.
get length value:
floatlen:= subtext(p, ppos, ppos+1) ISUB 1;
ppos INCR 4.
find alpha attribut:
IF (p SUB (ppos-2)) = alpha CAND find alphabet
THEN next
FI.
find any with fix length:
tpos INCR floatlen;
IF tpos > tlen
THEN tpos:= back;
floatlen:=0;
FALSE
ELSE IF fix THEN floatlen:= 0
ELIF floatlen = 0
THEN fix it (* unlike niltext 6.6. *)
ELSE floatpos INCR floatlen
FI;
TRUE
FI.
find any in alphabet with fix length:
IF first character in alpha
THEN IF NOT fix THEN fix it FI;
set fix found
ELSE set fix not found
FI.
first character in alpha:
(fix COR advance) CAND in alpha (t, tpos, tpos+floatlen).
advance:
FOR tpos FROM back UPTO tlen
REP IF pos (alphabet, t SUB tpos) > 0
THEN LEAVE advance WITH TRUE
FI
PER;
FALSE.
fix it:
fixleft:= back-floatpos;
make fix (back);
fixleft:= tpos.
set fix found:
tpos INCR floatlen;
floatlen:= 0;
alphabet:= "";
TRUE.
set fix not found: tpos:= back;
alphabet:= "";
floatlen:= 0;
FALSE.
find varlength any: IF alphabet = ""
THEN really any
ELSE find varlength any in alphabet
FI.
really any: IF fix
THEN fix:= FALSE;
fixleft:= tpos
ELIF floatpos = 0
THEN fixleft:= tpos (* 6.6. *)
FI;
TRUE .
find varlength any in alphabet:
IF fix THEN fixleft := tpos FI;
IF fix CAND pos (alphabet, t SUB tpos) > 0
COR NOT fix CAND advance
THEN IF NOT fix THEN fix it FI;
set var found
ELSE set var not found
FI.
set var found: tpos:= end of varlength any;
alphabet:= "";
TRUE.
set var not found: tpos:= back;
alphabet:= "";
FALSE.
end of varlength any: IF NOT in alpha(t,tpos,tlen)
THEN failpos
ELSE tlen
FI.
find and store match: get register name;
IF find pattern
THEN next;
store;
TRUE
ELSE next;
FALSE
FI.
store: IF fix
THEN mapos (reg):= fixleft;
maend (reg):= tpos
ELSE stack CAT code(floatlen) +
code(floatpos) + code(fixleft) + c
FI.
get register name: TEXT CONST c:= p SUB (ppos);
INT VAR reg:= code (c);
ppos INCR 1.
find notion: float notion;
exhaust notion .
float notion: j:= back;
REP IF find pattern
THEN IF is notion (t, fixleft)
THEN LEAVE find notion WITH TRUE
ELIF backfix
THEN LEAVE float notion
ELSE go ahead FI
ELIF j=back
THEN next;
LEAVE find notion WITH FALSE
ELSE LEAVE float notion
FI
PER.
go ahead: j INCR 1;
IF simple THEN j:= max (tpos, j) FI;
notion backtrack.
simple: k:= from;
REP k := pos (p, z, k+2);
IF k > ppos-3
THEN LEAVE simple WITH TRUE
ELIF pos (oralpha, p SUB k-1) > 0
THEN LEAVE simple WITH FALSE
FI
PER;
FALSE.
notion backtrack: tpos:= j;
fix:= backfix;
fixleft:= fixleft0;
floatlen:= 0;
floatpos:= backfloat + tpos - back;
stack:= backstack;
ppos:= from + 2 .
exhaust notion: IF notion expansion
COR multi > backmulti
CAND no vari
CAND notion variation
THEN TRUE
ELSE backtrack; FALSE
FI.
notion expansion: j:= 0;
multi:= last multi;
vari:= last vari;
WHILE skipcount = 0
REP skip and try PER;
j:= skipcount;
skipcount:= 0;
j = 0.
skip and try: backtrack;
j INCR 1;
skipcount:=j;
ppos:= from + 2;
IF find pattern
THEN IF is notion (t, fixleft)
THEN LEAVE find notion WITH TRUE
FI
ELSE next; LEAVE find notion WITH FALSE
FI .
notion variation: no vari:= FALSE;
last multi:= multi;
last vari:= vari;
FOR k FROM 1 UPTO (multi+1) DIV (backmulti+1) - 1
REP backtrack with variation;
IF find first subpattern
THEN no vari:= TRUE;
LEAVE find notion WITH TRUE
FI
PER;
no vari:= TRUE;
FALSE.
find full:
find pattern CAND (end of line COR exhaust line).
end of line:
next;
IF fix
THEN tpos = tlen
ELSE tpos:= tlen;
make fix (1);
TRUE
FI.
exhaust line:
IF full expansion COR multi > 0 CAND no vari CAND full variation
THEN TRUE ELSE backtrack;
FALSE
FI.
full expansion:
j:=0;
last multi:= multi;
last vari:= vari;
WHILE skipcount = 0
REP IF tlen = tpos
THEN LEAVE full expansion WITH FALSE
FI;
backtrack;
j INCR 1;
skipcount:= j;
ppos:=from + 2
UNTIL find pattern CAND tpos=tlen
PER;
j:= skipcount;
skipcount:=0;
j=0.
full variation:
no vari:= FALSE;
multi:= last multi;
vari:= last vari;
FOR k FROM 1 UPTO multi
REP backtrack with variation;
IF find first subpattern
THEN no vari:= TRUE;
LEAVE find WITH TRUE
FI
PER;
no vari:= TRUE;
FALSE.
ENDPROC find;
BOOL PROC is notion (TEXT CONST t, INT CONST fixleft):
ppos INCR 2;
( NOT fix
COR tpos = tlen
COR pos (delimiter, t SUB tpos) > 0
COR pos (delimiter, t SUB tpos-1) > 0
COR (t SUB tpos) <= "Z"
CAND (t SUB tpos-1) > "Z" )
CAND ( fixleft <= 1
COR pos (delimiter, t SUB fixleft-1) > 0
COR pos (delimiter, t SUB fixleft) > 0
COR (t SUB fixleft) > "Z"
CAND (t SUB fixleft-1) <= "Z" )
END PROC is notion;
PROC make fix (INT CONST back):
WHILE stack not empty
REP INT VAR reg:= code (stack SUB top),
pos:= code (stack SUB top-1),
len:= code (stack SUB top-3),
dis:= code (stack SUB top-2) - floatpos;
maend(reg):= min (tpos + dis, tlen); (* 6.6. *)
mapos(reg):= pos or fix or float;
stack:= subtext (stack,1,top-4)
PER;
fix:= TRUE;
floatpos:= 0 .
stack not empty: INT VAR top:= LENGTH stack;
top > 0.
pos or fix or float:
IF pos = undefined
THEN IF len = 0
THEN min (back + dis, tlen)
ELSE maend(reg) - len
FI
ELSE pos
FI.
ENDPROC make fix;
BOOL PROC in alpha (TEXT CONST t, INT CONST from, to):
FOR failpos FROM from UPTO to - 1
REP IF pos (alphabet, t SUB failpos) = 0
THEN LEAVE in alpha WITH FALSE
FI
PER;
TRUE
ENDPROC in alpha;
TEXT PROC notion (TEXT CONST t, INT CONST r): notion (t) ** r ENDPROC notion;
ENDPACKET pattern match;