PACKET pattern match DEFINES                     (* Author: P.Heyderhoff *) 
                                                 (* Date:   09.06.1986   *) 
    -, 
    OR, 
    **, 
    any, 
    notion, 
    bound, 
    match, 
    matchpos, 
    matchend, 
    somefix, 
    UNLIKE, 
    LIKE : 
 
(*------- Operation codes of the internal intermeadiate language: --------*) 
 
LET 
    z        = ""0"", 
    stopz    = ""1""0"", 
    closez   = ""2""0"", 
    closor   = ""2""0""3""0"", 
    or       = ""3"", 
    oralpha  = ""3""5"", 
    open2    = ""4""0""4""0"", 
    alpha    = ""5"", 
    alphaz   = ""5""0"", 
    lenz     = ""6""0"", 
    nilz     = ""6""0""0""0""7""0"",      (* =    any (0)     *) 
    starz    = ""7""0"", 
    star     = ""8""0""2""7""0""1""0"",   (* =    any ** 1    *) 
    powerz   = ""8""0"", 
    powerz0  = ""8""0""1"", 
    notionz  = ""9""0"", 
    fullz    = ""10""0"", 
    boundz   = ""11""0""; 
(*------------------------------------------------------------------------*) 
 
LET undefined = 0,                                   (* fixleft value     *) 
    forcer    = 0,                                   (* vaHue parameter   *) 
    delimiter = " !""#$%&'()*+,-./:;<=>?§^_`­";      (* for 'PROC notion' *) 
 
TEXT OP - (TEXT CONST alphabet ): 
     p:= ""; 
     INT VAR j; 
     FOR j FROM 0 UPTO 255 
     REP IF   pos(alphabet,code(j)) = 0 
         THEN p CAT code(j) 
         FI 
     PER; 
     p 
  ENDOP -; 
 
TEXT OP OR (TEXT CONST a, b): 
     open2 + notnil (a) + closor + notnil (b) + closez 
  ENDOP OR; 
 
TEXT OP ** (TEXT CONST p, INT CONST x): 
     powerz + code (1+x) + notnil (p) + stopz 
  ENDOP **; 
 
TEXT CONST any:= starz; 
 
TEXT PROC any (INT CONST n): 
     TEXT VAR t:= "  "; 
     replace (t, 1, ABSn); 
     lenz + t + starz 
  ENDPROC any; 
 
TEXT PROC any (TEXT CONST a): alphaz + a + starz ENDPROC any; 
 
TEXT PROC any (INT CONST n, TEXT CONST a): 
     TEXT VAR t:= "  "; 
     replace (t, 1, ABSn); 
     lenz + t + alphaz + a + starz  
  ENDPROC any; 
 
TEXT PROC notion (TEXT CONST t): notionz + notnil(t) + stopz ENDPROC notion; 
 
TEXT PROC notnil (TEXT CONST t): 
     IF   t = "" 
     THEN nilz 
     ELSE t 
     FI 
  ENDPROC notnil; 
 
TEXT CONST bound := boundz; 
 
TEXT PROC full (TEXT CONST t): fullz + t + stopz ENDPROC full; 
 
TEXT PROC match (INT CONST x): 
     subtext (p, matchpos(x), matchend(x)) 
  ENDPROC match; 
 
INT PROC matchpos (INT CONST x): mapos (1 + x MOD 256) ENDPROC matchpos; 
 
INT PROC matchend (INT CONST x): maend (1 + x MOD 256) - 1 
 ENDPROC matchend; 
 
(*----------------- GLOBAL VARIABLES: -----------------------------------*) 
 
ROW  256 INT VAR 
                 (* Table of match registers. Each entry consists of two *) 
                 (* pointers, which points to the TEXT object 't'        *) 
     mapos,      (* points to the beginning of the match                 *) 
     maend;      (* points to the position after the end of match        *) 
 
INT  VAR ppos, tpos,   (* workpositions in pattern 'p' and text 't'      *) 
         floatpos,     (* accumulation of all pending floatlengths       *) 
         failpos,      (* result of 'PROC in alpha'                      *) 
         plen, tlen,   (* length of pattern 'p' and length of text 't'   *) 
         skipcount,    (* for track forward skipping                     *) 
         multi, vari;  (* for handling of nonexclusive alternatives      *) 
 
TEXT VAR p,            (* the pattern to be find  or some result         *) 
         stack,        (* stack of pending assignments                   *) 
         alphabet:=""; (* result of 'PROC find alpha', reset to nil      *) 
                       (* after its usage by 'find any'                  *) 
 
BOOL VAR fix,          (* text position is fixed and not floating        *) 
         no vari;      (* not variing the order of alternatives          *) 
 
TEXT PROC somefix (TEXT CONST pattern): 
  
     (* delivers the first text occuring unconditionally in the pattern *) 
 
     p:= pattern; 
     INT VAR j:= 1, n:= 0, k, len:= LENGTH p; 
     REP 
         SELECT text( subtext (p, j, j+1), 2) ISUB 1 OF 
         CASE 1,3,7,9,10,11: j INCR 2 
         CASE 2:             j INCR 2; n DECR 1 (* condition closed *) 
         CASE 4:             j INCR 2; n INCR 1 (* condition opened *) 
         CASE 5:             j := pos (p, starz, j+2) + 2 
         CASE 6:             j INCR 4 
         CASE 8:             j INCR 3 
         OTHERWISE   k:= pos(p, z, j+1) - 1; 
                     IF k <= 0 THEN k:= 1+len FI; 
                     IF   star found
                     THEN change (p, starpos, starpos, star);
                          len:= LENGTH p;
                          k:= starpos
                     FI; 
                     IF n =  0 CAND ( p SUB k ) <> or CAND k > j
                     THEN LEAVE somefix WITH subtext(p,j,k-1) 
                     ELSE j:=k 
                     FI 
         ENDSELECT 
     UNTIL j > len 
     PER; 
     "" .

  star found:   
     INT VAR starpos:= pos (p, "*", j);
     starpos > 0 CAND starpos <= k .
     
  ENDPROC somefix; 
 
PROC skip (TEXT CONST p, BOOL CONST upto or): 
 
     (* skips 'ppos' upto the end of the opened nest, n = nesting level *) 
 
     INT VAR n:= 0; 
     REP 
         SELECT text (subtext (p, ppos, ppos+1), 2) ISUB 1 OF 
         CASE 1,2:    IF   n = 0 
                      THEN LEAVE skip 
                      FI; 
                      ppos INCR 2; 
                      nDECR1 
         CASE 3:      IF   n = 0 CAND upto or 
                      THEN LEAVE skip 
                      FI; 
                      ppos INCR 2 
         CASE 7:      ppos INCR 2 
         CASE 4,9,10,11: ppos INCR 2; 
                      n INCR 1 
         CASE 5:      ppos:= pos (p, starz, ppos+2) + 2 
         CASE 6:      ppos INCR 4 
         CASE 8:      ppos INCR 3; 
                      n INCR 1 
         OTHERWISE    ppos:= pos(p, z, ppos+1) - 1; 
                      IF   ppos < 0 
                      THEN ppos:= plen; 
                           LEAVE skip 
                      FI 
         ENDSELECT 
     PER 
  ENDPROC skip; 
 
BOOL OP UNLIKE (TEXT CONST t, p): NOT ( t LIKE p ) ENDOP UNLIKE; 
 
BOOL OP LIKE (TEXT CONST t, pattern): 
     init; 
     BOOL CONST found:= find (t,1,1, fixresult, floatresult); 
     save; 
     found. 
 
  init:    no vari:= TRUE; 
           vari:= 0; 
           tlen:= 1 + LENGTH t; 
           p:= full (pattern); 
           IF   pos (p, bound) > 0 
           THEN 
                IF   subtext (p, 14, 15) = bound 
                THEN p:= subtext (p, 1, 8) + powerz0 + subtext (p, 16) 
                FI; 
                plen:= LENGTH p - 7;  
                IF   subtext (p, plen, plen+1) = bound  
                THEN p:= subtext (p, 1, plen - 1) + stopz + stopz 
                FI; 
           FI; 
           plen:= LENGTH p + 1; 
           INT VAR fixresult, floatresult; 
           tpos:= 1; 
           floatpos:= 0; 
           stack:= ""; 
           alphabet:= ""; 
           fix:= TRUE; 
           skipcount:= 0; 
           multi:= 0. 
 
  save:    p:= t 
 
  ENDOP LIKE; 
 
(*-------- Realisation of the pattern matching algorithms 'find' --------*) 
 
BOOL PROC find 
     (TEXT CONST t, INT CONST unit, from, INT VAR fixleft, floatlen): 
 
     initialize; 
     BOOL CONST found:= pattern unit; 
     SELECT next command * unit OF 
            CASE 0,1,2:         found 
            CASE 3:             next; 
                                find alternative 
            OTHERWISE           find concatenation 
     ENDSELECT . 
 
  find alternative: 
     IF   found 
     THEN save left position; 
          backtrack; 
          IF   find pattern CAND better 
          THEN note multiplicity 
          ELSE back to first one 
          FI 
     ELSE backtrack multi 
     FI. 
 
  better:               permutation XOR more left. 
 
  permutation:          vari MOD 2 = 1. 
 
  save left position:   j:= fixleft. 
 
  more left:            j > fixleft. 
 
  backtrack multi:      multi:= 2 * backmulti + 1; 
                        vari:= backvari DIV 2; 
                        find pattern. 
 
  note multiplicity:    multi:= 2 * multi + 1; 
                        vari:= vari DIV 2; 
                        TRUE. 
 
  back to first one:    backtrack; 
                        IF   find first subpattern 
                        THEN skip (p, FALSE); 
                             note multiplicity 
                        ELSE errorstop ("pattern"); 
                             FALSE 
                        FI. 
 
  find concatenation: 
     IF   found 
     THEN IF ppos=plen COR find pattern COR track forward 
          COR ( multi > backmulti CAND vari = 0 CAND find variation ) 
          THEN TRUE 
          ELSE backtrack; FALSE 
          FI 
     ELSE skip (p, TRUE); FALSE 
     FI. 
 
  track forward:                   (* must be performed before variation *) 
     j:=0; 
     last multi:= multi; 
     last vari:= vari; 
     WHILE skipcount = 0 
     REP   IF tlen = tpos 
           THEN LEAVE track forward WITH FALSE 
           FI; 
           backtrack; 
           j INCR 1; 
           skipcount:= j 
     UNTIL find first subpattern CAND find pattern 
     PER; 
     j:= skipcount; 
     skipcount:=0; 
     j=0. 
 
  find variation: 
     multi:= last multi; 
     vari:= last vari; 
     FOR k FROM 1 UPTO (multi+1) DIV (backmulti+1) - 1 
     REP backtrack with variation; 
         IF   find first subpattern CAND find pattern 
         THEN vari:=0; 
              LEAVE find variation WITH TRUE 
         FI 
     PER; 
     FALSE. 
 
  backtrack with variation: 
     backtrack; 
     vari:= k. 
 
  find pattern: 
     find (t, 1, ppos+forcer, fixresult, floatresult) CAND keep result. 
 
  find first subpattern: 
     find (t, 0, from, fixresult, floatresult) CAND keep result . 
 
  initialize: 
     INT  VAR   j, 
                k, 
                fixresult, 
                floatresult, 
                last multi, 
                last vari; 
     BOOL CONST backfix:= fix; 
     TEXT CONST backstack:= stack; 
     floatlen:= 0; 
     INT  CONST back:= tpos, 
                backfloat:= floatpos, 
                backskip:= skipcount, 
                backmulti:= multi, 
                backvari:= vari; 
     fixleft:= fixleft0. 
 
  fixleft0: IF fix THEN back ELSE undefined FI. 
 
  backtrack: 
     fix:= backfix; 
     tpos:= back; 
     fixleft:= fixleft0; 
     floatlen:= 0; 
     floatpos:= backfloat; 
     stack:= backstack; 
     skipcount:= backskip; 
     multi:= backmulti; 
     vari:= backvari. 
 
  keep result: 
     IF   fixleft = undefined 
     THEN IF   fixresult = undefined 
          THEN floatlen INCR floatresult 
          ELSE fixleft  := fixresult - floatlen; 
               floatpos DECR floatlen; 
               floatlen:= 0 
          FI 
     FI; 
     TRUE. 
 
  pattern unit: 
     init ppos; 
     SELECT command OF 
            CASE 1,2:   find end 
            CASE 3:     find nil 
            CASE 4:     find choice 
            CASE 5:     find alphabet 
            CASE 6:     find fixlength any 
            CASE 7:     find varlength any 
            CASE 8:     find and store match 
            CASE 9:     find notion 
            CASE 10:    find full 
            CASE 11:    next; find nil 
            OTHERWISE   find plain text                     END SELECT. 
 
  init ppos:    ppos:= from + 2. 
 
  command:      text (subtext (p, from, from+1), 2) ISUB 1. 
 
  next command: text (subtext (p, ppos, ppos+1), 2) ISUB 1. 
 
  next:         ppos INCR 2. 
 
  find end:     ppos DECR 2; 
                fixleft:= tpos; 
                LEAVE find WITH TRUE; 
                TRUE. 
 
  find nil:     ppos DECR 2; 
                fixleft:= tpos; 
                TRUE. 
 
  find choice:  IF   find pattern 
                THEN next; TRUE 
                ELSE next; FALSE 
                FI. 
 
  find plain text: find text upto next command; 
                   IF   fix        THEN allow fix position only 
                   ELIF text found THEN allow variable position 
                                   ELSE allow backtrack 
                   FI. 
 
  find text upto next command: 
     ppos:= pos (p, z, from + 1); 
     IF   ppos = 0 
     THEN ppos:= plen 
     ELSE ppos DECR 1 
     FI; 
     IF   star found
     THEN change (p, starpos, starpos, star);
          plen:= 1 + LENGTH p;
          ppos:= starpos
     FI;     
     tpos:= pos (t, subtext (p, from, ppos - 1), tpos). 
 
  star found:
     INT VAR starpos:= pos (p, "*", from);
     starpos > 0 CAND starpos <= ppos .

  text found: 
     WHILE skipcount > 0 CAND tpos > 0 
     REP   skipcount DECR 1; 
           tpos:= pos (t, subtext(p,from,ppos-1), tpos+1) 
     PER; 
     tpos > 0 .
 
  allow fix position only: 
     IF   tpos = back 
     THEN tpos INCR (ppos-from); TRUE 
     ELSE tpos:= back; 
          from = ppos 
     FI. 
 
  allow variable position: 
     IF   alphabet = "" COR in alpha (t, back, tpos)  
     THEN fix it; 
          tpos INCR (ppos-from); 
          TRUE 
     ELSE tpos:= back; 
          FALSE 
     FI. 
 
  allow backtrack: 
     tpos:= back; 
     IF   from = ppos 
     THEN fix it; 
          TRUE 
     ELSE FALSE 
     FI . 
 
  find alphabet: 
     j:= pos (p, starz, ppos); 
     alphabet:= subtext (p, ppos, j-1); 
     ppos := j; 
     TRUE. 
 
  find fixlength any: 
     get length value; 
     find alpha attribut; 
     IF   alphabet = "" 
     THEN find any with fix length 
     ELSE find any in alphabet with fix length 
     FI. 
 
  get length value: 
     floatlen:= subtext(p, ppos, ppos+1) ISUB 1;
     ppos INCR 4. 
 
  find alpha attribut: 
     IF (p SUB (ppos-2)) = alpha CAND find alphabet 
     THEN next 
     FI. 
 
  find any with fix length: 
     tpos INCR floatlen; 
     IF   tpos > tlen 
     THEN tpos:= back; 
          floatlen:=0; 
          FALSE 
     ELSE IF   fix THEN floatlen:= 0 
          ELIF floatlen = 0 
          THEN fix it                         (* unlike niltext 6.6. *) 
          ELSE floatpos INCR floatlen 
          FI; 
          TRUE 
     FI. 
 
  find any in alphabet with fix length: 
     IF   first character in alpha 
     THEN IF NOT fix THEN fix it FI; 
          set fix found 
     ELSE set fix not found 
     FI. 
 
  first character in alpha: 
    (fix COR advance) CAND in alpha (t, tpos, tpos+floatlen). 
 
  advance: 
     FOR tpos FROM back UPTO tlen 
     REP IF   pos (alphabet, t SUB tpos) > 0 
         THEN LEAVE advance WITH TRUE 
         FI 
     PER; 
     FALSE. 
 
  fix it: 
     fixleft:= back-floatpos; 
     make fix (back); 
     fixleft:= tpos. 
 
  set fix found: 
     tpos INCR floatlen; 
     floatlen:= 0; 
     alphabet:= ""; 
     TRUE. 
 
  set fix not found:    tpos:= back; 
                        alphabet:= ""; 
                        floatlen:= 0; 
                        FALSE. 
 
  find varlength any:   IF   alphabet = "" 
                        THEN really any 
                        ELSE find varlength any in alphabet 
                        FI. 
 
  really any:           IF   fix 
                        THEN fix:= FALSE; 
                             fixleft:= tpos 
                        ELIF floatpos = 0 
                        THEN fixleft:= tpos                  (* 6.6. *) 
                        FI; 
                        TRUE . 
 
  find varlength any in alphabet: 
                        IF fix THEN fixleft := tpos FI; 
                        IF   fix CAND pos (alphabet, t SUB tpos) > 0 
                        COR  NOT fix CAND advance 
                        THEN IF NOT fix THEN fix it FI; 
                             set var found 
                        ELSE set var not found 
                        FI. 
 
  set var found:        tpos:= end of varlength any; 
                        alphabet:= ""; 
                        TRUE. 
  set var not found:    tpos:= back; 
                        alphabet:= ""; 
                        FALSE. 
  end of varlength any: IF   NOT in alpha(t,tpos,tlen) 
                        THEN failpos 
                        ELSE tlen 
                        FI. 
 
  find and store match: get register name; 
                        IF  find pattern 
                        THEN next; 
                             store; 
                             TRUE 
                        ELSE next; 
                             FALSE 
                        FI. 
 
  store:                IF   fix 
                        THEN mapos (reg):= fixleft; 
                             maend (reg):= tpos 
                        ELSE stack CAT code(floatlen) + 
                                       code(floatpos) + code(fixleft) + c 
                        FI. 
 
  get register name:    TEXT CONST c:= p SUB (ppos); 
                        INT  VAR reg:= code (c); 
                        ppos INCR 1. 
 
  find notion:    float notion; 
                  exhaust notion . 
 
  float notion:   j:= back; 
                  REP IF   find pattern 
                      THEN IF   is notion (t, fixleft) 
                           THEN LEAVE find notion WITH TRUE 
                           ELIF backfix 
                           THEN LEAVE float notion 
                           ELSE go ahead FI 
                      ELIF j=back 
                      THEN next; 
                           LEAVE find notion WITH FALSE 
                      ELSE LEAVE float notion 
                      FI 
                  PER. 
 
  go ahead:       j INCR 1; 
                  IF simple THEN j:= max (tpos, j) FI; 
                  notion backtrack. 
 
  simple:         k:= from; 
                  REP k := pos (p, z, k+2); 
                      IF   k > ppos-3 
                      THEN LEAVE simple WITH TRUE 
                      ELIF pos (oralpha, p SUB k-1) > 0 
                      THEN LEAVE simple WITH FALSE 
                      FI 
                  PER; 
                  FALSE. 
 
  notion backtrack: tpos:= j; 
                  fix:= backfix; 
                  fixleft:= fixleft0; 
                  floatlen:= 0; 
                  floatpos:= backfloat + tpos - back; 
                  stack:= backstack; 
                  ppos:= from + 2 . 
 
  exhaust notion: IF   notion expansion 
                  COR  multi > backmulti 
                  CAND no vari 
                  CAND notion variation 
                  THEN TRUE 
                  ELSE backtrack; FALSE 
                  FI. 
 
  notion expansion: j:= 0; 
                  multi:= last multi; 
                  vari:= last vari; 
                  WHILE skipcount = 0 
                  REP skip and try PER; 
                  j:= skipcount; 
                  skipcount:= 0; 
                  j = 0. 
 
  skip and try:   backtrack; 
                  j INCR 1; 
                  skipcount:=j; 
                  ppos:= from + 2; 
                  IF   find pattern 
                  THEN IF    is notion (t, fixleft) 
                       THEN  LEAVE find notion WITH TRUE 
                       FI 
                  ELSE next; LEAVE find notion WITH FALSE 
                  FI . 
 
  notion variation: no vari:= FALSE; 
                  last multi:= multi; 
                  last vari:= vari; 
                  FOR k FROM 1 UPTO (multi+1) DIV (backmulti+1) - 1 
                  REP backtrack with variation; 
                      IF   find first subpattern 
                      THEN no vari:= TRUE; 
                           LEAVE find notion WITH TRUE 
                      FI 
                  PER; 
                  no vari:= TRUE; 
                  FALSE. 
 
  find full: 
    find pattern CAND (end of line COR exhaust line). 
 
  end of line: 
    next; 
    IF   fix 
    THEN tpos = tlen 
    ELSE tpos:= tlen; 
         make fix (1); 
         TRUE 
    FI. 
 
  exhaust line: 
    IF   full expansion COR multi > 0 CAND no vari CAND full variation 
    THEN TRUE ELSE backtrack; 
         FALSE 
    FI. 
 
  full expansion: 
     j:=0; 
     last multi:= multi; 
     last vari:= vari; 
     WHILE skipcount = 0 
     REP   IF tlen = tpos 
           THEN LEAVE full expansion WITH FALSE 
           FI; 
           backtrack; 
           j INCR 1; 
           skipcount:= j; 
           ppos:=from + 2 
     UNTIL find pattern CAND tpos=tlen 
     PER; 
     j:= skipcount; 
     skipcount:=0; 
     j=0. 
 
  full variation: 
    no vari:= FALSE; 
    multi:= last multi; 
    vari:= last vari; 
    FOR k FROM 1 UPTO multi 
    REP backtrack with variation; 
        IF   find first subpattern 
        THEN no vari:= TRUE; 
             LEAVE find WITH TRUE 
        FI 
    PER; 
    no vari:= TRUE; 
    FALSE. 
 
 ENDPROC find; 
 
BOOL PROC is notion (TEXT CONST t, INT CONST fixleft): 
     ppos INCR 2; 
     (    NOT  fix 
          COR  tpos = tlen 
          COR  pos (delimiter, t SUB tpos)   > 0 
          COR  pos (delimiter, t SUB tpos-1) > 0 
          COR  (t SUB tpos)  <= "Z" 
          CAND (t SUB tpos-1) > "Z"   ) 
     CAND (    fixleft <= 1 
          COR  pos (delimiter, t SUB fixleft-1) > 0 
          COR  pos (delimiter, t SUB fixleft)   > 0 
          COR  (t SUB fixleft)    > "Z" 
          CAND (t SUB fixleft-1) <= "Z"   ) 
 
 END PROC is notion; 
 
PROC make fix (INT CONST back): 
     WHILE stack not empty 
     REP  INT VAR reg:= code (stack SUB top), 
                  pos:= code (stack SUB top-1), 
                  len:= code (stack SUB top-3), 
                  dis:= code (stack SUB top-2) - floatpos; 
          maend(reg):= min (tpos + dis, tlen);              (* 6.6. *) 
          mapos(reg):= pos or fix or float; 
          stack:= subtext (stack,1,top-4) 
     PER; 
     fix:= TRUE; 
     floatpos:= 0 . 
 
     stack not empty: INT VAR top:= LENGTH stack; 
                      top > 0. 
 
     pos or fix or float: 
          IF   pos = undefined 
          THEN IF len = 0 
               THEN min (back + dis, tlen) 
               ELSE maend(reg) - len 
               FI 
          ELSE pos 
          FI. 
 
 ENDPROC make fix; 
 
BOOL PROC in alpha (TEXT CONST t, INT CONST from, to): 
     FOR failpos FROM from UPTO to - 1 
     REP IF   pos (alphabet, t SUB failpos) = 0 
         THEN LEAVE in alpha WITH FALSE 
         FI 
     PER; 
     TRUE 
  ENDPROC in alpha; 
 
TEXT PROC notion (TEXT CONST t, INT CONST r): notion (t) ** r ENDPROC notion; 
 
ENDPACKET pattern match;