;english.rul
;Rules file for Englex
;last modified 28-Nov-95

;Englex 2.0b5
;Copyright (C) 1991-1995, Summer Institute of Linguistics, Inc.

;Evan Antworth                    |  e-mail: evan.antworth@sil.org
;Academic Computing Department    |  phone:  214-709-3346, -2418
;Summer Institute of Linguistics  |  fax:    214-709-3363
;7500 W. Camp Wisdom Road
;Dallas, TX 75236

;This description of English is based on the article "A two-level
;morphological analysis of English," by Lauri Karttunen and
;K. Wittenburg, Texas Linguistic Forum 22:217-228 (1983).
;See appendix A of the PC-KIMMO book for an exposition of the rules 
;in this file (though this version differs).
;Added upper-case letters 5-Apr-95

;CONTENTS
; Defaults
; Epenthesis
; y:i-spelling
; Elision
; i:y-spelling
; Gemination
; s-deletion
; END

;Symbol key:
; ' = apostrophe
; - = hyphen
; ` = stress
; + = morpheme break
; . = period (used in abbreviations such as U.S.A.)

;Declarations shared by all rules below: the symbol inventory, the three
; special symbols, and the named character classes that appear in rule
; column headers.
ALPHABET
 ;lexical (upper) and surface (lower) characters:
 ; ("upper"/"lower" name the two rows of a rule's column header, not letter case)
     b c d f g h j k l m n p q r s t v w x y z a e i o u ' - .
     sh ch  ;digraphs
     B C D F G H J K L M N P Q R S T V W X Y Z A E I O U
 ;lexical (upper) only characters:
 ; (in the rule headers these are paired with surface 0 or with an inserted letter)
     ` +
;0 fills the empty side of a pair (e.g. `:0, e:0), @ is the wildcard used in
; column headers, and # is the word-boundary symbol.
NULL     0
ANY      @
BOUNDARY #
;Character classes. Every class lists lower-case symbols only; the upper-case
; letters declared in the ALPHABET belong to none of them.
SUBSET CN     b c d f g h j k l m n p q r s t v w x y z sh ch
SUBSET CNsib  s x z sh ch              ;sibilant consonants
SUBSET CNpal  c g                      ;palatal consonants
SUBSET CNgem  b d f g l m n p r s t    ;geminated consonants
SUBSET VO     a e i o u
SUBSET VObk   a o u                    ;back vowels

;Single-state table (1 row, 33 columns) in which every entry is 1, so it
; never rejects a pair. Its column headers pair each lower-case letter, the
; digraphs sh and ch, the apostrophe and the hyphen with themselves, and pair
; the lexical-only characters ` (stress) and + (morpheme break) with surface 0.
RULE
"Defaults 1" 1 33
    b c d f g h j k l m n p q r s t v w x y z sh ch a e i o u ' - ` + @
    b c d f g h j k l m n p q r s t v w x y z sh ch a e i o u ' - 0 0 @
 1: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  1  1 1 1 1 1 1 1 1 1 1

;uppercase letters and period
;Single-state table (1 row, 28 columns), all entries 1: each upper-case
; letter and the period is paired with itself and nothing is ever rejected.
RULE
"Defaults 2" 1 28
    B C D F G H J K L M N P Q R S T V W X Y Z A E I O U . @
    B C D F G H J K L M N P Q R S T V W X Y Z A E I O U . @
 1: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

;This +:- rule is useful for Recognition, as it permits recognition of affixes
; with an optional hyphen, e.g. pre-pay or prepay.
;It is disabled (commented out) below; uncomment it to enable that behavior.
;Keep it commented out when using this rules file for Generation or Synthesis,
; to avoid multiple results.

;RULE
;"+:-" 1 2
;    + @
;    - @
; 1: 1 1

;Special correspondences are declared in the column headers of the rules that use them.
;(The s:0 and i:y correspondences are declared by the s-deletion and i:y-spelling rules further down in this file.)
;;   +  y  e  +  +  +  +  +  +  +  +  +  +  +  s  i
;;   e  i  0  b  d  f  g  l  m  n  p  r  s  t  0  y


;==========
;Epenthesis
;==========

; LR: fox+s  kiss+s  church+s  spy+s
; SR: foxes  kisses  churches  spies

;Table layout: 7 states x 8 column pairs (+:e, CNsib, +:@, s, #, y:i, o, @).
;States written "n:" are final, "n." nonfinal; a 0 entry rejects the pair.
; 1  default state; +:e is rejected here. CNsib, s or y:i lead to 2, o to 7.
; 2  a sibilant, s or y:i was just seen: +:e -> 3, any other + -> 5.
; 3. +:e was just used; only s may follow (-> 4).
; 4. after +:e s; only another + or the boundary # may follow (-> 1).
; 5  + was not realized as e after a state-2 trigger: s -> 6.
; 6  s after that unepenthesized +; a following + or # is rejected, which
;    is what makes the e obligatory in this context.
; 7  o was just seen: +:e -> 3 is allowed, but +:@ simply returns to 1, so
;    as tabulated the e after o is permitted rather than required.
RULE
"+:e <=> [CNsib | y:i | o] +:@ ___ s [+:@ | #]" 7 8
     +  CNsib  +    s    #    y    o   @
     e  CNsib  @    s    #    i    o   @
 1:  0    2    1    2    1    2    7   1
 2:  3    2    5    2    1    2    7   1
 3.  0    0    0    4    0    0    0   0
 4.  0    0    1    0    1    0    0   0
 5:  0    1    1    6    1    1    1   1
 6:  0    1    0    1    0    1    1   1
 7:  3    2    1    2    1    2    7   1


;============
;y:i-spelling
;============

; LR: spy+s  happy+ly  spot+y+ness
; SR: spies  happi0ly  spotti0ness

;Obligatory half of y:i-spelling: 4 states (all final) x 7 column pairs
; (@:CN, y:i, y:@, +:@, i, ', @). A 0 entry rejects the pair.
; 1  default state; a surface consonant (@:CN) leads to 2.
; 2  after a surface consonant: y:i returns to 1; the y:@ column (y surfacing
;    as something other than i) leads to 3; a + keeps the table in 2.
; 3  consonant followed by a y that did not become i: + -> 4.
; 4  after that +: only i or ' may follow (-> 1); every other column is 0,
;    so an unchanged y is rejected unless the suffix starts with i or '.
RULE
"y:i <= @:CN ___ +:@ ~[i | ']"  4 7
      @   y   y   +   i   '   @
      CN  i   @   @   i   '   @
 1:   2   1   1   1   1   1   1
 2:   2   1   3   2   1   1   1
 3:   2   1   1   4   1   1   1
 4:   0   0   0   0   1   1   0

;Licensing half of y:i-spelling: 4 states x 6 column pairs
; (@:CN, y:i, +:@, i, ', @). States 3 and 4 are nonfinal ("n.").
; 1  default state; y:i is rejected here (no consonant before it).
; 2  after a surface consonant (a + keeps the table in 2): y:i -> 3.
; 3. y:i was just used; only a + may follow (-> 4).
; 4. after y:i +; i and ' are rejected, a consonant leads to 2 and the
;    remaining columns to 1.
RULE
"y:i => @:CN ___ +:@ ~[i | ']"  4 6
      @   y   +   i   '   @
      CN  i   @   i   '   @
 1:   2   0   1   1   1   1
 2:   2   3   2   1   1   1
 3.   0   0   4   0   0   0
 4.   2   1   1   0   0   1


;=======
;Elision
;=======

; LR: `move+ed `move+ing `move+able `be+ing
; SR: 0mov00ed 0mov00ing 0mov00able 0be0ing

; LR: `trace+ed `trace+ing `trace+able
; SR: 0trac00ed 0trac00ing 0trace0able

;Forces e-deletion after vowel + consonant(s) before a vowel-initial suffix.
;8 states (all final) x 8 column pairs (CN, CNpal, e:0, e:@, +:@, VO, VObk, @);
; a 0 entry rejects the pair.
; 1  default state; any vowel (VO, VObk, or an e that is kept) leads to 2.
; 2  after a vowel: CN -> 3, CNpal (c g) -> 6.
; 3  vowel + consonant(s), last one not c/g: a kept e (e:@) -> 4.
; 4  that kept e: + -> 5.
; 5  kept e followed by +: another e, VO or VObk is rejected, i.e. the e
;    should have been deleted.
; 6  vowel + ... + c/g: a kept e -> 7.
; 7  that kept e: + -> 8.
; 8  like 5, except that VObk returns to 1, so the e may stay before a back
;    vowel after c/g (the `trace+able case).
;NOTE(review): in state 6 the CN and CNpal columns return to 1 rather than to
; 3/6, so a stem shaped VO + c/g + CN + e is not forced to drop its e even
; though the rule name says CN CN*. Confirm whether this is intended.
RULE
"e:0 <= VO CN CN* ___ +:@ VO" 8 8
      CN  CNpal e   e   +   VO VObk @
      CN  CNpal 0   @   @   VO VObk @
 1:   1    1    1   2   1   2   2   1
 2:   3    6    1   2   1   2   2   1
 3:   3    6    1   4   1   2   2   1
 4:   1    1    1   2   5   2   2   1
 5:   1    1    1   0   1   0   0   1
 6:   1    1    1   7   1   2   2   1
 7:   1    1    1   2   8   2   2   1
 8:   1    1    1   0   1   0   1   1

; LR: `trace+able  `change+able
; SR: 0trace0able  0change0able

;Elision prohibited after CNpal and before back vowel
;5 states (all final) x 7 column pairs (VO, CN, CNpal, e:0, +:@, VObk, @).
; 1  default state; VO or VObk -> 2.
; 2  after a vowel: vowels and CN stay in 2, CNpal (c g) -> 3.
; 3  after the c/g: a deleted e (e:0) -> 4.
; 4  after that deleted e: + -> 5.
; 5  after the +: VObk is rejected (0), so the deletion cannot stand before
;    a back vowel.
RULE
"e:0 /<= VO CN* CNpal ___ +:@ VObk" 5 7
      VO  CN  CNpal  e  +  VObk  @
      VO  CN  CNpal  0  @  VObk  @
 1:   2   1     1    1  1    2   1
 2:   2   2     3    1  1    2   1
 3:   2   1     1    4  1    2   1
 4:   2   1     1    1  5    2   1
 5:   2   1     1    1  1    0   1

; LR: a`gree+ed a`gree+ing a`gree+able `hoe+ed `hoe+ing `dye+ed `dye+ing
; SR: a0gre00ed a0gree0ing a0gree0able 0ho00ed 0hoe0ing 0dy00ed 0dye0ing

;Forces e-deletion after consonant + vowel (or y) when the suffix starts
; with e. 5 states (all final) x 8 column pairs
; (CN, VO, y, e:0, e:@, +:@, `:0, @).
;The stress mark `:0 leaves states 2-5 unchanged, so it is transparent once
; a consonant has been seen.
; 1  default state; CN -> 2.
; 2  after a consonant: VO, y or a kept e (e:@) -> 3.
; 3  consonant + vowel/y: a kept e -> 4.
; 4  that kept e: + -> 5.
; 5  kept e followed by +: a further e:@ is rejected (0).
RULE
"e:0 <= CN [VO | y] ___ +:@ e" 5 8
     CN VO y  e  e  +  `  @
     CN VO y  0  @  @  0  @
 1:  2  1  1  1  1  1  1  1
 2:  2  3  3  1  3  1  2  1
 3:  2  1  1  1  4  1  3  1
 4:  2  1  1  1  1  5  4  1
 5:  2  1  1  1  0  1  5  1

; LR: `argue+ing `argue+able
; SR: 0argu00ing 0argu00able

;Forces e-deletion after consonant + u before a vowel-initial suffix.
;5 states (all final) x 7 column pairs (CN, u, e:0, e:@, +:@, VO, @).
; 1  default state; CN -> 2.
; 2  after a consonant: u -> 3.
; 3  consonant + u: a kept e (e:@) -> 4.
; 4  that kept e: + -> 5.
; 5  kept e followed by +: u, e:@ and VO are all rejected (0).
RULE
"e:0 <= CN u ___ +:@ VO" 5 7
     CN u  e  e  +  VO @
     CN u  0  @  @  VO @
 1:  2  1  1  1  1  1  1
 2:  2  3  1  1  1  1  1
 3:  2  1  1  4  1  1  1
 4:  2  1  1  1  5  1  1
 5:  2  0  1  0  1  0  1

;clean-up rule for three <= Elision rules plus i:y rule
;Licenses e:0 only in the four contexts named in the rule.
;14 states x 11 column pairs (e:0, @:VO, CN, +:@, e, y, u, i, `:0, i:y, @);
; "n." rows are nonfinal and a 0 entry rejects the pair.
;Where e:0 is accepted (first column):
;  state 3  (vowel, then consonant(s))       -> 4
;  state 11 (consonant, then u)              -> 4
;  state 7  (consonant(s), then a vowel)     -> 8
;  state 10 (consonant(s), then y)           -> 8
;  state 12 (after i:y)                      -> 13
; In every other final state (1, 2, 6) e:0 is rejected.
;What must follow the deleted e (nonfinal chains):
;  4 -> + -> 5 -> a vowel column (@:VO, e, u or i) -> 2
;  8 -> + -> 9 -> e                                -> 2
;  13 -> + -> 14 -> i                              -> 1
;The stress mark `:0 leaves states 1-12 unchanged and is rejected in 13 and
; 14; i:y leads to 12 from every final state.
RULE
"e:0 => [ VO CN CN* ___ +:@ VO | CN [VO | y] ___ +:@ e | CN u ___ +:@ VO | i:y ___ +:@ i]" 14 11
      e   @   CN  +   e   y   u   i   `   i   @
      0   VO  CN  @   e   y   u   i   0   y   @
 1:   0   2   6   1   2   6   2   2   1   12  1
 2:   0   2   3   1   2   3   2   2   2   12  1
 3:   4   7   3   1   7   3   11  7   3   12  1
 4.   0   0   0   5   0   0   0   0   4   0   0
 5.   0   2   0   0   2   0   2   2   5   0   0   ;prohibits e:0 before y suffix
 6:   0   7   6   1   7   10  11  7   6   12  1
 7:   8   2   3   1   2   3   2   2   7   12  1
 8.   0   0   0   9   0   0   0   0   8   0   0
 9.   0   0   0   0   2   0   0   0   9   0   0
10:   8   7   6   1   7   10  11  7   10  12  1
11:   4   2   3   1   2   3   2   2   11  12  1
12:   13  1   6   1   1   6   1   1   12  12  1
13.   0   0   0   14  0   0   0   0   0   0   0
14.   0   0   0   0   0   0   0   1   0   0   0


;============
;i:y-spelling
;============

;die+ing  tie+ing
;dy00ing  ty00ing

;e:@ instead of e:0 to force Elision to apply
;4 states (all final) x 6 column pairs (i:y, i:@, e:@, +:0, i, @).
; 1  default state; an i not realized as y (i:@ or i:i) -> 2.
; 2  after such an i: e (however realized) -> 3.
; 3  after i e: +:0 -> 4.
; 4  after i e +: a following i (i:@ or i:i) is rejected (0), so the first i
;    should have been realized as y.
RULE
"i:y <= ___ e:@ +:0 i" 4 6
     i  i  e  +  i  @
     y  @  @  0  i  @
 1:  1  2  1  1  2  1
 2:  1  2  3  1  2  1
 3:  1  2  1  4  2  1
 4:  1  0  1  1  0  1

;4 states x 5 column pairs (i:y, e:0, +:0, i, @); states 2-4 are nonfinal.
;Once i:y is used (1 -> 2) the only accepted continuation is
; e:0 (-> 3), then +:0 (-> 4), then i (-> 1); every other entry in rows 2-4
; is 0.
RULE
"i:y => ___ e:0 +:0 i" 4 5
     i  e  +  i  @
     y  0  0  i  @
 1:  2  1  1  1  1
 2.  0  3  0  0  0
 3.  0  0  4  0  0
 4.  0  0  0  1  0


;==========
;Gemination
;==========

; LR: re`fer+ed  `travel+ed  `travel+ed  `sleep+ing  `slip+ing  `spot+y  `spot+y+ness
; SR: re0ferred  0travel0ed  0travelled  0sleep0ing  0slipping  0spotty  0spotti0ness

;Rejects an unrealized + (+:0) where gemination is expected.
;5 states (all final) x 17 column pairs: `:0, CN, VO, b, +:0, y:@, then
; d f g l m n p r s t, and @.
; 1  default state; the stress mark `:0 -> 2.
; 2  after the stress mark: consonants stay in 2, VO -> 3.
; 3  stress mark, consonants, one vowel: any of b d f g l m n p r s t -> 4;
;    a second vowel returns to 1.
; 4  after that consonant: +:0 -> 5.
; 5  after the +:0: VO or y:@ is rejected (0).
RULE
"+:{b,d,f,g,l,m,n,p,r,s,t} /<= `:0 CN* VO {b,d,f,g,l,m,n,p,r,s,t} ___ [VO | y:@]" 5 17
      `   CN  VO  b   +   y   d   f   g   l   m   n   p   r   s   t   @
      0   CN  VO  b   0   @   d   f   g   l   m   n   p   r   s   t   @
 1:   2   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 2:   2   2   3   2   1   1   2   2   2   2   2   2   2   2   2   2   1
 3:   2   1   1   4   1   1   4   4   4   4   4   4   4   4   4   4   1
 4:   2   1   1   1   5   1   1   1   1   1   1   1   1   1   1   1   1
 5:   2   1   0   1   1   0   1   1   1   1   1   1   1   1   1   1   1

;correctly permits traveled and travelled,
; but also incorrectly permits gemination in affixes (see next two rules)
;Licenses + realized as a doubled consonant. 15 states x 27 column pairs:
; the first 11 columns are +:b +:d +:f +:g +:l +:m +:n +:p +:r +:s +:t, then
; CN, VO, b, +:@, y:@, d f g l m n p r s t, and @. State 5 is nonfinal.
; 1  default state; every +:X is rejected. Consonants and y -> 2.
; 2  after a consonant; every +:X is rejected. VO -> 3.
; 3  entered only from state 2 on a VO; every +:X is rejected. Each
;    consonant b d f ... t leads to its own state (4, 6, 7 ... 15).
; 4, 6-15  one state per consonant: only the +:X column for that same
;    consonant is accepted (-> 5); the other ten are 0.
; 5. after the doubled consonant: only VO or y:@ may follow (-> 1).
RULE
"+:{b,d,f,g,l,m,n,p,r,s,t} => CN* VO {b,d,f,g,l,m,n,p,r,s,t} ___ [VO | y:@]" 15 27
      +  +  +  +  +  +  +  +  +  +  +  CN  VO  b  +  y  d  f  g  l  m  n  p  r  s  t  @
      b  d  f  g  l  m  n  p  r  s  t  CN  VO  b  @  @  d  f  g  l  m  n  p  r  s  t  @
 1:   0  0  0  0  0  0  0  0  0  0  0  2   1   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 2:   0  0  0  0  0  0  0  0  0  0  0  2   3   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 3:   0  0  0  0  0  0  0  0  0  0  0  2   1   4  1  1  6  7  8  9  10 11 12 13 14 15 1
 4:   5  0  0  0  0  0  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 5.   0  0  0  0  0  0  0  0  0  0  0  0   1   0  0  1  0  0  0  0  0  0  0  0  0  0  0
 6:   0  5  0  0  0  0  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 7:   0  0  5  0  0  0  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 8:   0  0  0  5  0  0  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
 9:   0  0  0  0  5  0  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
10:   0  0  0  0  0  5  0  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
11:   0  0  0  0  0  0  5  0  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
12:   0  0  0  0  0  0  0  5  0  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
13:   0  0  0  0  0  0  0  0  5  0  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
14:   0  0  0  0  0  0  0  0  0  5  0  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1
15:   0  0  0  0  0  0  0  0  0  0  5  2   2   2  1  2  2  2  2  2  2  2  2  2  2  2  1

;2 states (both final) x 6 column pairs (#, -, +:0, +:CNgem, `:0, @).
; 1  default state; the boundary # or a hyphen -> 2.
; 2  between a boundary/hyphen and the next stress mark: + realized as a
;    CNgem consonant is rejected (0); the stress mark `:0 (or another #)
;    returns to 1, everything else stays in 2.
RULE
"no gemination in prefixes" 2 6
     #  -  +  +      `  @
     #  -  0  CNgem  0  @
 1:  2  2  1  1      1  1
 2:  1  2  2  0      1  2

;3 states (all final) x 4 column pairs (`:0, +:0, +:CNgem, @).
; 1  default state; the stress mark `:0 -> 2.
; 2  after the stress mark: the first +, whether +:0 or +:CNgem, -> 3.
; 3  after that first +: any later +:CNgem is rejected (0). No entry in
;    this row leads anywhere but 3, so the state is never left.
RULE
"no gemination in suffixes" 3 4
     `  +  +      @
     0  0  CNgem  @
 1:  2  1  1      1
 2:  2  3  3      2
 3:  3  3  0      3

;==========
;s-deletion
;==========

;These rules handle deletion of the possessive s:

; LR: cat+s+'s  fox+s+'s  Dallas+'s
; SR: cat0s0'0  foxes0'0  Dallas0'0

;If you don't want to use these rules, move them after the END keyword and
; in the file affix.lex, comment out the +'s entry and uncomment the +' entry.
;These rules increase recognition time.

;Licenses s:0 only after s + '. 4 states (all final) x 5 column pairs
; (s:0, s:s, +:@, ', @); a 0 entry rejects the pair.
; 1  default state; s:0 is rejected. A surface s -> 2.
; 2  after s: s:0 is rejected; + -> 3.
; 3  after s +: s:0 is rejected; ' -> 4.
; 4  after s + ': s:0 is accepted here (-> 1).
RULE
"s:0 => s +:@ ' ___" 4 5
      s  s  +  '  @
      0  s  @  '  @
 1:   0  2  1  1  1
 2:   0  2  3  1  1
 3:   0  2  1  4  1
 4:   1  2  1  1  1

;Forces s:0 after + s + '. 5 states (all final) x 5 column pairs
; (s:0, s:@, +:@, ', @).
; 1  default state; + -> 2.
; 2  after +: s (however realized) -> 3.
; 3  after + s: + -> 4.
; 4  after + s +: ' -> 5.
; 5  after + s + ': an s that is not deleted (s:@) is rejected (0).
;The context starts with a +, so an s that is not preceded by a morpheme
; break (the Dallas+'s example) never reaches state 5 in this table.
RULE
"s:0 <= +:@ s +:@ ' ___" 5 5
     s  s  +  '  @
     0  @  @  '  @
 1:  1  1  2  1  1
 2:  1  3  1  1  1
 3:  1  1  4  1  1
 4:  1  1  1  5  1
 5:  1  0  1  1  1


END
