#!/usr/bin/python
from nltk.tag import brill

# Training data would normally be supplied by a per-corpus reader class
# (nltk.corpus.TaggedCorpusReader, nltk.corpus.EuroparlCorpusReader, etc.).
# A single hand-tagged sentence stands in for a corpus here: the outer list
# holds sentences, and each sentence is a list of (word, tag) pairs.
mock_training_data = [
    [
        ("Should", "md"),
        ("I", "pn"),
        ("apologize", "vb"),
        ("for", "in"),
        ("the", "dt"),
        ("wait", "nn"),
        ("?", "."),
    ],
]

### TEMPLATES
# Rule templates handed to the trainer. Each one pairs a rule class with a
# (start, end) window of offsets relative to the word being retagged; the
# learned-rule descriptions further down show how the windows are read.
templates = [
    # Condition on the tags of the two preceding words (i-2...i-1).
    brill.ProximateTokensTemplate(
        brill.ProximateTagsRule,
        (-2, -1),
    ),
    # Condition on the text of the immediately preceding word (i-1).
    brill.ProximateTokensTemplate(
        brill.ProximateWordsRule,
        (-1, -1),
    ),
]

# Placeholder for the initial tagger passed to the trainer. The literal `...`
# is not a usable tagger and must be replaced with a real object before this
# example can run.
baselineTagger = ... #some object implementing a basic 'tag' method
# Build a trainer from the baseline tagger and the rule templates above.
trainer = brill.FastBrillTaggerTrainer(baselineTagger, templates)
# Train on the tagged sentences; the result is the trained Brill tagger.
brill_tagger = trainer.train(mock_training_data)
# Fetch the rules the tagger learned (sample output is shown below).
rules = brill_tagger.rules()

### LEARNED RULES
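# Sample trainer output, kept as comments so the file remains valid Python.
# The four numbers before each rule are its score, followed by the number of
# good, bad, and neutral applications.
#  1  1  0  0  | vb -> nn if the text of the preceding word is 'the'
#  1  1  0  0  | vbp -> vb if the tag of words i-2...i-1 is 'md'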