Import bug fixes from pyspf module. CID xml support removed.

8ad4b161 · Stuart Gathman · b28a56ea · 8ad4b161
Commit 8ad4b161 authored Jul 14, 2005 by Stuart Gathman
--- a/spf.py
+++ b/spf.py
 #!/usr/bin/env python
-"""SPF (Sender-Permitted From) implementation.
+"""SPF (Sender Policy Framework) implementation.
 Copyright (c) 2003, Terence Way
 Portions Copyright (c) 2004,2005 Stuart Gathman <stuart@bmsi.com>
@@ -19,10 +19,11 @@ AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 For more information about SPF, a tool against email forgery, see
-	http://spf.pobox.com
+	http://spf.pobox.com/
 For news, bugfixes, etc. visit the home page for this implementation at
 	http://www.wayforward.net/spf/
+	http://sourceforge.net/projects/pymilter/
 """
 # Changes:
@@ -46,6 +47,37 @@ For news, bugfixes, etc. visit the home page for this implementation at
 # Terence is not responding to email.
 #
 # $Log$
+# Revision 1.7  2005/07/12 21:43:56  kitterma
+# Added processing to clarify some cases of unknown
+# qualifier errors (to distinguish between unknown qualifier and
+# unknown mechanism).
+# Also cleaned up comments from previous updates.
+#
+# Revision 1.6  2005/06/29 14:46:26  customdesigned
+# Distinguish trivial recursion from missing arg for diagnostic purposes.
+#
+# Revision 1.5  2005/06/28 17:48:56  customdesigned
+# Support extended processing results when a PermError should strictly occur.
+#
+# Revision 1.4  2005/06/22 15:54:54  customdesigned
+# Correct spelling.
+#
+# Revision 1.3  2005/06/22 00:08:24  kitterma
+# Changes from draft-mengwong overall DNS lookup and recursion
+# depth limits to draft-schlitt-spf-classic-02 DNS lookup, MX lookup, and
+# PTR lookup limits.  Recursion code is still present and functioning, but
+# it should be impossible to trip it.
+#
+# Revision 1.2  2005/06/21 16:46:09  kitterma
+# Updated definition of SPF, added reference to the sourceforge project site,
+# and deleted obsolete Microsoft Caller ID for Email XML translation routine.
+#
+# Revision 1.1.1.1  2005/06/20 19:57:32  customdesigned
+# Move Python SPF to its own module.
+#
+# Revision 1.5  2005/06/14 20:31:26  customdesigned
+# fix pychecker nits
+#
 # Revision 1.4  2005/06/02 04:18:55  customdesigned
 # Update copyright notices after reading article on /.
 #
@@ -144,135 +176,6 @@ import struct  # for pack() and unpack()
 import time    # for time()
 import DNS	# http://pydns.sourceforge.net
-import xml.sax
-# -------------------------------------------------------------------------
-# Convert a MS Caller-ID entry (XML) to a SPF entry
-#
-# (c) 2004 by Ernesto Baschny
-# (c) 2004 Python version by Stuart Gathman
-#
-# Date: 2004-02-25
-#
-# A complete reverse translation (SPF -> CID) might be impossible, since
-# there are no ways to handle:
-# - PTR and EXISTS mechanism 
-# - MX mechanism with an different domain as argument
-# - macros
-# 
-# References:
-# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
-# http://spf.pobox.com/
-#
-# Known bugs:
-# - Currently it won't handle the exclusions provided in the A and R
-#   tags (prefix '!'). They will show up "as-is" in the SPF record
-# - I really haven't read the MS-CID specs in-depth, so there are probably
-#   other bugs too :)
-#
-# Ernesto Baschny <ernst@baschny.de>
-#
-class CIDParser(xml.sax.ContentHandler):
-  "Convert a MS Caller-ID entry (XML) to a SPF entry."
-  def __init__(self,q=None):
-    self.spf = []
-    self.action = '-all'
-    self.has_servers = None
-    self.spf_entry = None
-    if q:
-      self.spf_query = q
-    else:
-      self.spf_query = query(i='127.0.0.1', s='localhost', h='unknown')
-  def startElement(self,tag,attr):
-      if tag == 'm':
-	if self.has_servers != None and not self.has_servers:
-	  raise ValueError(
-    "Declared <noMailServers\> and later <m>, this CID entry is not valid."
-	  )
-	self.has_servers = True
-      elif tag == 'noMailServers':
-	if self.has_servers:
-	  raise ValueError(
-    "Declared <m> and later <noMailServers\>, this CID entry is not valid."
-	  )
-	self.has_servers = False
-      elif tag == 'ep':
-	if attr.has_key('testing') and attr.getValue('testing') == 'true':
-	  # A CID with 'testing' found:
-	  # From the MS-specs:
-	  #  "Documents in which such attribute is present with a true
-	  #  value SHOULD be entirely ignored (one should act as if the
-	  #  document were absent)"
-	  # From the SPF-specs:
-	  #  "Neutral (?): The SPF client MUST proceed as if a domain did
-	  #  not publish SPF data."
-	  # So we set SPF action to "neutral":
-	  self.action = '?all'
-      elif tag == 'mx':
-	  # The empty MX-tag, same as SPF's MX-mechanism
-	  self.spf.append('mx')
-      self.tag = tag
-  def characters(self,text):
-	tag = self.tag
-	# Remove starting and trailing spaces from text:
-	text = text.strip()
-	if tag == 'a' or tag == 'r':
-	    # The A and R tags from MS-CID are both handled by the 
-	    # ipv4/6-mechanisms from SPF:
-	    if text.find(':') < 0:
-	      mechanism = 'ip4'
-	    else:
-	      mechanism = 'ip6'
-	    self.spf.append(mechanism + ':' + text)
-	elif tag == 'indirect':
-	    # MS-CID's indirect is "sort of" the include from SPF:
-	    # Not really true, because the <indirect> tag from MS-CID also 
-	    # provides a fallback in case the included domain doesn't provide
-	    # _ep-records: The inbound MX-servers of the included domains
-	    # are added to the list of allowed outgoing mailservers for the
-	    # domain that declared the _ep-record with the <indirect> tag.
-	    # In SPF you would use the 'mx:domain' to handle this, but this
-	    # wouldn't depend on referred domain having or not SPF-records.
-	    cid_xml = self.cid_txt(text)
-	    if cid_xml:
-	      p = CIDParser()
-	      xml.sax.parseString(cid_xml,p)
-	      if p.has_servers != False:
-		self.spf += p.spf
-	    else:
-	      self.spf.append('mx:' + text)
-  def cid_txt(self,domain):
-    q = self.spf_query
-    domain='_ep.' + domain
-    a = q.dns_txt(domain)
-    if not a: return None
-    if a[0].lower().startswith('<ep ') and a[-1].lower().endswith('</ep>'):
-      return ''.join(a)
-    return None
-  def endElement(self,tag):
-      if tag == 'ep':
-	# This is the end... assemble what we've got
-	spf_entry = ['v=spf1']
-	if self.has_servers != False:
-	  spf_entry += self.spf
-	spf_entry.append(self.action)
-	self.spf_entry = ' '.join(spf_entry)
-  def spf_txt(self,cid_xml):
-    if not cid_xml.startswith('<'):
-      cid_xml = self.cid_txt(cid_xml)
-      if not cid_xml: return None
-    # Parse the beast. Any XML-problem will be reported by xlm.sax
-    self.spf_entry = None
-    xml.sax.parseString(cid_xml,self)
-    return self.spf_entry
 # 32-bit IPv4 address mask
 MASK = 0xFFFFFFFFL
@@ -297,7 +200,7 @@ RESULTS = {'+': 'pass', '-': 'fail', '?': 'neutral', '~': 'softfail',
 	   'none': 'none', 'deny': 'fail' }
 EXPLANATIONS = {'pass': 'sender SPF verified', 'fail': 'access denied',
-                'unknown': 'SPF unknown',
+                'unknown': 'SPF unknown (PermError)',
 		'softfail': 'domain in transition',
 		'neutral': 'access neither permitted nor denied',
 		'none': ''
@@ -320,7 +223,9 @@ except NameError:
 DEFAULT_SPF = 'v=spf1 a/24 mx/24 ptr'
 # maximum DNS lookups allowed
-MAX_LOOKUP = 100
+MAX_LOOKUP = 10 #draft-schlitt-spf-classic-02 Para 10.1
+MAX_MX = 10 #draft-schlitt-spf-classic-02 Para 10.1
+MAX_PTR = 10 #draft-schlitt-spf-classic-02 Para 10.1
 MAX_RECURSION = 20
 class TempError(Exception):
@@ -328,10 +233,11 @@ class TempError(Exception):
 class PermError(Exception):
 	"Permanent SPF error"
-	def __init__(self,msg,mech=None):
+	def __init__(self,msg,mech=None,ext=None):
 	  Exception.__init__(self,msg,mech)
 	  self.msg = msg
 	  self.mech = mech
+	  self.ext = ext
 	def __str__(self):
 	  if self.mech:
 	    return '%s: %s'%(self.msg,self.mech)
@@ -372,7 +278,7 @@ class query(object):
 	Also keeps cache: DNS cache.
 	"""
-	def __init__(self, i, s, h,local=None,receiver=None):
+	def __init__(self, i, s, h,local=None,receiver=None,strict=True):
 		self.i, self.s, self.h = i, s, h
 		if not s and h:
 		  self.s = 'postmaster@' + h
@@ -387,6 +293,7 @@ class query(object):
 		self.exps = dict(EXPLANATIONS)
 		self.local = local	# local policy
    		self.lookups = 0
+		self.strict = strict
 	def set_default_explanation(self,exp):
 		exps = self.exps
@@ -412,6 +319,11 @@ class query(object):
 	result in ['fail', 'softfail', 'neutral' 'unknown', 'pass', 'error']
 		"""
 		self.mech = []		# unknown mechanisms
+		# If not strict, certain PermErrors (misspelled
+		# mechanisms, strict processing limits exceeded)
+		# will continue processing.  However, the exception
+		# that strict processing would raise is saved here
+		self.perm_error = None
 		if self.i.startswith('127.'):
 			return ('pass', 250, 'local connections always pass')
@@ -421,7 +333,12 @@ class query(object):
 			    spf = self.dns_spf(self.d)
 			if self.local and spf:
 			    spf += ' ' + self.local
-			return self.check1(spf, self.d, 0)
+			rc = self.check1(spf, self.d, 0)
+			if self.perm_error:
+			  # extended processing succeeded, but strict failed
+			  self.perm_error.ext = rc
+			  raise self.perm_error
+			return rc
 		except DNS.DNSError,x:
 			return ('error', 450, 'SPF DNS Error: ' + str(x))
 		except TempError,x:
@@ -431,8 +348,8 @@ class query(object):
 		    self.mech.append(x.mech)
 		    # Pre-Lentczner draft treats this as an unknown result
 		    # and equivalent to no SPF record.
-		    # return ('unknown', 550, 'SPF Permanent Error: ' + str(x))
+		    return ('unknown', 550, 'SPF Permanent Error: ' + str(x))
-		    return ('error', 550, 'SPF Permanent Error: ' + str(x))
+		    # return ('error', 550, 'SPF Permanent Error: ' + str(x))
 	def check1(self, spf, domain, recursion):
 		# spf rfc: 3.7 Processing Limits
@@ -479,6 +396,7 @@ class query(object):
 			exps['fail'] = exps['unknown'] = \
 				self.get_explanation(m[1])
 		    elif m[0] == 'redirect':
+		        self.check_lookups()
 			redirect = self.expand(m[1])
 		    elif m[0] == 'default':
 			# default=- is the same as default=fail
@@ -502,11 +420,15 @@ class query(object):
 			    # default pass
 			    result = 'pass'
-		    if m in ['a', 'mx', 'ptr', 'prt', 'exists', 'include']:
+		    if m in ('a', 'mx', 'ptr', 'exists', 'include'):
+		    	    self.check_lookups()
 			    arg = self.expand(arg)
 		    if m == 'include':
-		      if arg != self.d:
+		      if arg == self.d:
+		        if mech != 'include':
+			  raise PermError('include has trivial recursion',mech)
+			raise PermError('include mechanism missing domain',mech)
 		      res,code,txt = self.check1(self.dns_spf(arg),
 					arg, recursion + 1)
 		      if res == 'pass':
@@ -516,8 +438,6 @@ class query(object):
 			  'No valid SPF record for included domain: %s'%arg,
 			  mech)
 		      continue
-		      else:
-			raise PermError('include mechanism missing domain',mech)
 		    elif m == 'all':
 			    break
@@ -536,6 +456,13 @@ class query(object):
 				    break
 		    elif m in ('ip4', 'ipv4', 'ip') and arg != self.d:
+		        try:
+			  if m != 'ip4':
+			    raise PermError('Unknown mechanism found',mech)
+			except PermError, x:
+			  if self.strict: raise
+			  if not self.perm_error:
+			    self.perm_error = x
 			try:
 			    if cidrmatch(self.i, [arg], cidrlength):
 				break
@@ -543,19 +470,41 @@ class query(object):
 			    raise PermError('syntax error',mech)
 		    elif m in ('ip6', 'ipv6'):
+		        try:
+			  if m != 'ip6':
+			    raise PermError('Unknown mechanism found',mech)
+			except PermError, x:
+			  if self.strict: raise
+			  if not self.perm_error:
+			    self.perm_error = x
 			# Until we support IPV6, we should never
 			# get an IPv6 connection.  So this mech
 			# will never match.
 			pass
 		    elif m in ('ptr', 'prt'):
-			    if domainmatch(self.validated_ptrs(self.i),
+		        try:
-					   arg):
+			  if m != 'ptr':
+			    raise PermError('Unknown mechanism found',mech)
+			except PermError, x:
+			  if self.strict: raise
+			  if not self.perm_error:
+			    self.perm_error = x
+			  self.check_lookups()
+			if domainmatch(self.validated_ptrs(self.i), arg):
 				break
 		    else:
-		      # unknown mechanisms cause immediate unknown
+		      # unknown mechanisms cause immediate PermError
 		      # abort results
+		      # first see if it might be a bad qualifier instead
+		      # of an unknown mechanism (no change to the result, just
+		      # fine tune the error).
+		      # eat one character and try again:
+		      m = m[1:]
+		      if m in ['a', 'mx', 'ptr', 'exists', 'include', 'ip4', 'ip6', 'all']:
+                          raise PermError('Unknown qualifier, IETF draft para 4.6.1, found in',mech)
+		      else:
                          raise PermError('Unknown mechanism found',mech)
 		else:
 		    # no matches
@@ -570,6 +519,17 @@ class query(object):
 		else:
 		    return (result, 250, exps[result])
+	def check_lookups(self):
+	    self.lookups = self.lookups + 1
+	    if self.lookups > MAX_LOOKUP:
+	      try:
+		if self.strict or not self.perm_error:
+		  raise PermError('Too many DNS lookups')
+	      except PermError,x:
+		if self.strict or self.lookups > MAX_LOOKUP*4:
+		  raise x
+		self.perm_error = x
 	def get_explanation(self, spec):
 		"""Expand an explanation."""
 		if spec:
@@ -682,13 +642,6 @@ class query(object):
 		      for t in self.dns_txt(domain+'._spf.'+DELEGATE)
 			if t.startswith('v=spf1')
 		    ]
-		  if not a:
-		    # No SPF record: convert and return CID if present
-		    p = CIDParser(q=self)
-		    try:
-		      return p.spf_txt(domain)
-		    except xml.sax._exceptions.SAXParseException:
-		      raise PermError("Caller-ID parse error",domain)
 		if len(a) == 1:
 			return a[0]
@@ -739,15 +692,22 @@ class query(object):
 		pre: qtype in ['A', 'AAAA', 'MX', 'PTR', 'TXT', 'SPF']
 		post: isinstance(__return__, types.ListType)
 		"""
-		self.lookups += 1
-		if self.lookups > MAX_LOOKUP:
-			raise PermError('Too many DNS lookups')
 		result = self.cache.get( (name, qtype) )
 		cname = None
 		if not result:
+                        mxcount = 0
+                        ptrcount = 0
 			req = DNS.DnsRequest(name, qtype=qtype)
 			resp = req.req()
 			for a in resp.answers:
+                                if a['typename'] == 'MX':
+                                    mxcount = mxcount + 1
+                                    if mxcount > MAX_MX:
+                                        raise PermError('Too many MX lookups')
+                                if a['typename'] == 'PTR':
+                                    ptrcount = ptrcount + 1
+                                    if ptrcount > MAX_PTR:
+                                        raise PermError('Too many PTR lookups')
 				# key k: ('wayforward.net', 'A'), value v
 				k, v = (a['name'], a['typename']), a['data']
 				if k == (name, 'CNAME'):
@@ -838,6 +798,9 @@ def parse_mechanism(str, d):
 	>>> parse_mechanism('a:bar.com/16', 'foo.com')
 	('a', 'bar.com', 16)
+	>>> parse_mechanism('A:bar.com/16', 'foo.com')
+	('a', 'bar.com', 16)
 	"""
 	a = str.split('/')
 	if len(a) == 2:
@@ -847,9 +810,9 @@ def parse_mechanism(str, d):
 	b = a.split(':')
 	if len(b) == 2:
-		return b[0], b[1], port
+		return b[0].lower(), b[1], port
 	else:
-		return a, d, port
+		return a.lower(), d, port
 def reverse_dots(name):
 	"""Reverse dotted IP addresses or domain names.