*** tokenizer.py.orig Thu Apr 8 21:00:41 2004 --- tokenizer.py Sat Jun 26 20:37:48 2004 *************** *** 3,8 **** --- 3,12 ---- from __future__ import generators + import dnscache + cache=dnscache.cache(dnsServer="209.98.98.98") + cache.printStatsAtEnd=True + import email import email.Message import email.Header *************** *** 1051,1056 **** --- 1055,1074 ---- # now remove any obfuscation and probe around a bit url = urllib.unquote(url) scheme, netloc, path, params, query, frag = urlparse.urlparse(url) + + + ips=cache.lookup(netloc) + if len(ips)==0: + pushclue("url-ip:timeout") + else: + for ip in ips: # Should we limit to one A record? + pushclue("url-ip:%s/32" % ip) + dottedQuadList=ip.split(".") + pushclue("url-ip:%s/8" % dottedQuadList[0]) + pushclue("url-ip:%s.%s/16" % (dottedQuadList[0],dottedQuadList[1])) + pushclue("url-ip:%s.%s.%s/24" % (dottedQuadList[0], + dottedQuadList[1],dottedQuadList[2])) + # one common technique in bogus "please (re-)authorize yourself" # scams is to make it appear as if you're visiting a valid