在给网站部署统计代码后,往往需要检查开发人员部署的代码是否到位。人工逐页检查非常耗时,于是我写了一个用 Python 自动检查统计代码的小程序,现在拿出来分享一下。
# -*- coding: utf-8 -*-
# Python 2 script: for every URL listed (one per line) in `filename`, download
# the page and print how many times the exact tracking snippet `code` occurs.
__author__ = 'www.biaodianfu.com'

import gzip
import re
import StringIO
import urllib2

# Exact snippet text that is searched for in each page.
code = "_gaq.push(['_setAccount', 'UA-123456789-1']);"
filename = 'urllist.txt'
# Encoding used to decode downloaded pages; loop-invariant, so set once.
page_encode = "utf-8"


def _fetch_page(url):
    """Download *url* and return its body as a unicode string.

    The request advertises gzip support; if the response carries a
    ``content-encoding: gzip`` header the body is decompressed before
    decoding. Undecodable bytes are replaced rather than raising, because
    the snippet being searched for is pure ASCII and is unaffected.

    Raises:
        urllib2.HTTPError: the server answered with an HTTP error status.
        urllib2.URLError: the URL could not be opened at all.
    """
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip")
    sock = urllib2.urlopen(request)
    try:
        page = sock.read()
        is_gzipped = sock.headers.get('content-encoding', None) == 'gzip'
    finally:
        # Always release the connection, even if reading fails.
        sock.close()
    if is_gzipped:
        page = gzip.GzipFile(fileobj=StringIO.StringIO(page)).read()
    if not isinstance(page, unicode):
        page = page.decode(page_encode, 'replace')
    return page


# `with` guarantees the list file is closed even if reading raises.
with open(filename, 'r') as checklist:
    # Skip blank lines: an empty URL would make urllib2.Request raise.
    urls = [line.strip() for line in checklist if line.strip()]

for url in urls:
    try:
        page = _fetch_page(url)
    except urllib2.HTTPError as e:
        print("%s %s" % (url, e.code))
    except urllib2.URLError as e:
        # HTTPError subclasses URLError, so this must come second; it keeps
        # one unreachable host from aborting the whole run.
        print("%s %s" % (url, e.reason))
    else:
        times = page.count(code)
        print("%s 出现了 %d 次" % (url, times))
如果要判断 _setAccount 或 _gaq 中的其他项是否添加正确,可以采用以下代码。
# -*- coding: utf-8 -*-
# Python 2 script: for every URL listed (one per line) in `filename`, download
# the page and print the first account ID passed to `_setAccount`.
__author__ = 'www.biaodianfu.com'

import gzip
import re
import StringIO
import urllib2

filename = 'urllist.txt'
# Encoding used to decode downloaded pages; loop-invariant, so set once.
page_encode = "utf-8"
# Captures the value following '_setAccount'. Compiled once outside the loop.
# Accepts single or double quotes and optional whitespace around the comma so
# that minified or differently-quoted snippets are still recognised.
pattern = re.compile(r"""['"]_setAccount['"]\s*,\s*['"](.*?)['"]""")


def _fetch_page(url):
    """Download *url* and return its body as a unicode string.

    The request advertises gzip support; if the response carries a
    ``content-encoding: gzip`` header the body is decompressed before
    decoding. Undecodable bytes are replaced rather than raising, so a page
    in another encoding does not abort the run.

    Raises:
        urllib2.HTTPError: the server answered with an HTTP error status.
        urllib2.URLError: the URL could not be opened at all.
    """
    request = urllib2.Request(url)
    request.add_header("Accept-encoding", "gzip")
    sock = urllib2.urlopen(request)
    try:
        page = sock.read()
        is_gzipped = sock.headers.get('content-encoding', None) == 'gzip'
    finally:
        # Always release the connection, even if reading fails.
        sock.close()
    if is_gzipped:
        page = gzip.GzipFile(fileobj=StringIO.StringIO(page)).read()
    if not isinstance(page, unicode):
        page = page.decode(page_encode, 'replace')
    return page


# `with` guarantees the list file is closed even if reading raises.
with open(filename, 'r') as checklist:
    # Skip blank lines: an empty URL would make urllib2.Request raise.
    urls = [line.strip() for line in checklist if line.strip()]

for url in urls:
    try:
        page = _fetch_page(url)
    except urllib2.HTTPError as e:
        print("%s %s" % (url, e.code))
    except urllib2.URLError as e:
        # HTTPError subclasses URLError, so this must come second; it keeps
        # one unreachable host from aborting the whole run.
        print("%s %s" % (url, e.reason))
    else:
        codes = pattern.findall(page)
        if codes:
            account = codes[0].encode('utf-8')
            print("%s Account:%s" % (url, account))
        else:
            # A page without any _setAccount call used to crash on codes[0].
            print("%s Account: not found" % url)