minor tewaks

This commit is contained in:
2010-04-09 14:17:10 -07:00
parent d51440ad0b
commit 9c288c10c8
5 changed files with 495 additions and 9 deletions

View File

@@ -178,7 +178,7 @@ def reformat_content(data):
return p.sub(asciirepl, data)
def fix_tbox_json(s): # Check :: This is a bad logic by :: not checking for CRTL chars in JSON text -- Murali
def fix_tbox_json(s):
"""Fixes up tinderbox json.
Tinderbox returns strings as single-quoted strings, and occasionally

459
unittest-logs/unittest-log.py~ Executable file
View File

@@ -0,0 +1,459 @@
#!/usr/bin/env python
#Indentation is 2 spaces ***** DO NOT USE TABS *****
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is TopFails site code.
#
# The Initial Developer of the Original Code is
# Mozilla foundation
# Portions created by the Initial Developer are Copyright (C) 2010
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Serge Gautherie <sgautherie.bz@free.fr>
# Ted Mielczarek <ted.mielczarek@gmail.com>.
# Murali Nandigama <Murali.Nandigama@Gmail.COM>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
import re, os, sys, urllib, logging
import MySQLdb # Moved from sqlite3 db to MySQL
from time import ctime, sleep, time
from math import ceil
from optparse import OptionParser
from gzip import GzipFile
import curses.ascii
import binascii
try:
# 'Availability: Unix.'
from time import tzset
except ImportError:
print >>sys.stderr, "WARNING: time.tzset() is not available on non-Unixes!"
# Define a fake function. (for development use only)
# ToDo: Investigate Windows/etc situation. (Bug 525699)
def tzset():
pass
try:
import simplejson as json
except ImportError:
try:
# 'New in version 2.6.'
import json
except ImportError:
print >>sys.stderr, "ERROR: no simplejson nor json package found!"
sys.exit(1)
from dbschema import CreateDBSchema
# Number of seconds in a hour: 60 mn * 60 s = 3600 s.
S_IN_H = 3600
# Download data in 24 hours chunks so as not to overwhelm the tinderbox server.
chunksize = 24 * S_IN_H
# seconds between requests
SLEEP_TIME = 1
class OS():
Windows = 0
Mac = 1
Linux = 2
Unknown = 3
class BuildStatus():
# Unavailable builds to skip.
# Values need (only) to be less than the 'Success' one.
NoBuild = -2
InProgress = -1
# Builds to save in the db.
# Do not change these values (without updating db data).
Success = 0
TestFailed = 1
Burning = 2
Exception = 3
Unknown = 4
csetre = re.compile("rev/([0-9A-Za-z]+)")
def FindChangesetInScrape(scrape):
for line in scrape:
m = csetre.search(line)
if m:
return m.group(1)
return None
def OSFromBuilderName(name):
if name.startswith("Linux"):
return OS.Linux
if name.startswith("MacOSX") or name.startswith("OS X"):
return OS.Mac
if name.startswith("WINNT"):
return OS.Windows
return OS.Unknown
buildStatuses = {
# "No build in progress".
"null": BuildStatus.NoBuild,
# "Build in progress".
"building": BuildStatus.InProgress,
# "Successful build".
"success": BuildStatus.Success,
# "Successful build, but tests failed".
"testfailed": BuildStatus.TestFailed,
# "Build failed".
"busted": BuildStatus.Burning,
# "Non-build failure". (i.e. "automation failure")
"exception": BuildStatus.Exception,
}
def BuildStatusFromText(status):
try:
return buildStatuses[status]
except KeyError:
# Log 'Unknown' status failure: this should not happen (unless new statuses are created), but we want to know if it does.
logging.info("WARNING: unknown status = '%s'!" % status)
return BuildStatus.Unknown
def GetOrInsertTree(conn, tree):
"""Get an id for a tree named |tree|. If it's not already in the trees
table, insert a new row and return the id."""
conn.execute("""SELECT id FROM trees WHERE name = %s""", (tree))
if conn.rowcount > 0:
return conn.lastrowid
# need to insert it
conn.execute("""INSERT INTO trees (name) VALUES (%s)""", (tree,))
return conn.lastrowid
def HaveBuild(conn, treeid, os, starttime):
"""See if we already have this build in our database."""
conn.execute("""SELECT COUNT(*) FROM builds WHERE treeid = %s AND os = %s AND starttime = %s""", (treeid, os, starttime))
return conn.fetchone()[0] == 1
def UpdateLogfile(conn, treeid, os, starttime, logfile):
"""Update empty 'logfile' for a given build (added in db schema v1)."""
conn.execute("""UPDATE builds SET logfile = %s WHERE treeid = %s AND os = %s AND starttime = %s AND logfile IS NULL""", (logfile, treeid, os, starttime))
def InsertBuild(conn, treeid, os, starttime, status, logfile, changeset):
"""Insert a build into the builds table and return the id."""
conn.execute("""INSERT INTO builds (treeid, os, starttime, status, logfile, changeset) VALUES (%s, %s, %s, %s, %s, %s)""", (treeid, os, starttime, status, logfile, changeset))
return conn.lastrowid
def InsertTest(conn, buildid, result, name, description):
# ToDo: Add column to save result.
conn.execute("""INSERT INTO tests (buildid, name, description) VALUES (%s, %s, %s)""", (buildid, name, description))
def asciirepl(match):
# replace the hexadecimal characters with ascii characters
s = match.group()
return binascii.unhexlify(s)
def reformat_content(data):
p = re.compile(r'\\x(\w{2})')
return p.sub(asciirepl, data)
def fix_tbox_json(s): # Check :: This is a bad logic by :: not checking for CRTL chars in JSON text -- Murali
"""Fixes up tinderbox json.
Tinderbox returns strings as single-quoted strings, and occasionally
includes the unquoted substring 'undef' (with quotes) in the output, e.g.
{'key': 'hello 'undef' world'}
should return a dictionary
{'key': 'hello \'undef\' world'}
"""
json_data = re.sub(r"^tinderbox_data\s*=\s*", "", s)
json_data = re.sub(r";$", "", json_data)
retval = []
in_str = False
in_esc = False
skip = 0
for i,c in enumerate(json_data):
# The tinderbox data is a fracked json. and it some times contains
# Control characters. that would totally fail the json.loads step.
# So, eliminate them .. all of them .. here -- Murali
if (c < '\xFD' and c > '\x1F') or c == '\n' or c == '\r' :
if skip > 0:
skip -= 1
continue
if in_str:
if in_esc:
if c == "'":
retval.append("'")
else:
retval.append("\\")
retval.append(c)
in_esc = False
elif c == "\\":
in_esc = True
elif c == "\"":
retval.append("\\\"")
elif c == "'":
if json_data[i:i+7] == "'undef'":
retval.append("'undef'")
skip = 7
else:
retval.append("\"")
in_str = False
else:
retval.append(c)
else:
if c == "'":
retval.append("\"")
in_str = True
else:
retval.append(c)
return "".join(retval)
parser = OptionParser()
parser.add_option("-s", "--span", action="store",
dest="timespan", default="20d",
help="Period of time to fetch data for (N[y,m,w,d,h], default=%default)")
parser.add_option("-t", "--tree", action="store",
dest="tree", default="Firefox",
help="Tinderbox tree to fetch data from (default=%default)")
parser.add_option("-d", "--database", action="store",
dest="db", default="topfailsdb",
help="Database filename (default=%default)")
parser.add_option("--host", action="store",
dest="dbhost", default="localhost",
help="Database host name (default=%default)")
parser.add_option( "--port", action="store",
dest="dbport",default="3306",
help="Database port (default=%default)")
parser.add_option("-u", "--user", action="store",
dest="dbuser", default="root",
help="Database username (default=%default)")
parser.add_option("-p", "--passwd", action="store",
dest="dbpasswd",
help="Database user password")
parser.add_option("-v", "--verbose", action="store_true",
dest="verbose", default="False",
help="Enable verbose logging")
(options, args) = parser.parse_args()
logging.basicConfig(level=options.verbose and logging.DEBUG or logging.WARNING)
os.environ['TZ'] = "US/Pacific"
tzset()
# Get current time, in seconds.
endtime = int(time())
m = re.match("(\d+)([ymwdh])", options.timespan)
if m is None:
print >>sys.stderr, "ERROR: bad timespan = '%s'!" % options.timespan
sys.exit(1)
timespan = int(m.group(1)) * {'y': 365 * 24 * S_IN_H,
'm': 30 * 24 * S_IN_H,
'w': 7 * 24 * S_IN_H,
'd': 24 * S_IN_H,
'h': S_IN_H}[m.group(2)]
# Set current time to beginning of requested timespan ending now.
curtime = endtime - timespan
createdb=False
try:
connection = MySQLdb.connect (host = options.dbhost,
port = int(options.dbport),
db = options.db,
user = options.dbuser,
passwd = options.dbpasswd)
conn=connection.cursor()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
createdb = True
if createdb:
connection = MySQLdb.connect (host = options.dbhost,
port = int(options.dbport),
user = options.dbuser,
passwd = options.dbpasswd)
conn = connection.cursor()
try:
createdatabase='create database %s' %(options.db)
conn.execute (createdatabase)
conn.close()
connection.close()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
try:
connection = MySQLdb.connect (host = options.dbhost,
port = int(options.dbport),
db = options.db,
user = options.dbuser,
passwd = options.dbpasswd)
conn=connection.cursor()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit(1)
CreateDBSchema(conn)
treeid = GetOrInsertTree(conn, options.tree)
logging.info("Reading tinderbox data...")
chunk = 0
# add a fudge factor here, since builds can take up to 3 hours to finish,
# and we can't get the changeset unless we ask for time up to the end of the
# build
endtime += 3 * S_IN_H
timespan += 3 * S_IN_H
totalchunks = int(ceil(float(timespan) / chunksize))
while curtime < endtime and chunk < totalchunks:
chunk += 1
logging.info("Chunk %d/%d" % (chunk, totalchunks))
if (endtime - curtime) < chunksize:
chunksize = endtime - curtime
tboxurl = "http://tinderbox.mozilla.org/showbuilds.cgi?tree=%(tree)s&maxdate=%(maxdate)d&noignore=1&hours=%(hours)d&json=1" \
% {'tree': options.tree,
'maxdate': curtime + chunksize, # tbox wants the end time
'hours': int(chunksize / S_IN_H)}
u = urllib.urlopen(tboxurl)
tboxjson = u.read()
#tboxjson = tboxjson.encode('utf-8').decode('string_escape').decode('utf-8')
#tboxjson = ''.join(u.readlines())
u.close()
tboxjson = fix_tbox_json(tboxjson)
try:
tboxdata = json.loads(tboxjson)
except Exception, inst:
print >>sys.stderr, "Error parsing JSON: %s" % inst
continue
# we only care about unit test boxes
unittest_indices = [tboxdata['build_name_index'][x] for x in tboxdata['build_name_index'] if re.search("test|xpc", x)]
# read build table
# 'TestFailed' expected log format is "result | test | optional text".
testfailedRe = re.compile(r"(TEST-UNEXPECTED-.*) \| (.*) \|(.*)")
for timerow in tboxdata['build_table']:
for index in unittest_indices:
if index >= len(timerow) or timerow[index] == -1:
continue
build = timerow[index]
if 'buildname' not in build or \
'logfile' not in build:
continue
status = BuildStatusFromText(build['buildstatus'])
# Skip unavailable "builds".
if status < BuildStatus.Success:
continue
name = build['buildname']
os = OSFromBuilderName(name)
starttime = int(build['buildtime'])
# skip builds we've already seen
if HaveBuild(conn, treeid, os, starttime):
logging.info("Skipping already seen build '%s' at %d (%s)" % (name, starttime, ctime(starttime)))
# Call 'UpdateLogfile()' anyway.
UpdateLogfile(conn, treeid, os, starttime, build['logfile'])
continue
# must have scrape data for changeset
if build['logfile'] not in tboxdata['scrape']:
continue
changeset = FindChangesetInScrape(tboxdata['scrape'][build['logfile']])
if changeset is None:
continue
buildid = InsertBuild(conn, treeid, os, starttime, status, build['logfile'], changeset)
# 'Success' is fine as is.
if status == BuildStatus.Success:
pass
# Parse log to save 'TestFailed' results.
elif status == BuildStatus.TestFailed:
logging.info("Checking build log for '%s' at %d (%s)" % (name, starttime, ctime(starttime)))
try:
# Grab the build log.
log, headers = urllib.urlretrieve("http://tinderbox.mozilla.org/%s/%s" % (options.tree, build['logfile']))
gz = GzipFile(log)
# Look for test failures.
for line in gz:
m = testfailedRe.match(line)
if m:
test = rawtest = m.group(2).strip() or "[unittest-log.py: no logged test]"
if rawtest.find('\\') != -1:
test = rawtest.replace('\\','/')
if test.find('/') != -1:
tup=test.partition('build/')
if len(tup[2]) > 2:
test=tup[2]
else :
test=tup[0]
text = m.group(3).strip() or "[unittest-log.py: no logged text]"
InsertTest(conn, buildid, m.group(1).rstrip(), test, text)
except:
logging.error("Unexpected error: %s" % sys.exc_info()[0])
#XXX: handle me?
# Ignore 'Burning' builds: tests may have run nontheless, but it's safer to discard them :-|
elif status == BuildStatus.Burning:
continue
# Ignore 'Exception' builds: should only be worse than 'Burning'.
# (Don't know much at time of writing, since this feature is not active yet: see bug 476656 and follow-ups.)
elif status == BuildStatus.Exception:
continue
# Save 'Unknown' status failure: this should not happen (unless new statuses are created), but we want to know if it does.
elif status == BuildStatus.Unknown:
# Add a fake test failure.
InsertTest(conn, buildid, "TEST-UNEXPECTED-FAIL", "unittest-log.py", "Unknown status = '%s'!" % build['buildstatus'])
continue
if chunk < totalchunks:
sleep(SLEEP_TIME)
curtime += chunksize
conn.close()
logging.info("Done")

View File

@@ -103,6 +103,5 @@ def get_fails_in_timerange(self):
statement = "select count(*), name from (select builds.id, name from builds inner join tests on builds.id = tests.buildid where builds.starttime >"+str(curtime)+" group by builds.id, name) aaa group by name order by count(*) DESC"
cursor.execute(statement)
for row in cursor:
print row
yield row

View File

@@ -6,10 +6,10 @@
#
# Also note: You'll have to insert the output of 'django-admin.py sqlcustom [appname]'
# into your database.
import re
from django.db import models, connection
from datetime import datetime
from time import ctime, sleep, time
class OS():
Windows = 0
Mac = 1
@@ -44,7 +44,7 @@ class Trees(models.Model):
def __unicode__(self):
return self.name
class Meta:
db_table = u'trees'
db_table = 'trees'
class Builds(models.Model):
id = models.IntegerField(primary_key=True)
@@ -63,18 +63,46 @@ class Builds(models.Model):
return "http://tinderbox.mozilla.org/showlog.cgi?log=%s/%s" % (self.tree.name, self.logfile)
return "http://tinderbox.mozilla.org/showbuilds.cgi?tree=%s&maxdate=%d&hours=3" % (self.tree.name, self.starttime)
class Meta:
db_table = u'builds'
db_table = 'builds'
class Tests(models.Model):
ROWID = models.IntegerField(primary_key=True)
id = models.IntegerField(primary_key=True)
build = models.ForeignKey(Builds, db_column="buildid")
name = models.TextField(blank=True)
description = models.TextField(blank=True)
class Meta:
db_table = u'tests'
db_table = 'tests'
def get_most_failing_tests():
cursor = connection.cursor()
cursor.execute("select count(*), name from (select builds.id, name from builds inner join tests on builds.id = tests.buildid group by builds.id, name) group by name order by count(*) desc limit 250")
cursor.execute("select count(*), name from (select builds.id, name from builds inner join tests on builds.id = tests.buildid group by builds.id, name) aaa group by name order by count(*) desc limit 25")
for row in cursor:
yield row
def get_fails_in_timerange(self):
# Get current time, in seconds.
endtime = int(time())
#print endtime
m = re.match("(\d+)([ymwdh])", self)
#print m.group(1), m.group(2)
if m is None:
print >>sys.stderr, "ERROR: bad timespan = '%s'!" % options.timespan
sys.exit(1)
timespan = int(m.group(1)) * {'y': 365 * 24 * 3600,
'm': 30 * 24 * 3600,
'w': 7 * 24 * 3600,
'd': 24 * 3600,
'h': 3600}[m.group(2)]
# Set current time to beginning of requested timespan ending now.
curtime = endtime - timespan
#print curtime, timespan, endtime-curtime
cursor = connection.cursor()
statement = "select count(*), name from (select builds.id, name from builds inner join tests on builds.id = tests.buildid where builds.starttime >"+str(curtime)+" group by builds.id, name) aaa group by name order by count(*) DESC"
cursor.execute(statement)
for row in cursor:
print row
yield row