rearranging files

2010-05-13 15:06:37 -07:00
parent d49a963b16
commit 27e3468ab6
4 changed files with 0 additions and 0 deletions
--- a/topfails/datascraper.py
+++ b/topfails/datascraper.py
@@ -0,0 +1,532 @@
+#!/usr/bin/env python
+#Indentation is 2 spaces  ***** DO NOT USE TABS *****
+
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is TopFails site code.
+#    
+# The Initial Developer of the Original Code is
+# Mozilla foundation
+# Portions created by the Initial Developer are Copyright (C) 2010
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Serge Gautherie <sgautherie.bz@free.fr>
+#   Ted Mielczarek <ted.mielczarek@gmail.com>.
+#   Murali Nandigama <Murali.Nandigama@Gmail.COM>
+#   Jeff Hammel <jhammel@mozilla.com>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+import re, os, sys, urllib, logging
+import MySQLdb # Moved from  sqlite3 db  to MySQL
+from time import ctime, sleep, time
+from math import ceil
+from optparse import OptionParser
+from gzip import GzipFile
+import binascii
+
+# local imports
+import logparser
+import mappings
+
+try:
+  # 'Availability: Unix.'
+  from time import tzset
+except ImportError:
+  print >>sys.stderr, "WARNING: time.tzset() is not available on non-Unixes!"
+
+  # Define a fake function. (for development use only)
+  # ToDo: Investigate Windows/etc situation. (Bug 525699)
+  def tzset():
+    pass
+
+try:
+  import simplejson as json
+except ImportError:
+  try:
+    # 'New in version 2.6.'
+    import json
+  except ImportError:
+    print >>sys.stderr, "ERROR: no simplejson nor json package found!"
+    sys.exit(1)
+
+from dbschema import CreateDBSchema  
+
+# Number of seconds in an hour: 60 min * 60 s = 3600 s.
+S_IN_H = 3600
+# Download data in 24 hours chunks so as not to overwhelm the tinderbox server.
+chunksize = 24 * S_IN_H
+# seconds between requests
+SLEEP_TIME = 1
+
+class BuildStatus:
+  """Numeric codes for the build states handled by the scraper.
+
+  Codes below 'Success' mark builds that are skipped; the other codes are
+  the ones saved in the db.
+  """
+
+  # Builds to save in the db.
+  # Do not change these values (without updating db data).
+  Success = 0
+  TestFailed = 1
+  Burning = 2
+  Exception = 3
+  Unknown = 4
+
+  # Unavailable builds to skip.
+  # Values need (only) to be less than the 'Success' one.
+  InProgress = -1
+  NoBuild = -2
+
+# Matches the changeset id found in a ".../rev/<id>" link.
+csetre = re.compile("rev/([0-9A-Za-z]+)")
+def FindChangesetInScrape(scrape):
+  """Return the changeset id of the first entry of |scrape| that contains a
+  "rev/<id>" reference, or None when no entry has one."""
+  hits = (csetre.search(entry) for entry in scrape)
+  for hit in hits:
+    if hit is not None:
+      return hit.group(1)
+  return None
+
+# Maps the tinderbox 'buildstatus' strings onto BuildStatus codes.
+buildStatuses = {
+  # "No build in progress".
+  "null":       BuildStatus.NoBuild,
+  # "Build in progress".
+  "building":   BuildStatus.InProgress,
+  # "Successful build".
+  "success":    BuildStatus.Success,
+  # "Successful build, but tests failed".
+  "testfailed": BuildStatus.TestFailed,
+  # "Build failed".
+  "busted":     BuildStatus.Burning,
+  # "Non-build failure". (i.e. "automation failure")
+  "exception":  BuildStatus.Exception,
+}
+def BuildStatusFromText(status):
+  """Translate a tinderbox 'buildstatus' string into a BuildStatus code.
+
+  A status that is not listed in |buildStatuses| is reported in the log and
+  mapped to BuildStatus.Unknown.
+  """
+  try:
+    return buildStatuses[status]
+  except KeyError:
+    # This should not happen (unless new statuses are created), but we want
+    # to know if it does: log it as a warning, so that the message is not
+    # filtered out when logging runs at the (non-verbose) WARNING level.
+    logging.warning("WARNING: unknown status = '%s'!" % status)
+    return BuildStatus.Unknown
+
+def GetOrInsertTree(conn, tree):
+  """Get an id for a tree named |tree|. If it's not already in the
+  viewer_tree table, insert a new row and return the id.
+
+  |conn| is the db cursor; a newly inserted row is committed through the
+  module-level |connection|.
+  """
+
+  # Query parameters are passed as a real 1-tuple: "(tree)" is only a
+  # parenthesized string, not a sequence of parameters.
+  conn.execute("""
+               SELECT id FROM viewer_tree WHERE name = %s
+               """, (tree,))
+  if conn.rowcount > 0:
+    return conn.fetchone()[0]
+
+  # need to insert it
+  conn.execute("""
+        INSERT INTO viewer_tree (name) VALUES (%s)
+        """, (tree,))
+  connection.commit()
+  return conn.lastrowid
+
+def GetOrInsertTest(conn, testname):
+  """Get an id for a test named |testname|. If it's not already in the
+  viewer_test table, insert a new row and return the id.
+
+  |conn| is the db cursor; a newly inserted row is committed through the
+  module-level |connection|.
+  """
+
+  # Query parameters are passed as a real 1-tuple: "(testname)" is only a
+  # parenthesized string, not a sequence of parameters.
+  conn.execute("""
+               SELECT id FROM viewer_test WHERE name = %s
+               """, (testname,))
+  if conn.rowcount > 0:
+    return conn.fetchone()[0]
+
+  # need to insert it
+  conn.execute("""
+               INSERT INTO viewer_test (name) VALUES (%s)
+               """, (testname,))
+  connection.commit()
+  return conn.lastrowid
+
+def HaveBuild(conn, treeid, _os, starttime):
+  """See if we already have this build in our database.
+
+  Returns True when at least one viewer_build row matches the given tree id,
+  OS index and start time.
+  """
+  conn.execute("""
+               SELECT COUNT(*) FROM viewer_build WHERE tree_id = %s AND os = %s AND starttime = %s
+               """, (treeid, _os, starttime))
+  # Any positive count means the build is known: testing for "== 1" would
+  # report an already duplicated build as unseen.
+  return conn.fetchone()[0] > 0
+
+def UpdateLogfile(conn, treeid, _os, starttime, logfile):
+  """Set 'logfile' on the matching build, but only where it is still NULL
+  ('logfile' was added in db schema v1), then commit."""
+  values = (logfile, treeid, _os, starttime)
+  statement = """
+               UPDATE viewer_build SET logfile = %s WHERE tree_id = %s AND os = %s AND starttime = %s AND logfile IS NULL
+               """
+  conn.execute(statement, values)
+  connection.commit()
+
+def InsertBuild(conn, treeid, _os, starttime, status, logfile, changeset):
+  """Store a new row in viewer_build, commit it, and return the row id
+  reported by the cursor."""
+  values = (treeid, _os, starttime, status, logfile, changeset)
+  statement = """
+               INSERT INTO viewer_build (tree_id, os, starttime, status, logfile, changeset) VALUES (%s, %s, %s, %s, %s, %s)
+               """
+  conn.execute(statement, values)
+  connection.commit()
+  return conn.lastrowid
+
+def HaveFailRecord(conn,buildid, result, testnames_id):
+  """See if we already have this failRecord in our database.
+
+  Returns True when at least one viewer_testfailure row links |buildid| to
+  |testnames_id|. |result| is accepted but not part of the lookup.
+  """
+  conn.execute("""
+               SELECT COUNT(*) FROM viewer_testfailure WHERE build_id = %s AND test_id = %s 
+               """, (buildid, testnames_id))
+  # Any positive count means the failure is recorded: testing for "== 1"
+  # would report an already duplicated record as missing.
+  return conn.fetchone()[0] > 0
+  
+def InsertTest(conn, buildid, result, testnames_id, description):
+  """Store one test failure (build id, test id, failure text) in
+  viewer_testfailure and commit it.
+
+  |result| is accepted but not saved.
+  """
+  # ToDo: Add column to save result.
+  values = (buildid, testnames_id, description)
+  conn.execute("""
+               INSERT INTO viewer_testfailure (build_id, test_id, failtext) VALUES (%s, %s, %s)
+               """, values)
+  connection.commit()
+def asciirepl(match):
+  """re.sub() callback: turn one "\\xHH" escape into the character it encodes.
+
+  |match| must capture the two hex digits as group 1.
+  """
+  # Hand only the captured digits to unhexlify(): the whole match still
+  # starts with the backslash and the 'x', which are not hex digits.
+  return binascii.unhexlify(match.group(1))
+
+def reformat_content(data):
+  """Return |data| with each "\\xHH" escape sequence run through asciirepl()."""
+  # Accept hex digits only: "\w" also matches characters such as 'z' or '_',
+  # which binascii.unhexlify() refuses.
+  p = re.compile(r'\\x([0-9A-Fa-f]{2})')
+  return p.sub(asciirepl, data)
+  
+  
+def fix_tbox_json(s): 
+  """Fixes up tinderbox json.
+
+  Tinderbox returns strings as single-quoted strings, and occasionally
+  includes the unquoted substring 'undef' (with quotes) in the output, e.g.
+
+  tinderbox_data = {'key': 'hello 'undef' world'};
+
+  is turned into the text
+
+  {"key": "hello 'undef' world"}
+
+  The "tinderbox_data =" prefix and the trailing ";" are removed, strings
+  are re-quoted with double quotes (escaping any double quote they contain),
+  and control characters are dropped. Returns the fixed text.
+  """
+
+  json_data = re.sub(r"^tinderbox_data\s*=\s*", "", s)
+  json_data = re.sub(r";$", "", json_data)
+  undef = "'undef'"
+  retval = []
+  in_str = False   # inside a (single-quoted) string of the input
+  in_esc = False   # previous character was a backslash inside a string
+  skip = 0         # number of upcoming characters that were already emitted
+  for i,c in enumerate(json_data):
+    # The tinderbox data is a fracked json. and it some times contains
+    # Control characters. that would totally fail the json.loads step.
+    # So, eliminate them .. all of them .. here -- Murali
+    if not ((c < '\xFD' and c > '\x1F') or c == '\n' or c == '\r'):
+      continue
+
+    if skip > 0:
+      skip -= 1
+      continue
+
+    if not in_str:
+      if c == "'":
+        # opening quote of a string
+        retval.append("\"")
+        in_str = True
+      else:
+        retval.append(c)
+      continue
+
+    if in_esc:
+      # \' needs no escaping once the string is double-quoted;
+      # every other escape is passed through as is
+      if c == "'":
+        retval.append("'")
+      else:
+        retval.append("\\")
+        retval.append(c)
+      in_esc = False
+    elif c == "\\":
+      in_esc = True
+    elif c == "\"":
+      retval.append("\\\"")
+    elif c == "'":
+      if json_data[i:i+len(undef)] == undef:
+        # 'undef' inside a string: emit it whole and skip the rest of it.
+        # The current quote is already consumed, so only len - 1 characters
+        # remain to be skipped (skipping one more would eat the character
+        # that follows 'undef', e.g. the closing quote of the string).
+        retval.append(undef)
+        skip = len(undef) - 1
+      else:
+        # closing quote of the string
+        retval.append("\"")
+        in_str = False
+    else:
+      retval.append(c)
+  return "".join(retval)
+
+# Command line interface of the scraper.
+parser = OptionParser()
+parser.add_option("-s", "--span", action="store",
+                  dest="timespan", default="20d",
+                  help="Period of time to fetch data for (N[y,m,w,d,h], default=%default)")
+parser.add_option("-t", "--tree", action="store",
+                  dest="tree", default="Firefox",
+                  help="Tinderbox tree to fetch data from (default=%default)")
+# The data lives in a MySQL database: this is its name, not a file name.
+parser.add_option("-d", "--database", action="store",
+                  dest="db", default="topfails",
+                  help="Database name (default=%default)")
+parser.add_option("--host", action="store",
+                  dest="dbhost", default="localhost",
+                  help="Database host name (default=%default)")
+parser.add_option( "--port", action="store",
+                  dest="dbport",default="3306",
+                  help="Database port (default=%default)")
+parser.add_option("-u", "--user", action="store",
+                  dest="dbuser", default="root",
+                  help="Database username (default=%default)")
+parser.add_option("-p", "--passwd", action="store",
+                  dest="dbpasswd",
+                  help="Database user password")
+# The default must be the boolean False: the string "False" is truthy and
+# would turn verbose logging on for every run.
+parser.add_option("-v", "--verbose", action="store_true",
+                  dest="verbose", default=False,
+                  help="Enable verbose logging")
+parser.add_option("--debug", action='store_true',
+                  dest='debug', default=False,
+                  help="enable interactive debugger on exceptions (pdb)")
+parser.add_option("--die", action='store_true',
+                  dest='die', default=False,
+                  help="enable application to die on error")
+(options, args) = parser.parse_args()
+
+# check parsed options
+if options.tree not in mappings.trees:
+  parser.error("Unknown tree: '%s'; should be one of [%s]" % (options.tree, ', '.join(mappings.trees)))
+
+# DEBUG level when --verbose is given, WARNING level otherwise.
+logging.basicConfig(level=options.verbose and logging.DEBUG or logging.WARNING)
+
+# Switch the process to the US/Pacific timezone.
+os.environ['TZ'] = "US/Pacific"
+tzset()
+# Get current time, in seconds.
+endtime = int(time())
+
+# The timespan is a number followed by exactly one unit letter; the pattern
+# is anchored at the end so that trailing garbage ("20days") is rejected
+# instead of being silently ignored.
+m = re.match(r"(\d+)([ymwdh])$", options.timespan)
+if m is None:
+  print >>sys.stderr, "ERROR: bad timespan = '%s'!" % options.timespan
+  sys.exit(1)
+
+# Convert to seconds; a year counts as 365 days and a month as 30 days.
+timespan = int(m.group(1)) * {'y': 365 * 24 * S_IN_H,
+                              'm':  30 * 24 * S_IN_H,
+                              'w':   7 * 24 * S_IN_H,
+                              'd':       24 * S_IN_H,
+                              'h':            S_IN_H}[m.group(2)]
+# Set current time to beginning of requested timespan ending now.
+curtime = endtime - timespan
+
+
+# Open the database connection (|connection|) and its cursor (|conn|).
+# If connecting to the requested database fails, the database is created
+# and populated with the schema before going on.
+createdb=False
+
+
+# First attempt: connect straight to the requested database.
+try:
+  connection  =  MySQLdb.connect (host  =  options.dbhost,
+                                    port  =  int(options.dbport),
+                                    db  =  options.db,
+                                    user  =  options.dbuser,
+                                    passwd  =  options.dbpasswd)
+  conn=connection.cursor()
+except MySQLdb.Error, e:
+  # NOTE(review): every MySQL error ends up here, not only a missing
+  # database - confirm that creating the database is the right reaction
+  # to e.g. bad credentials or an unreachable host.
+  print "Error %d: %s" % (e.args[0], e.args[1])
+  createdb = True
+     
+
+if createdb:
+  # Connect without selecting a database (this connect is not guarded by a
+  # try block) ...
+  connection = MySQLdb.connect (host  =  options.dbhost,
+                                    port  =  int(options.dbport),
+                                    user  =  options.dbuser,
+                                    passwd  =  options.dbpasswd)
+  conn = connection.cursor()
+  try:
+    # ... create the database; its name is interpolated into the statement
+    # as is, straight from the --database option ...
+    createdatabase='create database %s' %(options.db)
+    conn.execute (createdatabase)
+    conn.close()
+    connection.commit()
+    connection.close()
+  except  MySQLdb.Error, e:
+    print "Error %d: %s" % (e.args[0], e.args[1])
+    sys.exit (1)
+  # ... then reconnect, this time to the new database ...
+  try:
+    connection  =  MySQLdb.connect (host  =  options.dbhost,
+                                      port  =  int(options.dbport),
+                                      db  =  options.db,
+                                      user  =  options.dbuser,
+                                      passwd  =  options.dbpasswd)
+    conn=connection.cursor()
+  except MySQLdb.Error, e:
+    print "Error %d: %s" % (e.args[0], e.args[1])
+    sys.exit(1)
+  
+  # ... and create the tables.
+  CreateDBSchema(conn)
+
+
+# Main part of the script: fetch the tinderbox data chunk by chunk and save
+# the unit test builds (and the failures found in their logs) in the db.
+treeid = GetOrInsertTree(conn, options.tree)
+
+logging.info("Reading tinderbox data...")
+
+chunk = 0
+# add a fudge factor here, since builds can take up to 3 hours to finish,
+# and we can't get the changeset unless we ask for time up to the end of the
+# build
+endtime += 3 * S_IN_H
+timespan += 3 * S_IN_H
+totalchunks = int(ceil(float(timespan) / chunksize))
+
+# dictionary of parsers, by harness name
+# (the parsers live in the module imported as 'logparser')
+parsers = {
+  'check': logparser.CheckParser,
+  'mochitest': logparser.MochitestParser,
+  'reftest': logparser.ReftestParser,
+  'jsreftest': logparser.ReftestParser,
+  'crashtest': logparser.ReftestParser,
+  'xpcshell': logparser.XPCshellParser,
+  }
+
+# regular expression to find the harness in a build name
+harness_regex = r'.* (%s)(-.*)?' % '|'.join(parsers.keys())
+
+while curtime < endtime and chunk < totalchunks:
+  chunk += 1
+  logging.info("Chunk %d/%d" % (chunk, totalchunks))
+
+  # the last chunk may be shorter than the others
+  if (endtime - curtime) < chunksize:
+    chunksize = endtime - curtime
+
+  tboxurl = "http://tinderbox.mozilla.org/showbuilds.cgi?tree=%(tree)s&maxdate=%(maxdate)d&noignore=1&hours=%(hours)d&json=1&noignore=1" \
+              % {'tree': options.tree,
+                 'maxdate': curtime + chunksize, # tbox wants the end time
+                 'hours': int(chunksize / S_IN_H)}
+  u = urllib.urlopen(tboxurl)
+  try:
+    tboxjson = u.read()
+  finally:
+    u.close()
+  
+  tboxjson = fix_tbox_json(tboxjson)
+  try:
+    tboxdata = json.loads(tboxjson)
+  except Exception, inst:
+    print >>sys.stderr, "Error parsing JSON: %s" % inst
+    # Give up on this chunk, but still move on to the next time window:
+    # without this, the next iteration would request the same window again.
+    if chunk < totalchunks:
+      sleep(SLEEP_TIME)
+    curtime += chunksize
+    continue
+
+  # we only care about unit test boxes
+  unittest_indices = [(logname, index) #tboxdata['build_name_index'][index]
+                      for logname, index in tboxdata['build_name_index'].items()
+                      if re.search("ref|mochi|xpc|check", logname)]
+
+  # read build table
+  for timerow in tboxdata['build_table']:
+    for logname, index in unittest_indices:
+      if index >= len(timerow) or timerow[index] == -1:
+        continue
+
+      build = timerow[index]
+      if 'buildname' not in build or \
+         'logfile'   not in build:
+        continue
+
+      status = BuildStatusFromText(build['buildstatus'])
+      # Skip unavailable "builds".
+      if status < BuildStatus.Success:
+        continue
+
+      name = build['buildname']
+      build_name_dict = mappings.parse_build_name(name)
+      if build_name_dict:
+        _os = mappings.OS_to_index[build_name_dict['os']]
+      else:
+        _os = -1 # UNKNOWN
+      starttime = int(build['buildtime'])
+      # skip builds we've already seen
+      if HaveBuild(conn, treeid, _os, starttime):
+        logging.info("Skipping already seen build '%s' at %d (%s)" % (name, starttime, ctime(starttime)))
+
+        # Call 'UpdateLogfile()' anyway.
+        UpdateLogfile(conn, treeid, _os, starttime, build['logfile'])
+        continue
+
+      # must have scrape data for changeset
+      if build['logfile'] not in tboxdata['scrape']:
+        continue
+      changeset = FindChangesetInScrape(tboxdata['scrape'][build['logfile']])
+      if changeset is None:
+        continue
+
+      buildid = InsertBuild(conn, treeid, _os, starttime, status, build['logfile'], changeset)
+
+      # 'Success' is fine as is.
+      # 'Burning' builds are ignored: tests may have run nontheless, but it's safer to discard them :-|
+      # 'Exception' builds are ignored too: should only be worse than 'Burning'.
+      # (Don't know much at time of writing, since this feature is not active yet: see bug 476656 and follow-ups.)
+
+      # Parse log to save 'TestFailed' results.
+      if status == BuildStatus.TestFailed :
+        logging.info("Checking build log for '%s' at %d (%s)" % (name, starttime, ctime(starttime)))
+        try:
+          # Pick the parser for this harness; a build name that does not
+          # match the regex raises here and is reported by the handler below.
+          harness_type = re.match(harness_regex, logname).groups()[0]
+          failure_parser = parsers.get(harness_type, logparser.LogParser)()
+
+          # Grab the build log.
+          log, headers = urllib.urlretrieve("http://tinderbox.mozilla.org/%s/%s" % (options.tree, build['logfile']))
+          gz = GzipFile(log) # I need a list of lines from the build log
+          try:
+            failures = failure_parser.parse(gz)
+          finally:
+            gz.close()
+
+          # add the failures to the database
+          for failure in failures:
+            test = failure['test']
+            text = failure['text']
+            reason = failure['reason']
+            
+            testnames_id=GetOrInsertTest(conn,test)
+            if HaveFailRecord(conn,buildid,  reason, testnames_id):
+              logging.info("Skipping already recorded failure '%s' in build with id '%s' with failure record '%s' " % (test, buildid, text))
+            else:  
+              InsertTest(conn, buildid, reason, testnames_id, text)
+                      
+        except Exception, e:
+          errstring = "Unexpected error: %s" % e
+          if options.debug:
+            print errstring
+            import pdb; pdb.set_trace()
+          elif options.die:
+            raise
+          else:
+            logging.error(errstring)
+
+      # Save 'Unknown' status failure: this should not happen (unless new statuses are created), but we want to know if it does.
+      elif status == BuildStatus.Unknown:
+        # Add a fake test failure.
+        # NOTE(review): "99999999999" is used as the test id; presumably no
+        # such row exists in viewer_test - confirm that the insert succeeds.
+        InsertTest(conn, buildid, "TEST-UNEXPECTED-FAIL", "99999999999", "Unknown status = '%s'!" % build['buildstatus'])
+
+  # be nice to the tinderbox server between two requests
+  if chunk < totalchunks:
+    sleep(SLEEP_TIME)
+  curtime += chunksize
+  
+conn.close()
+connection.commit()
+connection.close()
+logging.info("Done")
--- a/topfails/dbschema.py
+++ b/topfails/dbschema.py
@@ -0,0 +1,121 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is TopFails site code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla foundation
+# Portions created by the Initial Developer are Copyright (C) 2010
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Murali Nandigama <Murali.Nandigama@Gmail.COM>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+#
+# DB schema maintenance functions.
+#
+
+import logging
+
+__all__ = \
+ [
+  "CreateDBSchema"
+ ]
+
+def CreateDBSchema(conn):
+  """Create the viewer_* tables, their foreign keys and their indexes.
+
+  conn: db cursor; the statements are executed on it one by one, in the
+  order listed below.
+  """
+  logging.info("Executing CreateDBSchema()")
+
+  schema_statements = (
+    # trees
+    """
+  CREATE TABLE viewer_tree (
+      id integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+      name varchar(45) NOT NULL
+      )
+  """,
+    # builds, each one linked to its tree
+    """
+  CREATE TABLE viewer_build (
+      id integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+      os integer NOT NULL,
+      tree_id integer NOT NULL,
+      starttime integer,
+      status integer NOT NULL,
+      changeset varchar(80) NOT NULL,
+      logfile varchar(300) NOT NULL
+      )
+  """,
+    """
+  ALTER TABLE viewer_build 
+  ADD CONSTRAINT tree_id_refs_id_11e44bee 
+  FOREIGN KEY (tree_id) 
+  REFERENCES viewer_tree (id)
+  """,
+    # test names
+    """
+  CREATE TABLE viewer_test (
+      id integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+      name varchar(300) NOT NULL
+      )
+  """,
+    # test failures, each one linked to its test and to its build
+    """
+  CREATE TABLE viewer_testfailure (
+      id integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+      build_id integer NOT NULL,
+      test_id integer NOT NULL,
+      failtext varchar(400) NOT NULL
+      )
+  """,
+    """
+  ALTER TABLE viewer_testfailure 
+  ADD CONSTRAINT test_id_refs_id_1cc1b9e6 
+  FOREIGN KEY (test_id) 
+  REFERENCES viewer_test (id)
+  """,
+    """
+  ALTER TABLE viewer_testfailure 
+  ADD CONSTRAINT build_id_refs_id_112c09cb 
+  FOREIGN KEY (build_id) 
+  REFERENCES viewer_build (id)
+  """,
+    # indexes on the foreign key columns
+    """
+  CREATE INDEX viewer_build_tree_id ON viewer_build (tree_id)
+  """,
+    """
+  CREATE INDEX viewer_testfailure_build_id ON viewer_testfailure (build_id)
+  """,
+    """
+  CREATE INDEX viewer_testfailure_test_id ON viewer_testfailure (test_id)
+  """,
+  )
+
+  for statement in schema_statements:
+    conn.execute(statement)
+
+
--- a/topfails/logparser.py
+++ b/topfails/logparser.py
@@ -0,0 +1,203 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is TopFails site code.
+#    
+# The Initial Developer of the Original Code is
+# Mozilla foundation
+# Portions created by the Initial Developer are Copyright (C) 2010
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Jeff Hammel <jhammel@mozilla.com>
+#   Murali Nandigama <Murali.Nandigama@Gmail.COM>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+# TODO: document what all of the cases are, e.g
+#
+#  - harness completes and automation.py hangs:
+#  "'62521 INFO TEST-PASS | /tests/content/xbl/test/test_bug542406.xhtml | Field three readonly?\n',
+# '62523 INFO Passed: 60569\n',
+# '62524 INFO Failed: 44\n',
+# '62525 INFO Todo:   770\n',
+# '62526 INFO SimpleTest FINISHED\n',
+# 'TEST-UNEXPECTED-FAIL | automation.py | application timed out after 330 seconds with no output\n',
+# "Can't trigger Breakpad, just killing process\n",
+# 'INFO | automation.py | Application ran for: 0:24:44.270038\n',
+# 'INFO | automation.py | Reading PID log: /var/folders/H5/H5TD8hgwEqKq9hgKlayjWU+++TM/-Tmp-/tmpEjNEf2pidlog\n',
+# "WARNING | automationutils.processLeakLog() | refcount logging is off, so leaks can't be detected!\n",
+# '\n',
+# 'INFO | runtests.py | Running tests: end.\n',
+# 'program finished with exit code 247\n',
+# 'elapsedTime=1496.639870\n',"
+#
+# - leak at harness closure: see sample-logs/leaks-log.txt
+
+import re
+
+class LogParser(object):
+  """abstract base class for parsing unittest logs
+
+  Subclasses override get_potentialTestName() to tell which log lines
+  announce the test that is about to run.
+  """
+
+  # 'TestFailed' expected log format is "result | test | optional text".
+  testfailedRe = re.compile(r"(TEST-UNEXPECTED-.*|PROCESS-CRASH) \| (.*) \|(.*)")
+  
+  def get_potentialTestName(self, line):
+    """return potential test name [None by default]"""
+    return None
+
+  def processTestName(self, test, reason, potentialTestName, lines, idx):
+    """substitute the potential name for the test (if applicable)
+
+    Returns a (test name, line index) pair; the test name is None when the
+    failure should not be recorded, and the line index is moved past the
+    automationutils.processLeakLog lines that were consumed.
+    """
+
+    # for process crash, take the test-runner (automation) as the test failure
+    # (as already reported in test) and leave the name alone
+    if 'PROCESS-CRASH' in reason:
+      return test, idx
+
+    # an automation.py failure will ALWAYS be followed by a
+    # automationutils.processLeakLog line;  so send a None here
+    # which will cause the parsing to continue and don't record this failure
+    if 'automation.py' in test:
+      return None, idx
+
+    # a leak log failure is blamed on the last announced test (if any);
+    # the check has to look at |test|: a bare string literal is always true
+    # and would rename every failure
+    if ('automationutils.processLeakLog' in test) and (potentialTestName is not None):
+      len_lines = len(lines)
+      # swallow the leak log lines that directly follow
+      while (idx+1) < len_lines and ('automationutils.processLeakLog' in lines[idx+1]):
+        idx += 1
+      return potentialTestName, idx
+    
+    # if these conditions are not met, return
+    # the test name untouched
+    return test, idx # no name substitution
+    
+  def parse(self, fp):
+    """
+    parse the file, returning the test failures:
+    [ {'test': test, 'text': text, 'reason': reason}, ... ]
+    -fp: file-like object
+    """
+    # Look for test failures.
+    failures = []
+    lines = fp.readlines()
+    potentialTestName = None
+
+    idx = 0
+    while idx < len(lines):
+      line = lines[idx]
+
+      # get the potential real name for reporting
+      # a test for an automation.py or automationutils.processLeakLog failure
+      potentialTestName = self.get_potentialTestName(line) or potentialTestName
+
+      # test to see if the line is a failure
+      m = self.testfailedRe.match(line)
+      if not m:
+        idx += 1
+        continue
+
+      # reason for failure [TEST-UNEXPECTED-.* or PROCESS-CRASH]
+      reason = m.group(1).rstrip()
+
+      # name of the test
+      test = m.group(2).strip() or "[unittest-log.py: no logged test]"
+
+      # fail log text
+      text = m.group(3).strip() or "[unittest-log.py: no logged text]"
+
+      # test to see if the harness hangs after a run completion
+      # (the first line has no previous line: lines[-1] would be the LAST one)
+      if idx > 0 and lines[idx-1].strip().endswith('FINISHED'):
+        text = 'harness hangs after end of test run (or something)'
+      else:
+        # substitute potentialTestName for the test name if
+        # test is automationutils.processLeakLog
+        test, idx = self.processTestName(test, reason, potentialTestName, lines, idx)
+
+        if test is None: # don't add this test (and don't reset potentialTestName)
+          idx += 1
+          continue
+
+      # reset potentialTestName
+      potentialTestName = None
+        
+      # Code bits below change back slashes to forward slashes
+      # and get rid of the variable prefix of the /test/../.. names:
+      # keep what follows 'build/', unless that leaves 2 characters or less,
+      # in which case what precedes 'build/' is kept
+      test = test.replace('\\','/')
+      if '/' in test:
+        head, _sep, tail = test.partition('build/')
+        if len(tail) > 2:
+          test = tail
+        else:
+          test = head
+
+      # append interesting data to failures return value
+      failures.append({'test': test, 'text': text, 'reason': reason})
+
+      # increment the line counter
+      idx += 1
+      
+    return failures
+  
+class ReftestParser(LogParser):
+  """
+  applies to
+  - Reftest
+  - Crashtest
+  - JSReftest
+  """
+
+  def get_potentialTestName(self, line):
+    """
+    If it is jsreftest,crashtest we see 'INFO | Loading' in line
+    as the potential real test name.
+    Returns the text that follows 'INFO | Loading ', or None for other lines.
+    """
+    if "INFO | Loading" in line:
+      # drop the line terminator, so that it does not end up in the test name
+      return line.split('INFO | Loading ', 1)[-1].rstrip()
+    return None
+    
+  
+class MochitestParser(LogParser):
+  """
+  applies to
+  - Mochitest-plain
+  - Mochitest-chrome
+  - Mochitest-browserchrome
+  - Mochitest-a11y
+  """
+
+  def get_potentialTestName(self, line):
+    """Check all lines if they have INFO Running.
+
+    Returns the text that follows 'INFO Running ', or None for other lines.
+    """
+    if "INFO Running" in line:
+      # The line terminator has to go first: as long as it is there, the
+      # trailing ellipsis is not at the end of the string and is not stripped.
+      return line.split('INFO Running ', 1)[-1].rstrip().rstrip('.') # strip trailing ellipsis
+    return None
+
+  
+class XPCshellParser(LogParser):
+  """
+  parses XPCShell results
+
+  Adds nothing to LogParser: no line is treated as a potential test name.
+  """
+
+class CheckParser(LogParser):
+  """
+  parses results from `make check` (C compiled code tests)
+
+  Adds nothing to LogParser: no line is treated as a potential test name.
+  """
--- a/topfails/mappings.py
+++ b/topfails/mappings.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+"""
+Build name:
+<OS> <branch> [type of build]
+
+Examples:
+
+The builder: <OS> <branch> build (e.g. 'Linux mozilla-central build')
+-- in this case [type of build] is the string 'build'
+
+A debug test: <OS> <branch> debug test <test type>
+-- in this case [type of build] is the string 'debug test <test type>'
+
+An opt test: <OS> <branch> opt test <test type>
+
+A leak test: <OS> <branch> leak test <test type>
+
+Talos:
+<OS> <branch> talos (e.g. 'Rev3 Fedora 12x64 mozilla-central talos')
+-or-
+<OS> <branch> talos <type> (e.g. 'Rev3 Fedora 12x64 mozilla-central talos cold')
+
+Currently, the mappings are coded here (in python);  this has the restriction
+that the mappings cannot be (intelligently) programmatically updated.
+If it is desired that the mappings may be updated programmatically
+(e.g. from the command line), then a [presumably text-ish] storage method
+should be used for these mappings, e.g. an .ini file
+
+So....
+
+All of this data lives in the buildbot-configs.
+See http://hg.mozilla.org/build/buildbot-configs/file/tip/mozilla2/config.py
+The mappings are duplicated here for expediency.
+
+ - what really should happen is that this config file should be imported and
+   used here.  In order for this to happen:
+   - the config.py file should be refactored so that it is consumable (and probably the entire buildbot-configs as well)
+   - buildbot-configs (or whatever this piece is refactored into) should
+     become a real python package or otherwise installable/depended upon
+"""
+
+import re
+
+# OS mappings
+# Known OS names; the position in this list is the OS index.
+OSes = [
+  'Linux',
+  'Linux x86-64',
+  'OS X 10.5.2',
+  'OS X 10.6.2',
+  'Rev3 Fedora 12',
+  'Rev3 Fedora 12x64',
+  'Rev3 MacOSX Leopard 10.5.8',
+  'Rev3 MacOSX Snow Leopard 10.6.2',
+  'Rev3 WINNT 5.1',
+  'Rev3 WINNT 6.1',
+  'WINNT 5.2',
+]
+# Lookups in both directions: OS name -> index, index -> OS name.
+OS_to_index = dict(zip(OSes, range(len(OSes))))
+index_to_OS = dict(enumerate(OSes))
+# Display name and word size of each OS, built from (OS, name, bits) rows.
+OSdata = dict(
+  (key, {'name': label, 'bits': width})
+  for key, label, width in (
+    ('Linux', 'Linux', 32),
+    ('Rev3 Fedora 12', 'Fedora', 32),
+    ('Rev3 Fedora 12x64', 'Fedora', 64),
+    ('Linux x86-64', 'Linux', 64),
+    ('OS X 10.5.2', 'MAC OSX', 32),
+    ('OS X 10.6.2', 'MAC OSX', 64),
+    ('Rev3 MacOSX Leopard 10.5.8', 'MAC OSX', 32),
+    ('Rev3 MacOSX Snow Leopard 10.6.2', 'MAC OSX', 64),
+    ('Rev3 WINNT 5.1', 'Windows', 32),
+    ('Rev3 WINNT 6.1', 'Windows', 64),
+    ('WINNT 5.2', 'Windows', 32),
+  )
+)
+
+# branch objects
+# branches = [ 'mozilla-central',
+#              'mozilla-1.9.2',
+#              'comm-central',
+#              'comm-central-trunk'
+#     ]
+# Tinderbox tree name -> branch name used in the build names.
+trees = { 'Firefox': 'mozilla-central',
+          'Firefox3.6': 'mozilla-1.9.2',
+          'Thunderbird': 'comm-central',
+          'SeaMonkey': 'comm-central-trunk',
+          }
+
+# '<OS> <branch> <type of build>'
+# The OS and branch names are escaped, so that they only match literally:
+# unescaped, the '.' of e.g. 'OS X 10.5.2' or 'mozilla-1.9.2' matches any
+# character, and the 'os' group could then hold a name that is not in OSes.
+build_name_regex = r'(?P<os>%s) (?P<branch>%s) (?P<type>.*)' % ('|'.join(map(re.escape, OSes)), '|'.join(map(re.escape, trees.values())))
+build_name_regex = re.compile(build_name_regex)
+def parse_build_name(name):
+  """Split a build name into its parts.
+
+  Returns a dictionary with the keys 'os', 'branch' and 'type', or None when
+  |name| does not start with a known OS followed by a known branch.
+  """
+  match = build_name_regex.match(name)
+  if match is None:
+    return None
+  return match.groupdict()
+
+# When run as a script: print the parsed form of each build name given on
+# the command line (None for a name that can not be parsed).
+if __name__ == '__main__':
+  import sys
+  for arg in sys.argv[1:]:
+    print parse_build_name(arg)
+