# Index: /logger.py (revision 1)
"""
Minimal keyword-based logging: producers create messages, consumers emit them.

A ``Producer`` is identified by a tuple of keywords.  Calling it builds a
``Message`` and hands it to whichever consumer was registered for those
keywords with ``setconsumer``.
"""
import io
import re
import sys

########################################################
############### PRODUCERS ##############################
########################################################

class Message(object):
    """A single log record: the producer's keywords plus the call arguments."""

    def __init__(self, keywords, args):
        self.keywords = keywords
        self.args = args

    def content(self):
        """Return the arguments converted with ``str`` and joined by spaces."""
        return " ".join(map(str, self.args))

    def prefix(self):
        """Return the ``[kw1:kw2] `` prefix built from the keywords."""
        return "[%s] " % (":".join(self.keywords))

    def __str__(self):
        return self.prefix() + self.content()


class Producer(object):
    """ Log producer API which sends messages to be logged
        to a 'consumer' object, which then prints them to stdout,
        stderr, files, etc.
    """

    Message = Message  # to allow later customization
    # Registry shared by every producer: keyword tuple -> consumer (or None).
    keywords2consumer = {}

    def __init__(self, keywords):
        """Accept either a whitespace-separated string or a tuple of keywords."""
        if isinstance(keywords, str):
            keywords = tuple(keywords.split())
        self.keywords = keywords

    def __repr__(self):
        # The format string used to be empty, which made repr() raise TypeError.
        return "<Producer %s>" % ":".join(self.keywords)

    def __getattr__(self, name):
        """Create (and cache) a child producer with ``name`` appended."""
        if name.startswith('_'):
            raise AttributeError(name)
        producer = self.__class__(self.keywords + (name,))
        # Cache it so __getattr__ is not triggered again for this name.
        setattr(self, name, producer)
        return producer

    def __call__(self, *args):
        """Send a message made of ``args`` to the matching consumer, if any."""
        func = self._getconsumer(self.keywords)
        if func is not None:
            func(self.Message(self.keywords, args))

    def _getconsumer(self, keywords):
        """Return the consumer registered for the longest prefix of ``keywords``.

        Falls back to the entry stored under the plain ``'default'`` key.
        """
        for i in range(len(keywords), 0, -1):
            try:
                return self.keywords2consumer[keywords[:i]]
            except KeyError:
                continue
        return self.keywords2consumer.get('default', default_consumer)


default = Producer('default')


def default_consumer(msg):
    """Print the message on standard output."""
    print(str(msg))


Producer.keywords2consumer['default'] = default_consumer


class MultipleProducer(Producer):
    """Producer that looks up a consumer for each keyword separately."""

    def __call__(self, *args, **kwargs):
        """Deliver the message to every consumer found for the keywords.

        Returns the value returned by the first consumer that was called
        (``None`` when no consumer was called).
        """
        results = []
        for func in self._getconsumer(self.keywords):
            if func is not None:
                results.append(func(self.Message(self.keywords, args), **kwargs))
        if results:
            return results[0]
        return None

    def _getconsumer(self, keywords):
        """Yield the consumer registered for each individual keyword.

        Keywords without a registry entry are skipped.  A keyword that was
        registered with ``None`` still counts as found, so it keeps silencing
        the message.  Only when no keyword is registered at all is the
        default consumer yielded.
        """
        found_consumer = False
        for keyword in keywords:
            key = (keyword,)
            if key in self.keywords2consumer:
                found_consumer = True
                yield self.keywords2consumer[key]
        if not found_consumer:
            yield self.keywords2consumer.get('default', default_consumer)

########################################################
############### CONSUMERS ##############################
########################################################

try:
    _FILE_TYPES = (file, io.IOBase)
except NameError:  # Python 3 has no ``file`` builtin
    _FILE_TYPES = (io.IOBase,)


class File(object):
    """Consumer writing each message, followed by a newline, to a file object."""

    def __init__(self, f):
        """Wrap ``f``; raise TypeError if it does not look like an open file."""
        if not hasattr(f, 'write'):
            raise TypeError("%r has no write() method" % (f,))
        # Objects that offer open() but are not real files are rejected.
        if hasattr(f, 'open') and not isinstance(f, _FILE_TYPES):
            raise TypeError("%r is not an open file" % (f,))
        self._file = f

    def __call__(self, msg):
        self._file.write(str(msg) + "\n")


class Path(File):
    """File consumer that opens ``filename`` itself (line buffered)."""

    def __init__(self, filename, append=False):
        if append:
            mode = 'a'
        else:
            mode = 'w'
        f = open(str(filename), mode, buffering=1)
        super(Path, self).__init__(f)


def STDOUT(msg):
    """Consumer writing the message to ``sys.stdout``."""
    sys.stdout.write(str(msg) + "\n")


def STDERR(msg):
    """Consumer writing the message to ``sys.stderr``."""
    sys.stderr.write(str(msg) + "\n")


def setconsumer(keywords, consumer):
    """Register ``consumer`` for ``keywords`` in the shared producer registry.

    ``keywords`` is a whitespace-separated string or a tuple.  ``consumer``
    may be ``None`` (discard), a callable taking the message, or an object
    with a ``write`` method, which is wrapped in ``File``.

    Raises TypeError for any other kind of key or consumer.
    """
    # normalize to tuples
    if isinstance(keywords, str):
        keywords = tuple(keywords.split())
    elif not isinstance(keywords, tuple):
        raise TypeError("key %r is not a string or tuple" % (keywords,))
    if consumer is not None and not callable(consumer):
        if not hasattr(consumer, 'write'):
            raise TypeError("%r should be None, callable or file-like" % (consumer,))
        consumer = File(consumer)
    Producer.keywords2consumer[keywords] = consumer
# Index: /cheesecake_index.py (revision 1)
#!/usr/bin/env python
"""
Cheesecake: How tasty is your code?

The idea of the Cheesecake project is to rank Python packages
based on various empiric "kwalitee" factors, such as:

 * whether the package can be downloaded
 * whether the package can be unpacked
 * whether the package can be installed into an alternate directory
 * existence of certain files such as README, INSTALL, LICENSE, setup.py etc.
 * existence of certain directories such as doc, test, demo, examples
 * percentage of modules/functions/classes/methods with docstrings
 * percentage of functions/methods that are unit tested
 * average pylint score for all non-test and non-demo modules
"""

import os
import re
import shutil
import sys
import tarfile
import zipfile
from inspect import isclass, ismethod, isfunction
from math import ceil
from optparse import OptionParser
from subprocess import call, Popen, PIPE, STDOUT

try:
    from urllib import urlretrieve
    from urlparse import urlparse
except ImportError:  # Python 3 moved both into the urllib package
    from urllib.request import urlretrieve
    from urllib.parse import urlparse

import logger

INDEX_PYPI_DOWNLOAD = 50
INDEX_PYPI_DISTANCE = 5
INDEX_URL_DOWNLOAD = 25
INDEX_UNPACK = 25
INDEX_UNPACK_DIR = 15
INDEX_INSTALL = 50
INDEX_FILE_CRITICAL = 15
INDEX_FILE = 10
INDEX_REQUIRED_FILES = 100
INDEX_FILE_PYC = -20
INDEX_DIR_CRITICAL = 25
INDEX_DIR = 20
INDEX_DIR_EMPTY = 5

MAX_INDEX_DOCSTRINGS = 100 # max. percentage of modules/classes/methods/functions with docstrings
MAX_INDEX_PYLINT = 100 # max. pylint score
PAD_TEXT = 40
PAD_VALUE = 4


class Index(object):
    """
    Encapsulates index attributes such as name, value, details
    """

    def __init__(self, type, name="", value=0, details=""):
        """
        Store the attributes; the index name is "index_<type>", with
        "_<name>" appended when a name is given
        """
        self.type = "index_" + type
        self.name = self.type
        if name:
            self.name += "_" + name
        self.value = value
        self.details = details

    def print_info(self):
        """
        Print index name padded with dots, followed by value and details
        """
        msg = pad_with_dots(self.name)
        msg += pad_left_spaces(self.value)
        msg += " (" + self.details + ")"
        print(msg)


class CompositeIndex(object):
    """
    Collection of indexes of same type (e.g. files, dirs)
    """

    def __init__(self, type):
        """
        Indexes is a dict mapping names to Index objects
        """
        self.type = type
        self.indexes = {}

    def set_index(self, name, value=0, details=""):
        """
        Create new index or update existing index with specified attributes
        """
        if name in self.indexes:
            index = self.indexes[name]
            index.value = value
            index.details = details
        else:
            self.indexes[name] = Index(self.type, name, value, details)

    def print_info(self):
        """
        Print index info for all indexes sorted alphanumerically by name
        """
        for name in sorted(self.indexes):
            self.indexes[name].print_info()

    def get_value(self):
        """
        Return sum of individual index values
        """
        return sum(index.value for index in self.indexes.values())

    value = property(get_value)


class CheesecakeError(Exception):
    """
    Custom exception class for Cheesecake-specific errors
    """
    pass
class Cheesecake(object):
    """
    Computes 'goodness' of Python packages

    Generates "cheesecake index" that takes into account things like:

     * whether the package can be downloaded
     * whether the package can be unpacked
     * whether the package can be installed into an alternate directory
     * existence of certain files such as README, INSTALL, LICENSE, setup.py etc.
     * existence of certain directories such as doc, test, demo, examples
     * percentage of modules/functions/classes/methods with docstrings
     * percentage of functions/methods that are unit tested
     * average pylint score for all non-test and non-demo modules
    """

    def __init__(self, name="", url="", path="", sandbox=None,
                 verbose=False, quiet=False):
        """
        Initialize critical variables, download and unpack package, walk package tree

        Raises CheesecakeError when none of name, url and path is given or
        when the package cannot be retrieved or unpacked
        """
        self.name = name
        self.url = url
        self.package_path = path
        self.sandbox = sandbox or "/tmp/cheesecake_sandbox"
        self.verbose = verbose
        self.quiet = quiet

        self.package_types = ["tar.gz", "tgz", "zip"]
        # These are read by cleanup()/raise_exception(), so they must exist
        # before the first possible call to raise_exception().
        self.sandbox_pkg_file = ""
        self.sandbox_pkg_dir = ""
        self.sandbox_install_dir = ""
        self.logfile = ""

        if not self.name and not self.url and not self.package_path:
            self.raise_exception("No package name, URL or path specified ... exiting")
        if not os.path.isdir(self.sandbox):
            os.makedirs(self.sandbox)

        self.determine_pkg_name()
        self.configure_logging()
        self.init_indexes()
        self.retrieve_pkg()
        self.unpack_pkg()
        self.walk_pkg()

    def raise_exception(self, msg):
        """
        Cleanup, remove the log file, and raise CheesecakeError

        Don't use logging, since it can be called before logging has been setup
        """
        self.cleanup()
        # self.logfile already contains the sandbox directory and is empty
        # until configure_logging() has run.
        if self.logfile and os.path.isfile(self.logfile):
            os.unlink(self.logfile)

        msg += "\n" + pad_msg("CHEESECAKE INDEX", 0)
        raise CheesecakeError(msg)

    def cleanup(self):
        """
        Delete temporary directories and files that were
        created in the sandbox
        """
        if os.path.isfile(self.sandbox_pkg_file):
            self.log("Removing file %s" % self.sandbox_pkg_file)
            os.unlink(self.sandbox_pkg_file)
        for directory in (self.sandbox_pkg_dir, self.sandbox_install_dir):
            if os.path.isdir(directory):
                self.log("Removing directory %s" % directory)
                shutil.rmtree(directory)

    def determine_pkg_name(self):
        """
        Set self.package from the name, the local path or the URL (in that order)
        """
        if self.name:
            self.package = self.name
        elif self.package_path:
            self.package = self.get_package_from_path(self.package_path)
        else:
            self.package = self.get_package_from_url()

    def configure_logging(self):
        """
        Default settings for logging

        self.log goes to console if verbose, else to the logfile
        log.info goes to the logfile if quiet, else to console
        log.debug goes to the logfile
        log.warn and log.error go to console
        """
        self.logfile = os.path.join(self.sandbox, self.package + ".log")

        logger.setconsumer('logfile', open(str(self.logfile), 'w', buffering=1))
        logger.setconsumer('console', logger.STDOUT)
        logger.setconsumer('null', None)

        if self.verbose:
            self.log = logger.MultipleProducer('cheesecake console')
        else:
            self.log = logger.MultipleProducer('cheesecake logfile')
        if self.quiet:
            self.log.info = logger.MultipleProducer('cheesecake logfile')
        else:
            self.log.info = logger.MultipleProducer('cheesecake console')
        self.log.debug = logger.MultipleProducer('cheesecake logfile')
        self.log.warn = logger.MultipleProducer('cheesecake console')
        self.log.error = logger.MultipleProducer('cheesecake console')

    def init_indexes(self):
        """
        Initialize variables used in index computation

        * cheesecake_index: overall index for the package
        * index: dict holding Index or CompositeIndex objects of various types
        """
        self.cheesecake_index = 0
        self.cheesecake_index_installability = 0
        self.cheesecake_index_documentation = 0
        self.cheesecake_index_codekwalitee = 0
        self.max_cheesecake_index_installability = (INDEX_PYPI_DOWNLOAD +
                                                    INDEX_UNPACK +
                                                    INDEX_UNPACK_DIR +
                                                    INDEX_INSTALL)
        self.max_cheesecake_index = (self.max_cheesecake_index_installability +
                                     MAX_INDEX_DOCSTRINGS +
                                     MAX_INDEX_PYLINT)
        self.max_cheesecake_index_documentation = (INDEX_REQUIRED_FILES +
                                                   MAX_INDEX_DOCSTRINGS)
        self.max_cheesecake_index_codekwalitee = MAX_INDEX_PYLINT

        self.index = {}
        for index_type in ["file", "dir"]:
            self.index[index_type] = CompositeIndex(index_type)
        for index_type in ["pypi_download", "url_download",
                           "unpack_dir", "unpack", "install",
                           "docstrings", "pylint"]:
            self.index[index_type] = Index(index_type)

        self.cheese_files = ["readme", "install", "changelog",
                             "news", "faq",
                             "todo", "thanks",
                             "license", "announce",
                             "setup.py", "ez_setup.py",
                             ]
        self.critical_cheese_files = ["readme", "license", "setup.py"]
        for cheese_file in self.cheese_files:
            self.index["file"].set_index(name=cheese_file, details="file not found")
            if cheese_file in self.critical_cheese_files:
                points = INDEX_FILE_CRITICAL
            else:
                points = INDEX_FILE
            self.max_cheesecake_index += points
            self.max_cheesecake_index_documentation += points
        self.log.debug("cheese_files: " + ",".join(self.cheese_files))
        self.log.debug("critical_cheese_files: " + ",".join(self.critical_cheese_files))

        self.cheese_dirs = ["doc", "test", "example", "demo"]
        self.critical_cheese_dirs = ["doc", "test"]
        for cheese_dir in self.cheese_dirs:
            self.index["dir"].set_index(name=cheese_dir, details="directory not found")
            if cheese_dir in self.critical_cheese_dirs:
                points = INDEX_DIR_CRITICAL
            else:
                points = INDEX_DIR
            self.max_cheesecake_index += points
            self.max_cheesecake_index_documentation += points
        self.log.debug("cheese_dirs: " + ",".join(self.cheese_dirs))
        self.log.debug("critical_cheese_dirs: " + ",".join(self.critical_cheese_dirs))

        self.pkg_files = {}
        self.pkg_dirs = {}
        self.file_types = ["py", "pyc", "test",
                           ]
        for file_type in self.file_types:
            self.pkg_files[file_type] = []

    def retrieve_pkg(self):
        """
        Fetch the package from PyPI, from the URL or from the local path
        """
        if self.name:
            self.get_pkg_from_pypi()
        elif self.url:
            self.download_pkg()
        else:
            self.copy_pkg()

    def get_package_from_url(self):
        """
        Use ``urlparse`` to obtain package path from URL
        """
        path = urlparse(self.url)[2]
        return self.get_package_from_path(path)

    def get_package_from_path(self, path):
        """
        Get package name as file portion of path
        """
        return os.path.split(path)[1]

    def _parse_fetch_output(self, captured):
        """
        Scan captured setuptools output up to the first "Downloading" line

        Return (download_url, distance) where distance is the number of
        "Reading http..." lines before it that do not point at
        www.python.org/pypi
        """
        download_url = ""
        distance_from_pypi = 0
        for line in captured.split('\n'):
            s = re.search(r"Reading http(.*)", line)
            if s:
                if not re.search(r"www\.python\.org/pypi", s.group(1)):
                    distance_from_pypi += 1
                continue
            s = re.search(r"Downloading (.*)", line)
            if s:
                download_url = s.group(1)
                break
        return download_url, distance_from_pypi

    def get_pkg_from_pypi(self):
        """
        Download package using setuptools utilities
        """
        self.log.info("Trying to download package %s from PyPI using setuptools utilities" % self.name)
        try:
            from setuptools.package_index import PackageIndex
            from pkg_resources import Requirement
            from distutils import log
        except ImportError:
            msg = "Error: setuptools is not installed and is required for downloading a package by name\n"
            msg += "You can download and process a package by its full URL via the -u or --url option\n"
            msg += "Example: python cheesecake.py --url=http://www.mems-exchange.org/software/durus/Durus-3.1.tar.gz"
            self.raise_exception(msg)

        # Temporarily set the log verbosity to INFO so we can capture setuptools info messages
        old_threshold = log.set_threshold(log.INFO)
        pkgindex = PackageIndex()
        old_stdout = sys.stdout
        sys.stdout = StdoutRedirector()
        try:
            output = pkgindex.fetch(Requirement.parse(self.name),
                                    self.sandbox,
                                    force_scan=True,
                                    source=True)
            captured_stdout = sys.stdout.read_buffer()
        finally:
            # Always give stdout back, even when fetch() raises.
            sys.stdout = old_stdout
            log.set_threshold(old_threshold)
        if output is None:
            self.raise_exception("Error: Could not find distribution for " + self.name)

        download_url, distance_from_pypi = self._parse_fetch_output(captured_stdout)
        self.sandbox_pkg_file = output
        self.package = self.get_package_from_path(output)
        self.log.info("Downloaded package %s from %s" % (self.package, download_url))

        index_type = "pypi_download"
        found_on_cheeseshop = bool(re.search(r"cheeseshop\.python\.org", download_url))
        if found_on_cheeseshop:
            value = INDEX_PYPI_DOWNLOAD
        else:
            value = INDEX_PYPI_DOWNLOAD - distance_from_pypi * INDEX_PYPI_DISTANCE
        self.index[index_type].value = value

        details = "downloaded package " + self.package
        if found_on_cheeseshop:
            details += " directly from the Cheese Shop"
        elif distance_from_pypi:
            details += " following %d link" % distance_from_pypi
            if distance_from_pypi > 1:
                details += "s"
            details += " from PyPI"
        else:
            details += " from " + download_url
        self.index[index_type].details = details

    def download_pkg(self):
        """
        Use ``urllib.urlretrieve`` to download package to file in sandbox dir
        """
        self.sandbox_pkg_file = os.path.join(self.sandbox, self.package)
        try:
            downloaded_filename, headers = urlretrieve(self.url, self.sandbox_pkg_file)
        except IOError as e:
            self.log.error("Error downloading package %s from URL %s" % (self.package, self.url))
            self.raise_exception(str(e))
        # An HTML response instead of an archive may be an error page.
        if re.search("Content-Type: text/html", str(headers), re.IGNORECASE):
            f = open(downloaded_filename)
            try:
                page = f.read()
            finally:
                f.close()
            if re.search("404 Not Found", page):
                self.raise_exception("Got '404 Not Found' error while trying to download package ... exiting")
        index_type = "url_download"
        self.index[index_type].value = INDEX_URL_DOWNLOAD
        self.index[index_type].details = "downloaded package %s from URL %s" % (self.package, self.url)

    def copy_pkg(self):
        """
        Copy package file to sandbox directory
        """
        self.sandbox_pkg_file = os.path.join(self.sandbox, self.package)
        if not os.path.isfile(self.package_path):
            self.raise_exception("%s is not a valid file ... exiting" % self.package_path)
        self.log("Copying file %s to %s" % (self.package_path, self.sandbox_pkg_file))
        shutil.copyfile(self.package_path, self.sandbox_pkg_file)

    def _split_package_type(self, package):
        """
        Return (name, type) for a file name ending in a supported archive
        extension, or ("", "") when no extension from package_types matches
        """
        for package_type in self.package_types:
            suffix = "." + package_type
            if package.endswith(suffix) and len(package) > len(suffix):
                return package[:-len(suffix)], package_type
        return "", ""

    def _is_inside_sandbox(self, name):
        """
        Return True if archive member ``name`` resolves to a path inside the sandbox
        """
        root = os.path.abspath(self.sandbox)
        target = os.path.abspath(os.path.join(root, name))
        return target == root or target.startswith(root + os.sep)

    def _top_level_dir(self, name):
        """
        Return the first real path component of an archive member name
        """
        for part in name.replace("\\", "/").split("/"):
            if part not in ("", "."):
                return part
        return ""

    def unpack_pkg(self):
        """
        Unpack the package in the sandbox directory

        Currently supported archive types:

         * .tar.gz and .tgz (handled with ``tarfile`` module)
         * .zip (handled with ``zipfile`` module)
        """
        # package_name is name of package without file extension (ex. twill-7.3)
        self.package_name, self.package_type = self._split_package_type(self.package)
        if not self.package_type:
            msg = "Could not determine package type for package '%s'" % self.package
            msg += "\nCurrently recognized types: " + " ".join(self.package_types)
            self.raise_exception(msg)
        self.log.debug("Package name: " + self.package_name)
        self.log.debug("Package type: " + self.package_type)

        self.sandbox_pkg_dir = os.path.join(self.sandbox, self.package_name)
        if os.path.isdir(self.sandbox_pkg_dir):
            shutil.rmtree(self.sandbox_pkg_dir)

        if self.package_type in ["tar.gz", "tgz"]:
            self.untar_pkg()
        elif self.package_type == "zip":
            self.unzip_pkg()

        index_type = "unpack_dir"
        details = "unpack directory is " + self.unpack_dir
        if self.unpack_dir != self.package_name:
            details += " instead of the expected " + self.package_name
            self.package_name = self.unpack_dir
        else:
            details += " as expected"
        self.index[index_type].value = INDEX_UNPACK_DIR
        self.index[index_type].details = details

        if not self.quiet:
            self.log.info("Detailed info available in log file %s" % self.logfile)

    def untar_pkg(self):
        """
        Untar the package in the sandbox directory

        Uses tarfile module
        """
        try:
            t = tarfile.open(self.sandbox_pkg_file)
        except tarfile.ReadError:
            self.raise_exception("Could not read tar file %s ... exiting" % self.sandbox_pkg_file)

        try:
            members = t.getmembers()
            if not members:
                self.raise_exception("Tar file %s is empty ... exiting" % self.sandbox_pkg_file)
            for member in members:
                # The archive is untrusted: refuse names escaping the sandbox.
                # NOTE(review): link members are not inspected here.
                if not self._is_inside_sandbox(member.name):
                    self.raise_exception("Refusing to unpack %s outside of the sandbox ... exiting" % member.name)
                t.extract(member, self.sandbox)
        finally:
            t.close()

        self.unpack_dir = self._top_level_dir(members[0].name)

        index_type = "unpack"
        self.index[index_type].value = INDEX_UNPACK
        self.index[index_type].details = "package untar-ed successfully"

    def unzip_pkg(self):
        """
        Unzip the package in the sandbox directory

        Uses zipfile module
        """
        try:
            z = zipfile.ZipFile(self.sandbox_pkg_file)
        except zipfile.error:
            self.raise_exception("Error unzipping file %s ... exiting" % self.sandbox_pkg_file)

        try:
            names = z.namelist()
            if not names:
                self.raise_exception("Zip file %s is empty ... exiting" % self.sandbox_pkg_file)
            for name in names:
                # The archive is untrusted: refuse names escaping the sandbox.
                if not self._is_inside_sandbox(name):
                    self.raise_exception("Refusing to unpack %s outside of the sandbox ... exiting" % name)
                # Recreate the directory structure, then extract plain files.
                target_dir = os.path.join(self.sandbox, os.path.split(name)[0])
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                if not name.endswith('/'):
                    outfile = open(os.path.join(self.sandbox, name), 'wb')
                    try:
                        outfile.write(z.read(name))
                    finally:
                        outfile.close()
        finally:
            z.close()

        self.unpack_dir = self._top_level_dir(names[0])

        index_type = "unpack"
        self.index[index_type].value = INDEX_UNPACK
        self.index[index_type].details = "package unzipped successfully"

    def walk_pkg(self):
        """
        Traverse the file system tree rooted at sandbox/package_name

         * Compute indexes for special files and directories
         * Identify Python files, test files, etc.
        """
        cwd = os.getcwd()
        # Paths are collected relative to the sandbox directory.
        os.chdir(self.sandbox)
        try:
            for rootdir, dirs, files in os.walk(self.package_name):
                tail = os.path.split(rootdir)[1]
                dirs_in_rootdir = rootdir.split(os.path.sep)
                for cheese_dir in self.cheese_dirs:
                    if not tail.startswith(cheese_dir):
                        continue
                    if files or dirs:
                        if cheese_dir in self.critical_cheese_dirs:
                            value = INDEX_DIR_CRITICAL
                            details = "critical directory found"
                            self.log.debug("critical_cheese_dir found: " + cheese_dir)
                        else:
                            value = INDEX_DIR
                            details = "directory found"
                            self.log.debug("cheese_dir found: " + cheese_dir)
                    else:
                        value = INDEX_DIR_EMPTY
                        details = "empty directory found"
                        self.log.debug("empty cheese_dir found: " + cheese_dir)
                    self.index["dir"].set_index(cheese_dir, value, details)
                for file in files:
                    fullpath = os.path.join(rootdir, file)
                    lowered = file.lower()
                    for cheese_file in self.cheese_files:
                        # Case-insensitive prefix match (README, readme.txt, ...)
                        if not lowered.startswith(cheese_file):
                            continue
                        if cheese_file in self.critical_cheese_files:
                            value = INDEX_FILE_CRITICAL
                            details = "critical file found"
                            self.log.debug("critical_cheese_file found: " + cheese_file)
                        else:
                            value = INDEX_FILE
                            details = "file found"
                            self.log.debug("cheese_file found: " + cheese_file)
                        self.index["file"].set_index(cheese_file, value, details)

                    if self.is_py_file(file, dirs_in_rootdir):
                        self.pkg_files["py"].append(fullpath)
                        self.log.debug("py file found: " + fullpath)
                    if os.path.splitext(file)[1] == ".pyc":
                        self.pkg_files["pyc"].append(fullpath)
                        self.log.debug("pyc file found: " + fullpath)
                    if self.is_test_file(file, dirs_in_rootdir):
                        self.pkg_files["test"].append(fullpath)
                        self.log.debug("test file found: " + fullpath)
        finally:
            os.chdir(cwd)

        len_pyc_list = len(self.pkg_files["pyc"])
        if len_pyc_list:
            # INDEX_FILE_PYC is already negative: shipping .pyc files is a penalty.
            self.index["file"].set_index("pyc", value=INDEX_FILE_PYC,
                                         details="%d .pyc files found" % len_pyc_list)
        self.log.debug("Found %d py files" % len(self.pkg_files["py"]))
        self.log.debug("Found %d pyc files" % len(self.pkg_files["pyc"]))
        self.log.debug("Found %d test files" % len(self.pkg_files["test"]))

    def is_py_file(self, file, dirs):
        """
        Return True if file ends with .py and it is not a special file and it is not
        in special directory
        """
        if os.path.splitext(file)[1] != ".py":
            return False
        if file in ["setup.py", "ez_setup.py", "__init__.py", "__pkginfo__.py"]:
            return False
        # Same prefixes as self.cheese_dirs ("doc" also covers "docs").
        for dir in dirs:
            if dir.startswith(("test", "doc", "demo", "example")):
                return False
        return True

    def is_test_file(self, file, dirs):
        """
        Return True if file is below a directory whose name starts with "test"
        """
        if file in ["__init__.py"]:
            return False
        for dir in dirs:
            if dir.startswith("test"):
                return True
        return False

    def index_file(self):
        """
        Return CompositeIndex object of type "file"
        """
        return self.index["file"]

    def index_dir(self):
        """
        Return CompositeIndex object of type "dir"
        """
        return self.index["dir"]

    def index_pypi_download(self):
        """
        Verify that package can be downloaded from PyPI

        Return Index object of type "pypi_download"
        """
        index_type = "pypi_download"
        if self.url:
            # Package was downloaded directly from URL
            self.index[index_type].value = 0
            self.index[index_type].details = "package was downloaded directly from URL"

        if self.package_path:
            # Package was processed from file system path
            self.index[index_type].value = 0
            self.index[index_type].details = "package was processed from file system path"

        # Otherwise, index["pypi_download"] was already set in get_pkg_from_pypi()
        return self.index[index_type]

    def index_url_download(self):
        """
        Verify that package can be downloaded from an URL

        Return Index object of type "url_download"
        """
        # index["url_download"] is already set in download_pkg()
        return self.index["url_download"]

    def index_unpack(self):
        """
        Verify that package can be unpacked

        Return Index object of type "unpack"
        """
        # index["unpack"] is already set in untar_pkg() / unzip_pkg()
        return self.index["unpack"]

    def index_unpack_dir(self):
        """
        Verify that unpack directory has same name as package

        Return Index object of type "unpack_dir"
        """
        # index["unpack_dir"] is already set in unpack_pkg()
        return self.index["unpack_dir"]

    def index_install(self):
        """
        Verify that package can be installed in alternate directory

        Return Index object of type "install"
        """
        index_type = "install"
        self.sandbox_install_dir = os.path.join(self.sandbox, "tmp_install_%s" % self.package_name)
        # Run setup.py from the package directory without changing our own cwd.
        p = Popen(["python", "setup.py", "install", "--home=%s" % self.sandbox_install_dir],
                  stdout=PIPE, stderr=STDOUT,
                  cwd=os.path.join(self.sandbox, self.package_name))
        p.communicate()
        if not p.returncode:
            # Install succeeded
            self.index[index_type].value = INDEX_INSTALL
            self.index[index_type].details = "package installed in %s" % self.sandbox_install_dir
        else:
            # Install failed
            self.index[index_type].details = "could not install package in %s" % self.sandbox_install_dir
        return self.index[index_type]

    def index_docstrings(self):
        """
        Compute docstring index as percentage of modules/classes/methods/functions
        that have docstrings associated with them

        Return Index object of type "docstrings"
        """
        cnt = 0
        docstring_cnt = 0
        index_type = "docstrings"
        for pyfile in self.pkg_files["py"]:
            fullpath = os.path.join(self.sandbox, pyfile)
            code = CodeParser(fullpath, self.log.debug)
            cnt += code.object_count()
            docstring_cnt += code.docstring_count()
        if cnt:
            ratio = float(docstring_cnt) / float(cnt)
        else:
            ratio = 0
        details = "found %d/%d=%.2f%% modules/classes/methods/functions with docstrings" % \
                  (docstring_cnt, cnt, ratio * 100)
        self.index[index_type].value = int(ceil(ratio * 100))
        self.index[index_type].details = details
        return self.index[index_type]

    def _parse_pylint_score(self, output):
        """
        Return the score (as a string such as "7.50") from the last line of
        pylint output containing "<number>/10", or None if there is none
        """
        for line in reversed(output.split("\n")):
            s = re.search(r" (\d+\.\d+)/10", line)
            if s:
                return s.group(1)
        return None

    def index_pylint(self):
        """
        Compute pylint index as average of positive pylint scores obtained for
        the Python files identified in the package

        Return Index object of type "pylint"
        """
        index_type = "pylint"
        try:
            import pylint
        except ImportError:
            self.index[index_type].details = "pylint not available"
            return self.index[index_type]
        total_score = 0.0
        cnt = 0
        for pyfile in self.pkg_files["py"]:
            module = os.path.splitext(os.path.split(pyfile)[1])[0]
            if module == "setup" or module == "ez_setup" or module.startswith("__"):
                continue
            fullpath = os.path.join(self.sandbox, pyfile)
            self.log.debug("Running pylint on file " + fullpath)
            p = Popen(["pylint", fullpath], stdout=PIPE, stderr=STDOUT,
                      universal_newlines=True)
            output = p.communicate()[0]
            if p.returncode:
                # We encountered an error
                continue
            # We only take positive scores into account
            score = self._parse_pylint_score(output)
            if score is None:
                continue
            self.log.debug("pylint score for module %s: %s" % (module, score))
            if score == "0.00":
                self.log.debug("Ignoring scores of 0.00")
                continue
            total_score += float(score)
            cnt += 1
        if not cnt:
            # Nothing could be scored; avoid dividing by zero.
            self.index[index_type].value = 0
            self.index[index_type].details = "no pylint score could be computed"
            return self.index[index_type]
        avg_value = total_score / cnt
        self.index[index_type].value = int(ceil(avg_value * 10))
        self.index[index_type].details = "average score is %.2f out of 10" % avg_value
        return self.index[index_type]

    def compute_cheesecake_index(self):
        """
        Compute overall Cheesecake index for the package by adding up
        specific indexes, print the totals and clean up the sandbox

        Return the absolute index value
        """
        self.log.info("A given package can currently reach a MAXIMUM number of %d points" % self.max_cheesecake_index)
        self.log.info("Starting computation of Cheesecake index for package '%s'" % (self.package))

        index_types = ["pypi_download"]
        if self.url:
            index_types.append("url_download")
        index_types += ["unpack", "unpack_dir", "install"]
        self.cheesecake_index_installability = self.process_partial_index(
            "INSTALLABILITY", index_types, self.max_cheesecake_index_installability)

        index_types = ["file", "dir", "docstrings"]
        self.cheesecake_index_documentation = self.process_partial_index(
            "DOCUMENTATION", index_types, self.max_cheesecake_index_documentation)

        index_types = ["pylint"]
        self.cheesecake_index_codekwalitee = self.process_partial_index(
            "CODE KWALITEE", index_types, self.max_cheesecake_index_codekwalitee)

        print("")
        self.print_line("=" * (PAD_TEXT + PAD_VALUE + 1))
        print(pad_msg("OVERALL CHEESECAKE INDEX (ABSOLUTE)", self.cheesecake_index))
        # Floor division keeps the percentage an integer on Python 2 and 3.
        percentage = (self.cheesecake_index * 100) // self.max_cheesecake_index
        msg = pad_msg("OVERALL CHEESECAKE INDEX (RELATIVE)", percentage)
        msg += "  (%d out of a maximum of %d points is %d%%)" % \
               (self.cheesecake_index, self.max_cheesecake_index, percentage)
        print(msg)
        self.cleanup()

        return self.cheesecake_index

    def process_partial_index(self, partial_index_name, index_types, max_value):
        """
        Compute the indexes listed in index_types, print their absolute and
        relative sum under partial_index_name and return the absolute sum
        """
        print("")
        self.log.info("Starting computation of %s index (max. points = %d)" %
                      (partial_index_name, max_value))
        partial_index_value = 0
        for index_type in index_types:
            partial_index_value += self.process_index(index_type)

        self.print_line("-" * (PAD_TEXT + PAD_VALUE + 1))
        print(pad_msg("%s INDEX (ABSOLUTE)" % partial_index_name, partial_index_value))
        percentage = (partial_index_value * 100) // max_value
        msg = pad_msg("%s INDEX (RELATIVE)" % partial_index_name, percentage)
        msg += "  (%d out of a maximum of %d points is %d%%)" % \
               (partial_index_value, max_value, percentage)
        print(msg)
        return partial_index_value

    def process_index(self, index_type):
        """
        Compute and print index of specified type, add it to the overall
        index and return its value
        """
        index = self.index[index_type]
        getattr(self, "index_" + index_type)()
        if not self.quiet:
            index.print_info()
        self.cheesecake_index += index.value
        return index.value

    def print_line(self, line):
        """
        Print line of text, unless quiet flag was given
        """
        if self.quiet:
            return
        print(line)
class CodeParser(object):
    """
    Information about the structure of a Python module

     * Collects classes, methods, functions and any associated docstrings
     * Does some dumb grep-style parsing, but in the future may do some real smart parsing
    """
    def __init__(self, pyfile, log=None):
        """
        Parse pyfile; messages go to log.codeparser, or to
        logger.default.codeparser when no log is given

        If the file cannot be opened the error is printed and the parser
        stays empty
        """
        if log:
            self.log = log.codeparser
        else:
            self.log = logger.default.codeparser
        self.classes = []
        self.methods = []
        self.functions = []
        self.docstrings = {}
        # Maps a line number to the name of the object defined on that line.
        self.object_at_page = {}
        try:
            self.fh = open(pyfile)
        except IOError as e:
            print(str(e))
            return

        try:
            (path, filename) = os.path.split(pyfile)
            (self.module, ext) = os.path.splitext(filename)
            self.log("Inspecting file: " + pyfile)
            self.parse_file()
        finally:
            # The handle is only needed while parsing.
            self.fh.close()

        self.log("classes: " + ",".join(self.classes))
        self.log("methods: " + ",".join(self.methods))
        self.log("functions: " + ",".join(self.functions))

    def parse_file(self):
        """
        Parse module text and retrieve classes, methods, functions
        and associated docstrings
        """
        if self.fh.closed:
            return
        crt_line = 0
        for line in self.fh:
            crt_line += 1
            # A triple quote that does not directly follow a definition line
            # is counted as the module docstring.
            if (crt_line - 1) not in self.object_at_page:
                if re.search(r"\"\"\"", line):
                    self.docstrings[self.module] = 1
            s = re.search(r"^class (\w+)(\(|:)", line)
            if s:
                cls = s.group(1)
                self.classes.append(cls)
                self.object_at_page[crt_line] = cls
                self.log("Found class " + cls)
                continue
            s = re.search(r"^\s+def (\w+)\(", line)
            if s:
                method = s.group(1)
                if method.startswith("__"):
                    self.log("Skipping method " + method)
                    continue
                self.methods.append(method)
                self.object_at_page[crt_line] = method
                self.log("Found method " + method)
                continue
            s = re.search(r"^def (\w+)\(", line)
            if s:
                func = s.group(1)
                self.functions.append(func)
                self.object_at_page[crt_line] = func
                self.log("Found function " + func)
                continue
            s1 = re.search(r"\"\"\"", line)
            s2 = re.search(r"\".*\S+.*\"", line)
            if s1 or s2:
                # Look at object if any on previous line
                obj = self.object_at_page.get(crt_line - 1)
                if not obj and crt_line > 1:
                    # Look at object if any 2 lines before
                    obj = self.object_at_page.get(crt_line - 2)
                if obj:
                    self.docstrings[obj] = 1
                    self.log("Found docstring for object " + obj)

    def object_count(self):
        """
        Return number of objects found in this module

         * module
         * classes
         * methods
         * functions
        """
        module_count = 1
        return (module_count + len(self.classes) +
                len(self.methods) + len(self.functions))

    def docstring_count(self):
        """
        Return number of docstrings found in this module
        """
        return len(self.docstrings)
self.fh = os.tmpfile() + + def write(self, buf): + self.fh.write(buf) + + def flush(self): + self.fh.flush() + + def read_buffer(self): + """ + Return contents of the temp file + """ + self.fh.seek(0) + return self.fh.read() + +### Utility functions ### + +def pad_with_dots(msg, length=PAD_TEXT): + """ + Pad text with dots up to given length + """ + length = len(msg) + msg = msg + " " + for i in range(length, PAD_TEXT): + msg += "." + return msg + +def pad_left_spaces(value, length=PAD_VALUE): + """ + Pad value with spaces at left up to given length + """ + msg = "" + diff = length - len(str(value)) + for i in range(diff): + msg += " " + msg += str(value) + return msg + +def pad_msg(msg, value): + """ + Pad message with dots and pad value with spaces + """ + length = len(msg) + msg = msg + " " + for i in range(length, PAD_TEXT): + msg += "." + diff = PAD_VALUE - len(str(value)) + for i in range(diff): + msg += " " + msg += str(value) + return msg + +### End utility functions ### + +def process_cmdline_args(): + """ + Parse command-line options + """ + parser = OptionParser() + parser.add_option("-n", "--name", dest="name", + default="", help="package name (will be retrieved via setuptools utilities, if present)") + parser.add_option("-u", "--url", dest="url", + default="", help="package URL") + parser.add_option("-p", "--path", dest="path", + default="", help="package path on local file system") + parser.add_option("-s", "--sandbox", dest="sandbox", + default="/tmp/cheesecake_sandbox", + help="directory where package will be unpacked (default=/tmp/cheesecake_sandbox)") + parser.add_option("-v", "--verbose", action="store_true", dest="verbose", + default=False, help="verbose output (default=False)") + parser.add_option("-q", "--quiet", action="store_true", dest="quiet", + default=False, help="only print Cheesecake index value (default=False)") + + (options, args) = parser.parse_args() + return options + +def main(): + """ + Display Cheesecake index for package 
specified via command-line options + """ + options = process_cmdline_args() + name = options.name + url = options.url + path = options.path + sandbox = options.sandbox + verbose = options.verbose + quiet = options.quiet + + if not name and not url and not path: + print "Error: No package name, URL or path specified (see --help)" + sys.exit(1) + + try: + c = Cheesecake(name=name, url=url, path=path, sandbox=sandbox, verbose=verbose, quiet=quiet) + c.compute_cheesecake_index() + except CheesecakeError, e: + print str(e) + +if __name__ == "__main__": + main()