#!/usr/bin/python -t
# -*- mode: Python; indent-tabs-mode: nil; -*-
#
# Crude unowned directories checker for repositories.
# Author: Michael Schwendt
# Version: 2009-09-07
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

import errno
import os
import re
import sys
from optparse import OptionParser

import yum
import yum.Errors
from yum.misc import getCacheDir
import rpmUtils.arch
import rpmUtils.miscutils
from yum.packageSack import ListPackageSack

# Directory (relative to the current working directory) holding one job
# file per package that has already been processed.
jobroot = 'dircheck_work'

# Comps groups whose packages' directories are always considered owned.
mingroups = ['Base', 'Core']

# list of unowned directories which are way too common
extrawhitelist = [
    '/usr/lib/pkgconfig',
    '/usr/lib64/pkgconfig',
    '/usr/share/aclocal',
    '/usr/share/fonts',
    '/usr/share/gnome/help',
    '/usr/share/icons',
    '/usr/share/idl',
    '/usr/share/javadoc',
    '/usr/share/omf',
    '/var',  # missing from filesystem pkg returnFileEntries (#246485)
]

# brute-force list of prefix values for paths we want to ignore
wlstartswith = [
    '/usr/share/man',
    '/usr/share/locale',
    '/usr/share/icons/hicolor',
    '/usr/share/icons/locolor',
    '/usr/share/icons/crystalsvg',
    '/usr/share/icons/oxygene',
]


def _versionTuple(version):
    """Return the leading numeric components of a dotted version string.

    '3.2.24' becomes (3, 2, 24).  Parsing stops at the first component
    that does not start with a digit.
    """
    numbers = []
    for component in version.split('.'):
        match = re.match(r'\d+', component)
        if match is None:
            break
        numbers.append(int(match.group()))
    return tuple(numbers)


def yumOlderThan(version):
    """Return True if the imported yum is older than 'version'.

    Versions are compared numerically; a plain string comparison would
    sort e.g. '3.2.8' after '3.2.24'.
    """
    return _versionTuple(yum.__version__) < _versionTuple(version)


def iswhitelisted(path):
    """Return True if 'path' starts with one of the wlstartswith prefixes."""
    for p in wlstartswith:
        if path.startswith(p):
            return True
    return False


def isWantedName(name, regexplist):
    """Return True if any regexp in 'regexplist' matches somewhere in 'name'."""
    for r in regexplist:
        if re.search(r, name):
            return True
    return False  # reject by default


def escapePlusRuns(pattern):
    """Backslash-escape every '+' that is part of a run of two or more.

    This keeps package names such as 'libstdc++' from being read as regexp
    repetition operators.  A single '+' is left alone so it still works as
    a regexp quantifier.
    """
    return re.sub(r'\+{2,}', lambda m: '\\+' * len(m.group()), pattern)


def parseArgs():
    """Parse the command line and return the (opts, args) pair."""
    usage = ("usage: %s [-c <config file>] [-a <arch>] "
             "[-r <repoid>] [-r <repoid2>]" % sys.argv[0])
    parser = OptionParser(usage=usage)
    parser.add_option("-a", "--arch", default=[], action='append',
        help='check packages of the given archs, can be specified multiple ' +
             'times (default: current arch)')
    parser.add_option("-c", "--config", default='/etc/yum.conf',
        help='config file to use (defaults to /etc/yum.conf)')
    parser.add_option("-d", "--cachedir", default='',
        help="specify a custom directory for metadata cache")
    parser.add_option("-n", "--name", default=[], action='append',
        help="specify package name search pattern (regexp)")
    parser.add_option("-r", "--repoid", default=[], action='append',
        help="specify repo ids to include, can be specified multiple times (default is all enabled)")
    (opts, args) = parser.parse_args()
    return (opts, args)


class RepoLister(yum.YumBase):
    """YumBase subclass that collects the file lists of a package and,
    optionally, of everything in its dependency chain.

    After resolveRequires() the collected paths are in self.fileslist.
    """

    def __init__(self, arch=None, config="/etc/yum.conf"):
        """Configure yum from 'config' for the given list of archs.

        arch   -- list of arch names; empty/None selects the default arch.
                  With yum < 3.2.24 only the first entry is used.
        config -- path of the yum configuration file.
        """
        yum.YumBase.__init__(self)
        if arch is None:
            arch = []
        if yumOlderThan('3.2.24'):
            if arch:
                self.arch = arch[0]
            else:
                self.arch = None
        else:
            self._rc_arches = arch

        if yumOlderThan('3.0'):  # TODO: check
            self.doConfigSetup(fn=config)
        else:
            self.doConfigSetup(fn=config, init_plugins=False)

        if hasattr(self.repos, 'sqlite'):
            self.repos.sqlite = False
            self.repos._selectSackType()

        self.resetResolver()
        self.resetFilesList()

    # TODO: Caching too much increases the Python memory usage beyond
    # what is helpful for checking entire repos.
    def resetResolver(self):
        """Forget which dependencies and packages were visited so far."""
        self.resolved = {}
        self.seenbefore = set()

    def resetFilesList(self):
        """Start a fresh list of collected paths."""
        # A new list object is created on purpose: callers keep a reference
        # to the previous list across a reset.
        self.fileslist = []
        self.headcache = set()

    def fixYum(self):
        """Point yum.packages.base at this instance."""
        # Work around bug in yum <= 2.6.1. Without this, when using
        # multiple instances of this class, package objects point to
        # unconfigured repositories, e.g. in downloadHeader().
        import yum.packages
        yum.packages.base = self

    def readMetadata(self):
        """Set up repos and sacks and load the 'filelists' metadata of
        every enabled repository."""
        self.doRepoSetup()
        if yumOlderThan('3.2.24'):
            self.doSackSetup(rpmUtils.arch.getArchList(self.arch))
        else:
            archs = []
            if not self._rc_arches:
                archs.extend(self.arch.archlist)
            else:
                for arch in self._rc_arches:
                    archs.extend(self.arch.get_arch_list(arch))
            self.doSackSetup(archs)
        #self.doSackFilelistPopulate()  # yum >= 2.9
        for repo in self.repos.listEnabled():
            self.repos.populateSack(which=[repo.id], mdtype='filelists')

    def returnNewest(self):
        """Return the newest packages per name/arch, sorted by their string
        form, leaving out packages that are obsoleted by another one.

        The result is also stored in self.pkglist.
        """
        pkgs = self.pkgSack.returnNewestByNameArch()
        mypkgSack = ListPackageSack(pkgs)
        pkgtuplist = mypkgSack.simplePkgList()
        # Support new checkForObsolete code in Yum (#190116)
        # so we don't examine old _obsolete_ sub-packages.
        import rpmUtils.updates
        self.up = rpmUtils.updates.Updates([], pkgtuplist)
        self.up.rawobsoletes = mypkgSack.returnObsoletes()

        self.pkglist = []
        for pkg in pkgs:
            try:
                thispkgobsdict = self.up.checkForObsolete([pkg.pkgtup])
                if pkg.pkgtup in thispkgobsdict:
                    continue
            except AttributeError:
                # Updates object without checkForObsolete: keep the package.
                pass
            self.pkglist.append(pkg)

        self.pkglist.sort(key=lambda p: p.__str__())
        return self.pkglist

    def resolveRequires(self, pkg, recurse=True):
        """Append the paths of 'pkg' to self.fileslist.

        All directory entries are added.  Of the regular files, only the
        first one seen per parent directory is added, since one file is
        enough to derive the directory chain leading to it.

        With 'recurse' true the same is done for every package that
        satisfies a requirement of 'pkg', transitively.  Packages already
        visited since the last resetResolver() are skipped.
        """
        pkgid = pkg.__str__()
        if pkgid in self.seenbefore:
            return
        self.seenbefore.add(pkgid)

        # Construct list of paths to examine.
        ftypes = pkg.returnFileTypes()
        if 'dir' in ftypes:
            for f in pkg.returnFileEntries(ftype='dir'):
                f = f.replace('//', '/')  # TODO: use os.path.normpath everywhere?
                self.fileslist.append(f)
        if 'file' in ftypes:
            for f in pkg.returnFileEntries(ftype='file'):
                f = f.replace('//', '/')  # TODO: use os.path.normpath everywhere?
                # If multiple files share a common path, add the path only once.
                head = os.path.split(f)[0]
                if head in self.headcache:
                    continue
                self.headcache.add(head)
                self.fileslist.append(f)

        if not recurse:
            return

        # yum-2.6.0-1: pkg.returnPrco('requires') fails with
        #   AttributeError: YumLocalPackage instance has no attribute 'prco'
        # hence requiresList() is used here.
        for dep in pkg.requiresList():
            if dep.startswith('rpmlib'):
                continue  # ignore rpmlib deps
            if dep in self.resolved:
                continue  # resolved before
            results = []
            try:
                results = self.returnPackagesByDep(dep)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:  # TODO: which exceptions do we want to catch?
                # Best effort: an unresolvable dependency is skipped below.
                pass
            if len(results) < 1:
                continue  # unresolved
            self.resolved[dep] = True
            for p in results:
                self.resolveRequires(p, recurse)

    def log(self, value, msg):
        """Discard log messages."""
        pass


def findOrphans(filelist, whitelist, depchainfilelist):
    """Return the unowned paths leading to the entries of 'filelist'.

    Every entry is split into its chain of prefixes ('/a', '/a/b', ...,
    the entry itself).  A prefix is reported when it neither starts with a
    wlstartswith prefix nor appears in 'whitelist', 'depchainfilelist' or
    'filelist'.  Each path is reported once, in the order first met.

    filelist         -- iterable of paths of the package under test
    whitelist        -- container of paths considered owned
    depchainfilelist -- iterable of paths provided by the dependency chain
    """
    if not isinstance(whitelist, (set, frozenset)):
        whitelist = set(whitelist)
    owned = set(filelist)
    owned.update(depchainfilelist)

    orphans = []
    seenbefore = set()
    for f in filelist:
        testpath = ''
        # Drop all empty components so that a leading, trailing or doubled
        # '/' does not produce test paths such as '/usr/'.
        for p in [comp for comp in f.split('/') if comp]:
            testpath += '/%s' % p
            if testpath in seenbefore:
                continue
            seenbefore.add(testpath)
            if (testpath not in whitelist and testpath not in owned
                    and not iswhitelisted(testpath)):
                orphans.append(testpath)
    return orphans


def buildWhitelist(lister):
    """Return the set of directories that are treated as always owned.

    That is extrawhitelist plus the directories of the newest version of
    every package in the comps groups listed in mingroups, with '//'
    collapsed to '/'.  Progress and lookup errors are printed.
    """
    # Copy, so the module-level list is not extended as a side effect.
    whitelist = list(extrawhitelist)
    for group in mingroups:
        print('Adding packages from comps group: %s' % group)
        compsgroup = lister.comps.return_group(group)
        if compsgroup is None:
            # NOTE(review): presumably return_group yields None for a group
            # the repositories do not define -- confirm against yum.comps.
            print('  %s' % ('No such comps group: %s' % group,))
            continue
        for p in compsgroup.packages:
            try:
                newest = lister.pkgSack.returnNewestByName(p)[0]
                whitelist += newest.returnFileEntries(ftype='dir')
            except yum.Errors.PackageSackError:
                print('  %s' % (sys.exc_info()[1],))
    print('')
    sys.stdout.flush()

    # Remove duplicate '/' at beginning of names.
    # TODO: use os.path.normpath everywhere?
    return frozenset([d.replace('//', '/') for d in whitelist])


def writeJobFile(path, lines=()):
    """Create the job file 'path', one entry of 'lines' per line.

    An empty job file merely marks the package as done.
    """
    outf = open(path, 'w')
    try:
        for line in lines:
            outf.write('%s\n' % line)
    finally:
        outf.close()


def main():
    """Check the selected repositories for unowned directories.

    Without -n every package not yet recorded below jobroot is checked and
    a job file is written for it, so an interrupted run can be resumed.
    With -n only packages matching a pattern are checked and no job files
    are written.  Reports go to stdout.
    """
    (opts, cruft) = parseArgs()
    if yumOlderThan('3.2.24') and len(opts.arch) > 1:
        print('ERROR: can handle only a single arch with Yum < 3.2.24')
        sys.exit(errno.EINVAL)

    # Don't treat double-"+" as regexp meta-characters.
    opts.name[:] = [escapePlusRuns(pattern) for pattern in opts.name]
    usesearch = len(opts.name) > 0

    # Full set of repositories.
    lister = RepoLister(arch=opts.arch, config=opts.config)
    print('Target distribution:')
    if not opts.repoid:
        for repo in lister.repos.listEnabled():
            print('  %s' % (repo,))
    else:
        for repo in lister.repos.repos.values():
            if repo.id not in opts.repoid:
                repo.disable()
            else:
                repo.enable()
                print('  %s' % (repo,))

    if os.geteuid() != 0 or opts.cachedir != '':
        if opts.cachedir != '':
            cachedir = opts.cachedir
        else:
            cachedir = getCacheDir()
            if cachedir is None:
                print("Error: Could not make cachedir, exiting")
                sys.exit(50)
        lister.repos.setCacheDir(cachedir)

    try:
        print('Reading repository metadata...')
        sys.stdout.flush()
        lister.readMetadata()
    except yum.Errors.RepoError:
        print(sys.exc_info()[1])
        raise

    pkglist = lister.returnNewest()
    print('%d packages in repositories...' % len(pkglist))

    whitelist = buildWhitelist(lister)

    pkglist.sort(key=lambda p: p.__str__())

    jobdir = os.path.join(os.getcwd(), jobroot)
    if not usesearch and not os.path.exists(jobdir):
        os.makedirs(jobdir)

    # Providers of an orphaned path do not depend on the package being
    # examined, so the lookups are shared by all packages.
    provcache = {}

    for pkg in pkglist:
        jobpath = None
        if usesearch:
            if not isWantedName(pkg.returnSimple('name'), opts.name):
                continue
        else:  # only process pkgs not seen before
            jobpath = os.path.join(jobdir, pkg.__str__())
            if os.path.exists(jobpath):
                continue

        # Pass 1: the package on its own.
        lister.resetResolver()
        lister.resetFilesList()
        lister.resolveRequires(pkg, False)
        filesinpkg = lister.fileslist  # (!) keep this for pass 2

        # No orphans? Then we don't need to resolve deps.
        if not findOrphans(filesinpkg, whitelist, filesinpkg):
            if not usesearch:
                writeJobFile(jobpath)
            continue

        # Pass 2 with output: take the dependency chain into account.
        lister.resetResolver()
        lister.resetFilesList()
        lister.resolveRequires(pkg, True)
        orphans = findOrphans(filesinpkg, whitelist, lister.fileslist)

        if not orphans:
            if not usesearch:  # Just create pkg job file => pkg done.
                writeJobFile(jobpath)
            continue

        report = []
        report.append('=> %s' % pkg.returnSimple('sourcerpm'))
        report.append('=> %s (%s)' % (pkg, pkg.repoid))
        for o in orphans:
            report.append(o)
            # Add some details about potential providers.
            if o not in provcache:
                provcache[o] = lister.returnPackagesByDep(o)
            for p in provcache[o]:
                report.append(' provided by: %s' % p)
        report.append('')

        for l in report:
            print(l)
        sys.stdout.flush()
        if not usesearch:  # Write report to job file => pkg done.
            writeJobFile(jobpath, report)


if __name__ == "__main__":
    main()