#!/usr/bin/python -t
# -*- coding: utf-8; mode: Python; indent-tabs-mode: nil; -*-
#
# Unfinished brute-force 2-pass conflicts checker for repositories.
# Author: Michael Schwendt
# Version: 2009-04-01
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""Two-pass file conflicts checker for yum repositories.

Pass 1 dumps every file and directory entry of the newest packages into a
text file, sorts it and records all paths that are listed more than once.
Pass 2 downloads the RPM headers of the packages sharing a path and reports
those whose file checksums differ.

The statements are written so that they parse on old Python 2 releases as
well as on Python 3 (single-argument ``print(...)``, ``sys.exc_info()``).
"""

import errno
import os
import sys
import fnmatch
import glob
import re
import subprocess
from optparse import OptionParser
#import pdb
import codecs
import pickle

import yum
import yum.Errors
from yum.misc import getCacheDir
import rpmUtils.miscutils
import rpmUtils.arch
from yum.constants import *
from yum.packageSack import ListPackageSack

multicompatmode = False  # off by default and just a hidden hack

# Work files created in the current directory.
FILELIST = '_filelist.txt'
FILELIST_SORTED = '_filelist.sorted.txt'
DUPLICATES_PICKLE = '_duplicates.pickle'

# Separator between path and package tuple in the work files.
FIELD_SEP = ' ||| '

# Path prefixes that are skipped for differing archs in multicompatmode.
MULTICOMPAT_PREFIXES = ('/usr/bin', '/usr/include', '/usr/sbin', '/sbin',
                        '/usr/libexec', '/bin')

# Globals shared between main() and comparePathWithPkg(); set up in main().
lister = None
conflicts = {}


def _versionTuple(version):
    """Turn a dotted version string into a tuple of ints.

    Only the leading digits of every dot-separated piece are used; a piece
    without leading digits counts as 0.
    """
    parts = []
    for piece in str(version).split('.'):
        match = re.match(r'\d+', piece)
        if match:
            parts.append(int(match.group(0)))
        else:
            parts.append(0)
    return tuple(parts)


def _yumOlderThan(version):
    """Return True if the imported yum is older than `version`.

    Compares numerically: a plain string comparison would rank
    '3.2.9' above '3.2.24'.
    """
    return _versionTuple(yum.__version__) < _versionTuple(version)


def _toUnicode(value):
    """Return `value` as a unicode string, decoding byte strings as UTF-8."""
    if isinstance(value, type(u'')):
        return value
    return value.decode('utf-8')


def isWantedName(name, regexplist):
    """Return True if any regexp in `regexplist` is found in `name`."""
    for regexp in regexplist:
        if re.search(regexp, name):
            return True
    return False  # reject by default


def parseArgs():
    """Parse the command line and return optparse's (opts, args) pair."""
    # The <...> placeholders are assumed to be what the mangled usage
    # string originally contained.
    usage = ("usage: %s [-c <config file>] [-a <arch>] [-r <repoid>] "
             "[-r <repoid2>]" % sys.argv[0])
    parser = OptionParser(usage=usage)
    parser.add_option("-a", "--arch", default=[], action='append',
        help='check packages of the given archs, can be specified multiple ' +
             'times (default: current arch)')
    parser.add_option("-c", "--config", default='/etc/yum.conf',
        help='config file to use (defaults to /etc/yum.conf)')
    parser.add_option("-d", "--cachedir", default='',
        help="specify a custom directory for metadata cache")
    parser.add_option("-n", "--name", default=[], action='append',
        help="add a package name search pattern (regexp)")
    parser.add_option("-r", "--repoid", default=[], action='append',
        help="specify repo to search within, can be specified multiple times")
    (opts, args) = parser.parse_args()
    return (opts, args)


def archCheck(arch1, arch2):
    """Return True if both archs are equal, either one is 'noarch', or
    both map to the same base arch."""
    if arch1 == arch2:
        return True
    if arch1 == 'noarch' or arch2 == 'noarch':  # TODO: new check
        return True
    if rpmUtils.arch.getBaseArch(arch1) == rpmUtils.arch.getBaseArch(arch2):
        return True
    return False


class RepoLister(yum.YumBase):
    """YumBase subclass that loads repository metadata and collects
    candidate duplicate paths."""

    def __init__(self, arch=None, config=''):
        """Set up yum with the given config file.

        arch   -- list of arch names (None or empty: yum's default)
        config -- path of the yum config file
        """
        yum.YumBase.__init__(self)
        if arch is None:
            arch = []
        if _yumOlderThan('3.2.24'):
            # Older yum handles just one arch; main() rejects more.
            if arch:
                self.arch = arch[0]
            else:
                self.arch = None
        else:
            self._rc_arches = arch

        if _yumOlderThan('3.0'):  # TODO: check
            self.doConfigSetup(fn=config)
        else:
            self.doConfigSetup(fn=config, init_plugins=False)

        if hasattr(self.repos, 'sqlite'):
            self.repos.sqlite = False
            self.repos._selectSackType()

        self.pkglist = []

    def fixYum(self):
        """Point yum.packages.base at this instance."""
        # Work around bug in yum <= 2.6.1. Without this, when using
        # multiple instances of this class, package objects point to
        # unconfigured repositories, e.g. in downloadHeader().
        import yum.packages
        yum.packages.base = self

    def readMetadata(self):
        """Set up repos and package sacks and load the filelists metadata
        of every enabled repo."""
        self.doRepoSetup()
        if _yumOlderThan('3.2.24'):
            self.doSackSetup(rpmUtils.arch.getArchList(self.arch))
        else:
            archs = []
            if not self._rc_arches:
                archs.extend(self.arch.archlist)
            else:
                for arch in self._rc_arches:
                    archs.extend(self.arch.get_arch_list(arch))
            self.doSackSetup(archs)
        #self.doSackFilelistPopulate()  # yum >= 2.9
        for repo in self.repos.listEnabled():
            self.repos.populateSack(which=[repo.id], mdtype='filelists')

    def returnNewest(self):
        """Return the newest packages by name, without those reported as
        obsoleted, sorted by their string form. Also stored in
        self.pkglist."""
        pkgs = self.pkgSack.returnNewestByName()
        mypkgSack = ListPackageSack(pkgs)
        pkgtuplist = mypkgSack.simplePkgList()
        # Support new checkForObsolete code in Yum (#190116)
        # so we don't examine old _obsolete_ sub-packages.
        import rpmUtils.updates
        self.up = rpmUtils.updates.Updates([], pkgtuplist)
        self.up.rawobsoletes = mypkgSack.returnObsoletes()

        self.pkglist = []
        for pkg in pkgs:
            try:
                if pkg.pkgtup in self.up.checkForObsolete([pkg.pkgtup]):
                    continue
            except Exception:
                # Best effort: if the obsoletes check fails for any
                # reason, keep the package. KeyboardInterrupt and
                # SystemExit are no longer swallowed here (Python >= 2.5).
                pass
            self.pkglist.append(pkg)

        self.pkglist.sort(key=lambda pkg: pkg.__str__())
        return self.pkglist

    def _writeFileList(self, filename):
        """Write one 'path ||| n e v r a' line per file and dir entry of
        the newest packages to `filename` (UTF-8)."""
        outf = codecs.open(filename, 'w', 'utf-8')
        try:
            for pkg in self.returnNewest():
                (n, a, e, v, r) = pkg.pkgtup
                pkgtup = '%s %s %s %s %s' % (n, e, v, r, a)
                for f in pkg.returnFileEntries():
                    outf.write(u'%s%s%s\n' % (_toUnicode(f), FIELD_SEP, pkgtup))
                for d in pkg.returnFileEntries(ftype='dir'):
                    outf.write(u'%s%s%s\n' % (_toUnicode(d), FIELD_SEP, pkgtup))
        finally:
            outf.close()

    def _sortFileList(self, filename, sortedname):
        """Sort `filename` into `sortedname` with the external sort(1).

        Raises RuntimeError if sort exits with a non-zero status.
        """
        # Force byte-wise collation. With a locale-aware collation, lines
        # of different paths may interleave, and duplicates would then be
        # missed by the adjacent-line scan that follows.
        env = dict(os.environ)
        env['LC_ALL'] = 'C'
        status = subprocess.call(['sort', '-o', sortedname, filename], env=env)
        if status != 0:
            raise RuntimeError('sorting %s failed (exit status %s)'
                               % (filename, status))

    def _collectDuplicates(self, sortedname):
        """Scan the sorted list and return {path: [pkgtup, ...]} for every
        path that appears on more than one line."""
        dupes = {}
        curpath = None
        curpkgtup = None
        inf = codecs.open(sortedname, 'r', 'utf-8')
        try:
            for line in inf:
                # Split at the last separator; the package tuple never
                # contains it.
                (path, pkgtup) = line.rstrip().rsplit(FIELD_SEP, 1)
                if not curpath or (path != curpath):
                    curpath = path
                    curpkgtup = pkgtup
                    continue
                # path == curpath
                dupes.setdefault(curpath, [curpkgtup]).append(pkgtup)
        finally:
            inf.close()
        return dupes

    def findDuplicates(self, recreate=False):
        """Pass 1: find paths listed by more than one package entry and
        pickle them to '_duplicates.pickle'.

        Does nothing unless `recreate` is true. Creates '_filelist.txt'
        and '_filelist.sorted.txt' in the current directory as well.
        """
        if not recreate:
            return

        self._writeFileList(FILELIST)

        print("Sorting...")
        sys.stdout.flush()
        self._sortFileList(FILELIST, FILELIST_SORTED)

        print("Pruning...")
        sys.stdout.flush()
        dupes = self._collectDuplicates(FILELIST_SORTED)

        outf = open(DUPLICATES_PICKLE, 'wb')
        try:
            pickle.dump(dupes, outf)
        finally:
            outf.close()

        # Diagnostic output only.
#        outf = codecs.open('_filelist.duplicates.txt', 'w', 'utf-8')
#        for (k,v) in dupes.items():
#            print >> outf, k
#            v.sort()
#            for pkgtup in v:
#                print >> outf, ' %s' % pkgtup
#        outf.close()
        del dupes
        return

    def createFilesDict(self, pkgs):
        """Build self.filesdict: file path -> list of packages listing it."""
        # NOTE(review): paths loaded from the pickle are unicode while these
        # keys are whatever returnFileEntries() yields; on Python 2 a
        # non-ASCII path may therefore never match -- confirm.
        self.filesdict = {}
        for pkg in pkgs:
            for f in pkg.returnFileEntries():
                self.filesdict.setdefault(f, []).append(pkg)

    def createDirsDict(self, pkgs):
        """Build self.dirsdict: dir path -> list of packages listing it."""
        self.dirsdict = {}
        for pkg in pkgs:
            for d in pkg.returnFileEntries(ftype='dir'):
                self.dirsdict.setdefault(d, []).append(pkg)

    def log(self, value, msg):
        """Discard log messages."""
        # print msg
        pass


def checkPrco(prcotype, pkg1, pkg2):
    """returns True if pkg1 satisfies the prco given by pkg2 or vice versa"""
    for reqtuple in pkg1.returnPrco(prcotype):  # (n,f,(e,v,r))
        if pkg2.inPrcoRange('provides', reqtuple):
            return True
    for reqtuple in pkg2.returnPrco(prcotype):  # (n,f,(e,v,r))
        if pkg1.inPrcoRange('provides', reqtuple):
            return True
    return False


class Conflicts:
    """Conflict record of one package."""

    def __init__(self):
        self.pkgid = None    # string form of the package
        self.repoid = None   # id of the repo the package comes from
        self.srpm = None     # 'sourcerpm' value of the package
        self.filecount = 0   # number of file entries of the package
        self.count = 0       # number of (other package, path) conflicts
        self.files = {}      # other package id -> list of conflicting paths


def comparePkgs(paths, pkg):
    """Run comparePathWithPkg() for every path in `paths`."""
    for path in paths:
        comparePathWithPkg(path, pkg)


def _isMultiCompatPath(path):
    """Return True if `path` starts with one of MULTICOMPAT_PREFIXES."""
    for prefix in MULTICOMPAT_PREFIXES:
        if path.startswith(prefix):
            return True
    return False


def _headerFileInfo(pkg):
    """Return ({filename: md5}, {filename: mode}) read from the local RPM
    header of `pkg`. The header is fetched only once."""
    hdr = pkg.returnLocalHeader()
    checksums = {}
    modes = {}
    for (hdrfile, hdrfilemd5, hdrfilemode) in zip(hdr['filenames'],
                                                  hdr['filemd5s'],
                                                  hdr['filemodes']):
        checksums[hdrfile] = hdrfilemd5
        modes[hdrfile] = hdrfilemode
    return (checksums, modes)


def comparePathWithPkg(path, pkg1):
    """Compare `path` in pkg1 with every other package listing it as a file.

    Uses the globals `lister` (its filesdict/dirsdict must be current) and
    `conflicts`, which receives a Conflicts record for pkg1 whenever the
    md5 checksums of `path` differ. Differing file modes only print a
    warning.
    """
    (name, a, e, v, r) = pkg1.pkgtup
    # yum-3.2.17 (F-8)
    if path not in lister.filesdict:  # is not a file?
        return
    if path in lister.dirsdict:  # is file _and_ dir?
        print("CONFLICT DIR/FILE: %s" % path)
    for pkg2 in lister.filesdict[path]:
        (name2, a2, e2, v2, r2) = pkg2.pkgtup
        # NOTE(review): the source indentation was lost; the nesting of the
        # two checks after archCheck() under "name2 == name" is inferred
        # from their comments ("only arch can be different") -- confirm.
        if name2 == name:
            # Dumb: Avoid comparing compat-arch upgrades.
            # This includes noarch<->arch switches.
            if archCheck(a, a2):
                continue
            # TODO: Filter out multi-compat dirs, /usr/bin, ...
            # For this, archs must differ, at least.
            if multicompatmode:
                if (a != a2) and _isMultiCompatPath(path):
                    continue
            else:
                # Dumb: If NEVR is the same, only arch can be different.
                # Avoid comparing such packages.
                if (e, v, r) == (e2, v2, r2):
                    continue

        # Ignore packages if they conflict with or obsolete eachother.
        if checkPrco('conflicts', pkg1, pkg2):
            continue
        if checkPrco('obsoletes', pkg1, pkg2):
            continue

        # pdb.set_trace()
        lister.fixYum()
        lister.downloadHeader(pkg1)
        lister.fixYum()
        lister.downloadHeader(pkg2)

        (hdrchecksums1, hdrfilemodes1) = _headerFileInfo(pkg1)
        (hdrchecksums2, hdrfilemodes2) = _headerFileInfo(pkg2)

        if hdrfilemodes1[path] != hdrfilemodes2[path]:
            print("WARNING: file mode conflict")
            print(' => %s' % path)
            print(' => %s %s' % (pkg1, pkg2))

        if hdrchecksums1[path] != hdrchecksums2[path]:
            pkgid1 = pkg1.__str__()
            if pkgid1 not in conflicts:
                co = Conflicts()
                co.pkgid = pkgid1
                co.repoid = pkg1.repoid
                co.srpm = pkg1.returnSimple('sourcerpm')
                co.filecount = len(pkg1.returnFileEntries())
                conflicts[pkgid1] = co
            else:
                co = conflicts[pkgid1]
            pkgid2 = pkg2.__str__()  # conflicts with this pkg
            pathlist = co.files.setdefault(pkgid2, [])
            if path not in pathlist:
                pathlist.append(path)
                co.count += 1


def main():
    """Run both passes and print the conflict report."""
    global lister, conflicts
    (opts, cruft) = parseArgs()

    if _yumOlderThan('3.2.24') and len(opts.arch) > 1:
        print('ERROR: can handle only a single arch with Yum < 3.2.24')
        sys.exit(errno.EINVAL)

    # Don't treat double-"+" as regexp meta-characters.
    # One pass only: repeating the replacement would mangle "+++".
    opts.name = [pattern.replace('++', r'\+\+') for pattern in opts.name]
    usesearch = len(opts.name) > 0

    # Full set of repositories.
    lister = RepoLister(arch=opts.arch, config=opts.config)
    if os.geteuid() != 0 or opts.cachedir != '':
        if opts.cachedir != '':
            cachedir = opts.cachedir
        else:
            cachedir = getCacheDir()
            if cachedir is None:
                print("Error: Could not make cachedir, exiting")
                sys.exit(50)
        lister.repos.setCacheDir(cachedir)

    if len(opts.repoid) > 0:
        for repo in lister.repos.repos.values():
            if repo.id not in opts.repoid:
                repo.disable()
            else:
                repo.enable()

    print('Target distribution:')
    for repo in lister.repos.listEnabled():
        print('  %s' % repo)
        # print r.dump()

    try:
        print('Reading repository metadata...')
        sys.stdout.flush()
        lister.readMetadata()
    except yum.Errors.RepoError:
        # sys.exc_info() instead of "except ... as e" keeps the syntax
        # valid on every Python version.
        print(sys.exc_info()[1])
        raise

    ### Pass 1
    print('Pass 1: Potential duplicate files in metadata...')
    sys.stdout.flush()
    lister.findDuplicates(recreate=True)
#    lister.findDuplicates(recreate=False)

    ### Pass 2
    print('Pass 2: Examining duplicates closer...')
    sys.stdout.flush()  # TODO
    # The pickle is the file written by pass 1 above.
    inf = open(DUPLICATES_PICKLE, 'rb')
    try:
        dupes = pickle.load(inf)
    finally:
        inf.close()

    # Create a map of all package sets that conflict with eachother
    # in at least one path. Keyed by the tuple of sorted package tuples,
    # which cannot collide the way concatenated strings could.
    pathsperset = {}
    for (path, pkgtups) in dupes.items():
        pkgtups.sort()
        entry = pathsperset.setdefault(tuple(pkgtups), [pkgtups, []])
        entry[1].append(path)
    del dupes

    # Examine each package set individually.
    conflicts = {}
    for (pkgtups, paths) in pathsperset.values():
        if usesearch:
            skipset = True
            for pkgtup in pkgtups:
                pkgname = pkgtup.split(' ')[0]
                if isWantedName(pkgname, opts.name):
                    skipset = False
                    break
            if skipset:
                continue
        pkgs = []
        for pkgtup in pkgtups:
            (n, e, v, r, a) = pkgtup.split(' ')
            pkgs.extend(lister.pkgSack.searchNevra(n, e, v, r, a))
        lister.createFilesDict(pkgs)
        lister.createDirsDict(pkgs)
        for pkg1 in pkgs:
            comparePkgs(paths, pkg1)

    for co in sorted(conflicts.values(), key=lambda c: c.pkgid):
        print('=> %s' % co.srpm)
        print('=> %s in %s' % (co.pkgid, co.repoid))
        if co.count == co.filecount:
            print(' All files conflict!')
        for pkgid in sorted(co.files):
            print(' File conflict with: %s' % pkgid)
            co.files[pkgid].sort()
            for f in co.files[pkgid]:
                print('  %s' % f)
        print('')
    print('Done.')


if __name__ == "__main__":
    main()