summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Zac Medico <zmedico@gentoo.org>, 2009-04-30 06:53:00 +0000
committer: Zac Medico <zmedico@gentoo.org>, 2009-04-30 06:53:00 +0000
commit: f507968fe298f3f894f9d7ef44bbdc6dccbca663 (patch)
tree: 912162bcf014b543125e400b4125da2c9d715be2
parent: Bug #263370 - In create_message(), use email.header.Header to wrap the (diff)
download: portage-multirepo-f507968fe298f3f894f9d7ef44bbdc6dccbca663.tar.gz
portage-multirepo-f507968fe298f3f894f9d7ef44bbdc6dccbca663.tar.bz2
portage-multirepo-f507968fe298f3f894f9d7ef44bbdc6dccbca663.zip
Add a new egencache --rsync option which enables a stat collision workaround
for cases in which the content of a cache entry changes and neither the file mtime nor size changes (preventing rsync from detecting changes). See bug #139134. This option should only be needed for distribution via something like rsync, which relies on timestamps and file sizes to detect changes. It's not needed with git since that uses a more thorough mechanism which allows it to detect changed inode numbers (described in racy-git.txt in the git technical docs). (trunk r13262) svn path=/main/branches/2.1.6/; revision=13441
-rwxr-xr-x bin/egencache 51
-rw-r--r-- pym/portage/cache/cache_errors.py 22
-rw-r--r-- pym/portage/cache/metadata.py 49
3 files changed, 104 insertions, 18 deletions
diff --git a/bin/egencache b/bin/egencache
index de6db88d..e8981deb 100755
--- a/bin/egencache
+++ b/bin/egencache
@@ -24,7 +24,7 @@ import optparse
import os
import portage
import _emerge
-from portage.cache.cache_errors import CacheError
+from portage.cache.cache_errors import CacheError, StatCollision
from portage.util import writemsg_level
def parse_args(args):
@@ -46,6 +46,10 @@ def parse_args(args):
action="store",
help="max load allowed when spawning multiple jobs",
dest="load_average")
+ parser.add_option("--rsync",
+ action="store_true",
+ help="enable rsync stat collision workaround " + \
+ "for bug 139134 (use with --update)")
options, args = parser.parse_args(args)
if not options.update:
@@ -73,7 +77,8 @@ def parse_args(args):
return options, args
class GenCache(object):
- def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None):
+ def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None,
+ rsync=False):
self._portdb = portdb
# We can globally cleanse stale cache only if we
# iterate over every single cp.
@@ -90,22 +95,47 @@ class GenCache(object):
metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule")
self._trg_cache = metadbmodule(portdb.porttree_root,
"metadata/cache", portage.auxdbkeys[:])
+ if rsync:
+ self._trg_cache.raise_stat_collision = True
self._existing_nodes = set()
def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata):
self._existing_nodes.add(cpv)
if metadata is not None:
- # TODO: Implement a workaround for bug 139134 here. The cache
- # should be able to optionally raise an exception in order to
- # indicate any mtime + size collisions that will prevent rsync
- # from detecting changes. These exceptions will be handled by
- # bumping the mtime on the ebuild (and the corresponding cache
- # entry).
if metadata.get('EAPI') == '0':
del metadata['EAPI']
try:
- self._trg_cache[cpv] = metadata
+ try:
+ self._trg_cache[cpv] = metadata
+ except StatCollision, sc:
+ # If the content of a cache entry changes and neither the
+ # file mtime nor size changes, it will prevent rsync from
+ # detecting changes. Cache backends may raise this
+ # exception from _setitem() if they detect this type of stat
+ # collision. These exceptions are handled by bumping the
+ # mtime on the ebuild (and the corresponding cache entry).
+ # See bug #139134.
+ max_mtime = sc.mtime
+ for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems():
+ if max_mtime < ec_mtime:
+ max_mtime = ec_mtime
+ if max_mtime == sc.mtime:
+ max_mtime += 1
+ max_mtime = long(max_mtime)
+ try:
+ os.utime(ebuild_path, (max_mtime, max_mtime))
+ except OSError, e:
+ self.returncode |= 1
+ writemsg_level(
+ "%s writing target: %s\n" % (cpv, e),
+ level=logging.ERROR, noiselevel=-1)
+ else:
+ metadata['_mtime_'] = max_mtime
+ self._trg_cache[cpv] = metadata
+ self._portdb.auxdb[repo_path][cpv] = metadata
+
except CacheError, ce:
+ self.returncode |= 1
writemsg_level(
"%s writing target: %s\n" % (cpv, ce),
level=logging.ERROR, noiselevel=-1)
@@ -195,7 +225,8 @@ def egencache_main(args):
gen_cache = GenCache(portdb, cp_iter=cp_iter,
max_jobs=options.jobs,
- max_load=options.load_average)
+ max_load=options.load_average,
+ rsync=options.rsync)
gen_cache.run()
return gen_cache.returncode
diff --git a/pym/portage/cache/cache_errors.py b/pym/portage/cache/cache_errors.py
index f63e5994..e1e8eead 100644
--- a/pym/portage/cache/cache_errors.py
+++ b/pym/portage/cache/cache_errors.py
@@ -39,3 +39,25 @@ class ReadOnlyRestriction(CacheError):
self.info = info
def __str__(self):
return "cache is non-modifiable"+str(self.info)
+
+class StatCollision(CacheError):
+ """
+ If the content of a cache entry changes and neither the file mtime nor
+ size changes, it will prevent rsync from detecting changes. Cache backends
+ may raise this exception from _setitem() if they detect this type of stat
+ collision. See bug #139134.
+ """
+ def __init__(self, key, filename, mtime, size):
+ self.key = key
+ self.filename = filename
+ self.mtime = mtime
+ self.size = size
+
+ def __str__(self):
+ return "%s has stat collision with size %s and mtime %s" % \
+ (self.key, self.size, self.mtime)
+
+ def __repr__(self):
+ return "portage.cache.cache_errors.StatCollision(%s)" % \
+ (', '.join((repr(self.key), repr(self.filename),
+ repr(self.mtime), repr(self.size))),)
diff --git a/pym/portage/cache/metadata.py b/pym/portage/cache/metadata.py
index 5222223c..a8be0109 100644
--- a/pym/portage/cache/metadata.py
+++ b/pym/portage/cache/metadata.py
@@ -3,7 +3,7 @@
# License: GPL2
# $Id$
-import errno, os, re
+import errno, os, re, sys
from portage.cache import cache_errors, flat_hash
import portage.eclass_cache
from portage.cache.template import reconstruct_eclasses
@@ -30,6 +30,7 @@ class database(flat_hash.database):
super(database, self).__init__(location, *args, **config)
self.location = os.path.join(loc, "metadata","cache")
self.ec = portage.eclass_cache.cache(loc)
+ self.raise_stat_collision = False
def _parse_data(self, data, cpv):
_hashed_re_match = self._hashed_re.match
@@ -73,31 +74,63 @@ class database(flat_hash.database):
values = ProtectedDict(values)
values["INHERITED"] = ' '.join(sorted(values["_eclasses_"]))
+ new_content = []
+ for k in self.auxdbkey_order:
+ new_content.append(unicode(values.get(k, ''), errors='replace'))
+ new_content.append(u'\n')
+ for i in xrange(magic_line_count - len(self.auxdbkey_order)):
+ new_content.append(u'\n')
+ new_content = u''.join(new_content)
+ new_content = new_content.encode(
+ sys.getdefaultencoding(), 'backslashreplace')
+
+ new_fp = os.path.join(self.location, cpv)
+ try:
+ f = open(new_fp, 'rb')
+ except EnvironmentError:
+ pass
+ else:
+ try:
+ try:
+ existing_st = os.fstat(f.fileno())
+ existing_content = f.read()
+ finally:
+ f.close()
+ except EnvironmentError:
+ pass
+ else:
+ existing_mtime = long(existing_st.st_mtime)
+ if values['_mtime_'] == existing_mtime and \
+ existing_content == new_content:
+ return
+
+ if self.raise_stat_collision and \
+ values['_mtime_'] == existing_mtime and \
+ len(new_content) == existing_st.st_size:
+ raise cache_errors.StatCollision(cpv, new_fp,
+ existing_mtime, existing_st.st_size)
+
s = cpv.rfind("/")
fp = os.path.join(self.location,cpv[:s],
".update.%i.%s" % (os.getpid(), cpv[s+1:]))
try:
- myf = open(fp, "w")
+ myf = open(fp, 'wb')
except EnvironmentError, e:
if errno.ENOENT == e.errno:
try:
self._ensure_dirs(cpv)
- myf = open(fp, "w")
+ myf = open(fp, 'wb')
except EnvironmentError, e:
raise cache_errors.CacheCorruption(cpv, e)
else:
raise cache_errors.CacheCorruption(cpv, e)
try:
- for k in self.auxdbkey_order:
- myf.write(values.get(k, "") + "\n")
- for i in xrange(magic_line_count - len(self.auxdbkey_order)):
- myf.write("\n")
+ myf.write(new_content)
finally:
myf.close()
self._ensure_access(fp, mtime=values["_mtime_"])
- new_fp = os.path.join(self.location, cpv)
try:
os.rename(fp, new_fp)
except EnvironmentError, e: