class rbcrevs:
    """A byte string made of an immutable prefix and a mutable suffix.

    The prefix is the buffer the instance was created with (for example the
    memory-mapped content of the on-disk cache) and is never modified in
    place.  All writes go to the ``bytearray`` suffix.  Any operation that
    has to change bytes living in the prefix first copies the prefix into
    the suffix (see ``make_mutable``), after which the prefix is empty.

    Offsets taken by the methods are byte offsets into the logical
    concatenation ``prefix + suffix``.
    """

    def __init__(self, revs):
        # ``revs`` becomes the read-only prefix; the suffix starts out empty.
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        """Return the total number of bytes (prefix plus suffix).

        This includes any zero padding ``pack_into`` added to the suffix.
        """
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """Unpack the ``_rbcrecfmt`` record stored at byte offset rbcrevidx."""
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        """Fold the prefix into the suffix so every byte becomes writable.

        Does nothing when the prefix is already empty.
        """
        if len(self._prefix) > 0:
            entirety = bytearray(self._prefix)
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        """Drop every byte from offset ``pos`` onwards."""
        # The prefix cannot be shortened in place, so copy it first.
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Write the record ``(node, branchidx)`` at byte offset rbcrevidx.

        Returns the result of the underlying ``pack_into`` call.
        """
        if rbcrevidx < len(self._prefix):
            # The target lives in the immutable part: make it writable.
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        """Append ``extension`` to the mutable suffix."""
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return the bytes in ``[begin, end)`` of the logical byte string.

        The range may lie in the prefix, in the suffix, or span both.
        """
        prefix_len = len(self._prefix)
        # Offsets relative to the suffix.  They must be clamped at zero: a
        # negative value would be interpreted by Python as "count from the
        # end", returning the wrong bytes (or none) whenever the requested
        # range starts or ends inside the prefix.
        rest_begin = max(begin - prefix_len, 0)
        rest_end = max(end - prefix_len, 0)
        if begin < prefix_len:
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(self._rest[rest_begin:rest_end])
            return acc
        return self._rest[rest_begin:rest_end]
@@ -648,7 +716,7 @@ assert repo.filtername is None self._repo = repo self._names = [] # branch names in local encoding with static index - self._rbcrevs = bytearray() + self._rbcrevs = rbcrevs(bytearray()) self._rbcsnameslen = 0 # length of names read at _rbcsnameslen try: bndata = repo.cachevfs.read(_rbcnames) @@ -664,8 +732,12 @@ if self._names: try: - data = repo.cachevfs.read(_rbcrevs) - self._rbcrevs[:] = data + if repo.ui.configbool(b'format', b'mmap-revbranchcache'): + with repo.cachevfs(_rbcrevs) as fp: + data = util.buffer(util.mmapread(fp)) + else: + data = repo.cachevfs.read(_rbcrevs) + self._rbcrevs = rbcrevs(data) except (IOError, OSError) as inst: repo.ui.debug( b"couldn't read revision branch cache: %s\n" @@ -685,7 +757,7 @@ del self._names[:] self._rbcnamescount = 0 self._rbcrevslen = len(self._repo.changelog) - self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize) + self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize)) util.clearcachedproperty(self, b'_namesreverse') @util.propertycache @@ -708,9 +780,7 @@ # fast path: extract data from cache, use it if node is matching reponode = changelog.node(rev)[:_rbcnodelen] - cachenode, branchidx = unpack_from( - _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx - ) + cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx) close = bool(branchidx & _rbccloseflag) if close: branchidx &= _rbcbranchidxmask @@ -733,7 +803,7 @@ b"revision branch cache to revision %d\n" % rev ) truncate = rbcrevidx + _rbcrecsize - del self._rbcrevs[truncate:] + self._rbcrevs.truncate(truncate) self._rbcrevslen = min(self._rbcrevslen, truncate) # fall back to slow path and make sure it will be written to disk @@ -782,16 +852,7 @@ if rev == nullrev: return rbcrevidx = rev * _rbcrecsize - requiredsize = rbcrevidx + _rbcrecsize - rbccur = len(self._rbcrevs) - if rbccur < requiredsize: - # bytearray doesn't allocate extra space at least in Python 3.7. 
- # When multiple changesets are added in a row, precise resize would - # result in quadratic complexity. Overallocate to compensate by - # use the classic doubling technique for dynamic arrays instead. - # If there was a gap in the map before, less space will be reserved. - self._rbcrevs.extend(b'\0' * max(_rbcmininc, requiredsize)) - pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx) + self._rbcrevs.pack_into(rbcrevidx, node, branchidx) self._rbcrevslen = min(self._rbcrevslen, rev) tr = self._repo.currenttransaction() @@ -866,5 +927,5 @@ f.seek(start) f.truncate() end = revs * _rbcrecsize - f.write(self._rbcrevs[start:end]) + f.write(self._rbcrevs.slice(start, end)) self._rbcrevslen = revs diff --git a/mercurial/configitems.toml b/mercurial/configitems.toml --- a/mercurial/configitems.toml +++ b/mercurial/configitems.toml @@ -2913,3 +2913,8 @@ name = "date-format" default = "" in_core_extension = "blackbox" + +[[items]] +section = "format" +name = "mmap-revbranchcache" +default = false diff --git a/tests/test-branches.t b/tests/test-branches.t --- a/tests/test-branches.t +++ b/tests/test-branches.t @@ -1,3 +1,12 @@ +#testcases mmap nommap + +#if mmap + $ cat <<EOF >> $HGRCPATH + > [format] + > mmap-revbranchcache=true + > EOF +#endif + $ hg init a $ cd a @@ -921,17 +930,10 @@ $ f --size --hexdump .hg/cache/rbc-* .hg/cache/rbc-names-v1: size=1 0000: 61 |a| - .hg/cache/rbc-revs-v1: size=152 + .hg/cache/rbc-revs-v1: size=48 0000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 0020: 00 00 00 00 00 00 00 00 d8 cb c6 1d 00 00 00 00 |................| - 0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0070: 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 |................| - 0080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0090: 00 00 00 00 00 00 00 00 |........| $ cd ..